xref: /src/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp (revision c80e69b00d976a5a3b3e84527f270fa7e72a8205)
11d5ae102SDimitry Andric //===- ModuloSchedule.cpp - Software pipeline schedule expansion ----------===//
21d5ae102SDimitry Andric //
31d5ae102SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
41d5ae102SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
51d5ae102SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
61d5ae102SDimitry Andric //
71d5ae102SDimitry Andric //===----------------------------------------------------------------------===//
81d5ae102SDimitry Andric 
91d5ae102SDimitry Andric #include "llvm/CodeGen/ModuloSchedule.h"
101d5ae102SDimitry Andric #include "llvm/ADT/StringExtras.h"
11cfca06d7SDimitry Andric #include "llvm/Analysis/MemoryLocation.h"
121d5ae102SDimitry Andric #include "llvm/CodeGen/LiveIntervals.h"
131d5ae102SDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h"
14145449b1SDimitry Andric #include "llvm/CodeGen/MachineLoopInfo.h"
151d5ae102SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
16706b4fc4SDimitry Andric #include "llvm/InitializePasses.h"
171d5ae102SDimitry Andric #include "llvm/MC/MCContext.h"
181d5ae102SDimitry Andric #include "llvm/Support/Debug.h"
191d5ae102SDimitry Andric #include "llvm/Support/ErrorHandling.h"
201d5ae102SDimitry Andric #include "llvm/Support/raw_ostream.h"
211d5ae102SDimitry Andric 
221d5ae102SDimitry Andric #define DEBUG_TYPE "pipeliner"
231d5ae102SDimitry Andric using namespace llvm;
241d5ae102SDimitry Andric 
25ac9a064cSDimitry Andric static cl::opt<bool> SwapBranchTargetsMVE(
26ac9a064cSDimitry Andric     "pipeliner-swap-branch-targets-mve", cl::Hidden, cl::init(false),
27ac9a064cSDimitry Andric     cl::desc("Swap target blocks of a conditional branch for MVE expander"));
28ac9a064cSDimitry Andric 
print(raw_ostream & OS)291d5ae102SDimitry Andric void ModuloSchedule::print(raw_ostream &OS) {
301d5ae102SDimitry Andric   for (MachineInstr *MI : ScheduledInstrs)
311d5ae102SDimitry Andric     OS << "[stage " << getStage(MI) << " @" << getCycle(MI) << "c] " << *MI;
321d5ae102SDimitry Andric }
331d5ae102SDimitry Andric 
341d5ae102SDimitry Andric //===----------------------------------------------------------------------===//
351d5ae102SDimitry Andric // ModuloScheduleExpander implementation
361d5ae102SDimitry Andric //===----------------------------------------------------------------------===//
371d5ae102SDimitry Andric 
381d5ae102SDimitry Andric /// Return the register values for  the operands of a Phi instruction.
391d5ae102SDimitry Andric /// This function assume the instruction is a Phi.
getPhiRegs(MachineInstr & Phi,MachineBasicBlock * Loop,unsigned & InitVal,unsigned & LoopVal)401d5ae102SDimitry Andric static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop,
411d5ae102SDimitry Andric                        unsigned &InitVal, unsigned &LoopVal) {
421d5ae102SDimitry Andric   assert(Phi.isPHI() && "Expecting a Phi.");
431d5ae102SDimitry Andric 
441d5ae102SDimitry Andric   InitVal = 0;
451d5ae102SDimitry Andric   LoopVal = 0;
461d5ae102SDimitry Andric   for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
471d5ae102SDimitry Andric     if (Phi.getOperand(i + 1).getMBB() != Loop)
481d5ae102SDimitry Andric       InitVal = Phi.getOperand(i).getReg();
491d5ae102SDimitry Andric     else
501d5ae102SDimitry Andric       LoopVal = Phi.getOperand(i).getReg();
511d5ae102SDimitry Andric 
521d5ae102SDimitry Andric   assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");
531d5ae102SDimitry Andric }
541d5ae102SDimitry Andric 
551d5ae102SDimitry Andric /// Return the Phi register value that comes from the incoming block.
getInitPhiReg(MachineInstr & Phi,MachineBasicBlock * LoopBB)561d5ae102SDimitry Andric static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
571d5ae102SDimitry Andric   for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
581d5ae102SDimitry Andric     if (Phi.getOperand(i + 1).getMBB() != LoopBB)
591d5ae102SDimitry Andric       return Phi.getOperand(i).getReg();
601d5ae102SDimitry Andric   return 0;
611d5ae102SDimitry Andric }
621d5ae102SDimitry Andric 
631d5ae102SDimitry Andric /// Return the Phi register value that comes the loop block.
getLoopPhiReg(MachineInstr & Phi,MachineBasicBlock * LoopBB)641d5ae102SDimitry Andric static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
651d5ae102SDimitry Andric   for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
661d5ae102SDimitry Andric     if (Phi.getOperand(i + 1).getMBB() == LoopBB)
671d5ae102SDimitry Andric       return Phi.getOperand(i).getReg();
681d5ae102SDimitry Andric   return 0;
691d5ae102SDimitry Andric }
701d5ae102SDimitry Andric 
expand()711d5ae102SDimitry Andric void ModuloScheduleExpander::expand() {
721d5ae102SDimitry Andric   BB = Schedule.getLoop()->getTopBlock();
731d5ae102SDimitry Andric   Preheader = *BB->pred_begin();
741d5ae102SDimitry Andric   if (Preheader == BB)
751d5ae102SDimitry Andric     Preheader = *std::next(BB->pred_begin());
761d5ae102SDimitry Andric 
771d5ae102SDimitry Andric   // Iterate over the definitions in each instruction, and compute the
781d5ae102SDimitry Andric   // stage difference for each use.  Keep the maximum value.
791d5ae102SDimitry Andric   for (MachineInstr *MI : Schedule.getInstructions()) {
801d5ae102SDimitry Andric     int DefStage = Schedule.getStage(MI);
817fa27ce4SDimitry Andric     for (const MachineOperand &Op : MI->all_defs()) {
821d5ae102SDimitry Andric       Register Reg = Op.getReg();
831d5ae102SDimitry Andric       unsigned MaxDiff = 0;
841d5ae102SDimitry Andric       bool PhiIsSwapped = false;
85c0981da4SDimitry Andric       for (MachineOperand &UseOp : MRI.use_operands(Reg)) {
861d5ae102SDimitry Andric         MachineInstr *UseMI = UseOp.getParent();
871d5ae102SDimitry Andric         int UseStage = Schedule.getStage(UseMI);
881d5ae102SDimitry Andric         unsigned Diff = 0;
891d5ae102SDimitry Andric         if (UseStage != -1 && UseStage >= DefStage)
901d5ae102SDimitry Andric           Diff = UseStage - DefStage;
911d5ae102SDimitry Andric         if (MI->isPHI()) {
921d5ae102SDimitry Andric           if (isLoopCarried(*MI))
931d5ae102SDimitry Andric             ++Diff;
941d5ae102SDimitry Andric           else
951d5ae102SDimitry Andric             PhiIsSwapped = true;
961d5ae102SDimitry Andric         }
971d5ae102SDimitry Andric         MaxDiff = std::max(Diff, MaxDiff);
981d5ae102SDimitry Andric       }
991d5ae102SDimitry Andric       RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped);
1001d5ae102SDimitry Andric     }
1011d5ae102SDimitry Andric   }
1021d5ae102SDimitry Andric 
1031d5ae102SDimitry Andric   generatePipelinedLoop();
1041d5ae102SDimitry Andric }
1051d5ae102SDimitry Andric 
generatePipelinedLoop()1061d5ae102SDimitry Andric void ModuloScheduleExpander::generatePipelinedLoop() {
1071d5ae102SDimitry Andric   LoopInfo = TII->analyzeLoopForPipelining(BB);
1081d5ae102SDimitry Andric   assert(LoopInfo && "Must be able to analyze loop!");
1091d5ae102SDimitry Andric 
1101d5ae102SDimitry Andric   // Create a new basic block for the kernel and add it to the CFG.
1111d5ae102SDimitry Andric   MachineBasicBlock *KernelBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
1121d5ae102SDimitry Andric 
1131d5ae102SDimitry Andric   unsigned MaxStageCount = Schedule.getNumStages() - 1;
1141d5ae102SDimitry Andric 
1151d5ae102SDimitry Andric   // Remember the registers that are used in different stages. The index is
1161d5ae102SDimitry Andric   // the iteration, or stage, that the instruction is scheduled in.  This is
1171d5ae102SDimitry Andric   // a map between register names in the original block and the names created
1181d5ae102SDimitry Andric   // in each stage of the pipelined loop.
1191d5ae102SDimitry Andric   ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2];
120e3b55780SDimitry Andric 
121e3b55780SDimitry Andric   // The renaming destination by Phis for the registers across stages.
122e3b55780SDimitry Andric   // This map is updated during Phis generation to point to the most recent
123e3b55780SDimitry Andric   // renaming destination.
124e3b55780SDimitry Andric   ValueMapTy *VRMapPhi = new ValueMapTy[(MaxStageCount + 1) * 2];
125e3b55780SDimitry Andric 
1261d5ae102SDimitry Andric   InstrMapTy InstrMap;
1271d5ae102SDimitry Andric 
1281d5ae102SDimitry Andric   SmallVector<MachineBasicBlock *, 4> PrologBBs;
1291d5ae102SDimitry Andric 
1301d5ae102SDimitry Andric   // Generate the prolog instructions that set up the pipeline.
1311d5ae102SDimitry Andric   generateProlog(MaxStageCount, KernelBB, VRMap, PrologBBs);
1321d5ae102SDimitry Andric   MF.insert(BB->getIterator(), KernelBB);
1331de139fdSDimitry Andric   LIS.insertMBBInMaps(KernelBB);
1341d5ae102SDimitry Andric 
1351d5ae102SDimitry Andric   // Rearrange the instructions to generate the new, pipelined loop,
1361d5ae102SDimitry Andric   // and update register names as needed.
1371d5ae102SDimitry Andric   for (MachineInstr *CI : Schedule.getInstructions()) {
1381d5ae102SDimitry Andric     if (CI->isPHI())
1391d5ae102SDimitry Andric       continue;
1401d5ae102SDimitry Andric     unsigned StageNum = Schedule.getStage(CI);
1411d5ae102SDimitry Andric     MachineInstr *NewMI = cloneInstr(CI, MaxStageCount, StageNum);
1421d5ae102SDimitry Andric     updateInstruction(NewMI, false, MaxStageCount, StageNum, VRMap);
1431d5ae102SDimitry Andric     KernelBB->push_back(NewMI);
1441d5ae102SDimitry Andric     InstrMap[NewMI] = CI;
1451d5ae102SDimitry Andric   }
1461d5ae102SDimitry Andric 
1471d5ae102SDimitry Andric   // Copy any terminator instructions to the new kernel, and update
1481d5ae102SDimitry Andric   // names as needed.
149c0981da4SDimitry Andric   for (MachineInstr &MI : BB->terminators()) {
150c0981da4SDimitry Andric     MachineInstr *NewMI = MF.CloneMachineInstr(&MI);
1511d5ae102SDimitry Andric     updateInstruction(NewMI, false, MaxStageCount, 0, VRMap);
1521d5ae102SDimitry Andric     KernelBB->push_back(NewMI);
153c0981da4SDimitry Andric     InstrMap[NewMI] = &MI;
1541d5ae102SDimitry Andric   }
1551d5ae102SDimitry Andric 
1561d5ae102SDimitry Andric   NewKernel = KernelBB;
1571d5ae102SDimitry Andric   KernelBB->transferSuccessors(BB);
1581d5ae102SDimitry Andric   KernelBB->replaceSuccessor(BB, KernelBB);
1591d5ae102SDimitry Andric 
1601d5ae102SDimitry Andric   generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap,
1611d5ae102SDimitry Andric                        InstrMap, MaxStageCount, MaxStageCount, false);
162e3b55780SDimitry Andric   generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, VRMapPhi,
163e3b55780SDimitry Andric                InstrMap, MaxStageCount, MaxStageCount, false);
1641d5ae102SDimitry Andric 
1651d5ae102SDimitry Andric   LLVM_DEBUG(dbgs() << "New block\n"; KernelBB->dump(););
1661d5ae102SDimitry Andric 
1671d5ae102SDimitry Andric   SmallVector<MachineBasicBlock *, 4> EpilogBBs;
1681d5ae102SDimitry Andric   // Generate the epilog instructions to complete the pipeline.
169e3b55780SDimitry Andric   generateEpilog(MaxStageCount, KernelBB, BB, VRMap, VRMapPhi, EpilogBBs,
170e3b55780SDimitry Andric                  PrologBBs);
1711d5ae102SDimitry Andric 
1721d5ae102SDimitry Andric   // We need this step because the register allocation doesn't handle some
1731d5ae102SDimitry Andric   // situations well, so we insert copies to help out.
1741d5ae102SDimitry Andric   splitLifetimes(KernelBB, EpilogBBs);
1751d5ae102SDimitry Andric 
1761d5ae102SDimitry Andric   // Remove dead instructions due to loop induction variables.
1771d5ae102SDimitry Andric   removeDeadInstructions(KernelBB, EpilogBBs);
1781d5ae102SDimitry Andric 
1791d5ae102SDimitry Andric   // Add branches between prolog and epilog blocks.
1801d5ae102SDimitry Andric   addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap);
1811d5ae102SDimitry Andric 
1821d5ae102SDimitry Andric   delete[] VRMap;
183e3b55780SDimitry Andric   delete[] VRMapPhi;
1841d5ae102SDimitry Andric }
1851d5ae102SDimitry Andric 
cleanup()1861d5ae102SDimitry Andric void ModuloScheduleExpander::cleanup() {
1871d5ae102SDimitry Andric   // Remove the original loop since it's no longer referenced.
1881d5ae102SDimitry Andric   for (auto &I : *BB)
1891d5ae102SDimitry Andric     LIS.RemoveMachineInstrFromMaps(I);
1901d5ae102SDimitry Andric   BB->clear();
1911d5ae102SDimitry Andric   BB->eraseFromParent();
1921d5ae102SDimitry Andric }
1931d5ae102SDimitry Andric 
1941d5ae102SDimitry Andric /// Generate the pipeline prolog code.
generateProlog(unsigned LastStage,MachineBasicBlock * KernelBB,ValueMapTy * VRMap,MBBVectorTy & PrologBBs)1951d5ae102SDimitry Andric void ModuloScheduleExpander::generateProlog(unsigned LastStage,
1961d5ae102SDimitry Andric                                             MachineBasicBlock *KernelBB,
1971d5ae102SDimitry Andric                                             ValueMapTy *VRMap,
1981d5ae102SDimitry Andric                                             MBBVectorTy &PrologBBs) {
1991d5ae102SDimitry Andric   MachineBasicBlock *PredBB = Preheader;
2001d5ae102SDimitry Andric   InstrMapTy InstrMap;
2011d5ae102SDimitry Andric 
2021d5ae102SDimitry Andric   // Generate a basic block for each stage, not including the last stage,
2031d5ae102SDimitry Andric   // which will be generated in the kernel. Each basic block may contain
2041d5ae102SDimitry Andric   // instructions from multiple stages/iterations.
2051d5ae102SDimitry Andric   for (unsigned i = 0; i < LastStage; ++i) {
2061d5ae102SDimitry Andric     // Create and insert the prolog basic block prior to the original loop
2071d5ae102SDimitry Andric     // basic block.  The original loop is removed later.
2081d5ae102SDimitry Andric     MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
2091d5ae102SDimitry Andric     PrologBBs.push_back(NewBB);
2101d5ae102SDimitry Andric     MF.insert(BB->getIterator(), NewBB);
2111d5ae102SDimitry Andric     NewBB->transferSuccessors(PredBB);
2121d5ae102SDimitry Andric     PredBB->addSuccessor(NewBB);
2131d5ae102SDimitry Andric     PredBB = NewBB;
2141de139fdSDimitry Andric     LIS.insertMBBInMaps(NewBB);
2151d5ae102SDimitry Andric 
2161d5ae102SDimitry Andric     // Generate instructions for each appropriate stage. Process instructions
2171d5ae102SDimitry Andric     // in original program order.
2181d5ae102SDimitry Andric     for (int StageNum = i; StageNum >= 0; --StageNum) {
2191d5ae102SDimitry Andric       for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
2201d5ae102SDimitry Andric                                        BBE = BB->getFirstTerminator();
2211d5ae102SDimitry Andric            BBI != BBE; ++BBI) {
2221d5ae102SDimitry Andric         if (Schedule.getStage(&*BBI) == StageNum) {
2231d5ae102SDimitry Andric           if (BBI->isPHI())
2241d5ae102SDimitry Andric             continue;
2251d5ae102SDimitry Andric           MachineInstr *NewMI =
2261d5ae102SDimitry Andric               cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum);
2271d5ae102SDimitry Andric           updateInstruction(NewMI, false, i, (unsigned)StageNum, VRMap);
2281d5ae102SDimitry Andric           NewBB->push_back(NewMI);
2291d5ae102SDimitry Andric           InstrMap[NewMI] = &*BBI;
2301d5ae102SDimitry Andric         }
2311d5ae102SDimitry Andric       }
2321d5ae102SDimitry Andric     }
2331d5ae102SDimitry Andric     rewritePhiValues(NewBB, i, VRMap, InstrMap);
2341d5ae102SDimitry Andric     LLVM_DEBUG({
2351d5ae102SDimitry Andric       dbgs() << "prolog:\n";
2361d5ae102SDimitry Andric       NewBB->dump();
2371d5ae102SDimitry Andric     });
2381d5ae102SDimitry Andric   }
2391d5ae102SDimitry Andric 
2401d5ae102SDimitry Andric   PredBB->replaceSuccessor(BB, KernelBB);
2411d5ae102SDimitry Andric 
2421d5ae102SDimitry Andric   // Check if we need to remove the branch from the preheader to the original
2431d5ae102SDimitry Andric   // loop, and replace it with a branch to the new loop.
2441d5ae102SDimitry Andric   unsigned numBranches = TII->removeBranch(*Preheader);
2451d5ae102SDimitry Andric   if (numBranches) {
2461d5ae102SDimitry Andric     SmallVector<MachineOperand, 0> Cond;
2471d5ae102SDimitry Andric     TII->insertBranch(*Preheader, PrologBBs[0], nullptr, Cond, DebugLoc());
2481d5ae102SDimitry Andric   }
2491d5ae102SDimitry Andric }
2501d5ae102SDimitry Andric 
2511d5ae102SDimitry Andric /// Generate the pipeline epilog code. The epilog code finishes the iterations
2521d5ae102SDimitry Andric /// that were started in either the prolog or the kernel.  We create a basic
2531d5ae102SDimitry Andric /// block for each stage that needs to complete.
generateEpilog(unsigned LastStage,MachineBasicBlock * KernelBB,MachineBasicBlock * OrigBB,ValueMapTy * VRMap,ValueMapTy * VRMapPhi,MBBVectorTy & EpilogBBs,MBBVectorTy & PrologBBs)254145449b1SDimitry Andric void ModuloScheduleExpander::generateEpilog(
255145449b1SDimitry Andric     unsigned LastStage, MachineBasicBlock *KernelBB, MachineBasicBlock *OrigBB,
256e3b55780SDimitry Andric     ValueMapTy *VRMap, ValueMapTy *VRMapPhi, MBBVectorTy &EpilogBBs,
257e3b55780SDimitry Andric     MBBVectorTy &PrologBBs) {
2581d5ae102SDimitry Andric   // We need to change the branch from the kernel to the first epilog block, so
2591d5ae102SDimitry Andric   // this call to analyze branch uses the kernel rather than the original BB.
2601d5ae102SDimitry Andric   MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
2611d5ae102SDimitry Andric   SmallVector<MachineOperand, 4> Cond;
2621d5ae102SDimitry Andric   bool checkBranch = TII->analyzeBranch(*KernelBB, TBB, FBB, Cond);
2631d5ae102SDimitry Andric   assert(!checkBranch && "generateEpilog must be able to analyze the branch");
2641d5ae102SDimitry Andric   if (checkBranch)
2651d5ae102SDimitry Andric     return;
2661d5ae102SDimitry Andric 
2671d5ae102SDimitry Andric   MachineBasicBlock::succ_iterator LoopExitI = KernelBB->succ_begin();
2681d5ae102SDimitry Andric   if (*LoopExitI == KernelBB)
2691d5ae102SDimitry Andric     ++LoopExitI;
2701d5ae102SDimitry Andric   assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor");
2711d5ae102SDimitry Andric   MachineBasicBlock *LoopExitBB = *LoopExitI;
2721d5ae102SDimitry Andric 
2731d5ae102SDimitry Andric   MachineBasicBlock *PredBB = KernelBB;
2741d5ae102SDimitry Andric   MachineBasicBlock *EpilogStart = LoopExitBB;
2751d5ae102SDimitry Andric   InstrMapTy InstrMap;
2761d5ae102SDimitry Andric 
2771d5ae102SDimitry Andric   // Generate a basic block for each stage, not including the last stage,
2781d5ae102SDimitry Andric   // which was generated for the kernel.  Each basic block may contain
2791d5ae102SDimitry Andric   // instructions from multiple stages/iterations.
2801d5ae102SDimitry Andric   int EpilogStage = LastStage + 1;
2811d5ae102SDimitry Andric   for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) {
2821d5ae102SDimitry Andric     MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock();
2831d5ae102SDimitry Andric     EpilogBBs.push_back(NewBB);
2841d5ae102SDimitry Andric     MF.insert(BB->getIterator(), NewBB);
2851d5ae102SDimitry Andric 
2861d5ae102SDimitry Andric     PredBB->replaceSuccessor(LoopExitBB, NewBB);
2871d5ae102SDimitry Andric     NewBB->addSuccessor(LoopExitBB);
2881de139fdSDimitry Andric     LIS.insertMBBInMaps(NewBB);
2891d5ae102SDimitry Andric 
2901d5ae102SDimitry Andric     if (EpilogStart == LoopExitBB)
2911d5ae102SDimitry Andric       EpilogStart = NewBB;
2921d5ae102SDimitry Andric 
2931d5ae102SDimitry Andric     // Add instructions to the epilog depending on the current block.
2941d5ae102SDimitry Andric     // Process instructions in original program order.
2951d5ae102SDimitry Andric     for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) {
2961d5ae102SDimitry Andric       for (auto &BBI : *BB) {
2971d5ae102SDimitry Andric         if (BBI.isPHI())
2981d5ae102SDimitry Andric           continue;
2991d5ae102SDimitry Andric         MachineInstr *In = &BBI;
3001d5ae102SDimitry Andric         if ((unsigned)Schedule.getStage(In) == StageNum) {
3011d5ae102SDimitry Andric           // Instructions with memoperands in the epilog are updated with
3021d5ae102SDimitry Andric           // conservative values.
3031d5ae102SDimitry Andric           MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0);
3041d5ae102SDimitry Andric           updateInstruction(NewMI, i == 1, EpilogStage, 0, VRMap);
3051d5ae102SDimitry Andric           NewBB->push_back(NewMI);
3061d5ae102SDimitry Andric           InstrMap[NewMI] = In;
3071d5ae102SDimitry Andric         }
3081d5ae102SDimitry Andric       }
3091d5ae102SDimitry Andric     }
3101d5ae102SDimitry Andric     generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap,
3111d5ae102SDimitry Andric                          InstrMap, LastStage, EpilogStage, i == 1);
312e3b55780SDimitry Andric     generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, VRMapPhi,
313e3b55780SDimitry Andric                  InstrMap, LastStage, EpilogStage, i == 1);
3141d5ae102SDimitry Andric     PredBB = NewBB;
3151d5ae102SDimitry Andric 
3161d5ae102SDimitry Andric     LLVM_DEBUG({
3171d5ae102SDimitry Andric       dbgs() << "epilog:\n";
3181d5ae102SDimitry Andric       NewBB->dump();
3191d5ae102SDimitry Andric     });
3201d5ae102SDimitry Andric   }
3211d5ae102SDimitry Andric 
3221d5ae102SDimitry Andric   // Fix any Phi nodes in the loop exit block.
3231d5ae102SDimitry Andric   LoopExitBB->replacePhiUsesWith(BB, PredBB);
3241d5ae102SDimitry Andric 
3251d5ae102SDimitry Andric   // Create a branch to the new epilog from the kernel.
3261d5ae102SDimitry Andric   // Remove the original branch and add a new branch to the epilog.
3271d5ae102SDimitry Andric   TII->removeBranch(*KernelBB);
328145449b1SDimitry Andric   assert((OrigBB == TBB || OrigBB == FBB) &&
329145449b1SDimitry Andric          "Unable to determine looping branch direction");
330145449b1SDimitry Andric   if (OrigBB != TBB)
331145449b1SDimitry Andric     TII->insertBranch(*KernelBB, EpilogStart, KernelBB, Cond, DebugLoc());
332145449b1SDimitry Andric   else
3331d5ae102SDimitry Andric     TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc());
3341d5ae102SDimitry Andric   // Add a branch to the loop exit.
3351d5ae102SDimitry Andric   if (EpilogBBs.size() > 0) {
3361d5ae102SDimitry Andric     MachineBasicBlock *LastEpilogBB = EpilogBBs.back();
3371d5ae102SDimitry Andric     SmallVector<MachineOperand, 4> Cond1;
3381d5ae102SDimitry Andric     TII->insertBranch(*LastEpilogBB, LoopExitBB, nullptr, Cond1, DebugLoc());
3391d5ae102SDimitry Andric   }
3401d5ae102SDimitry Andric }
3411d5ae102SDimitry Andric 
3421d5ae102SDimitry Andric /// Replace all uses of FromReg that appear outside the specified
3431d5ae102SDimitry Andric /// basic block with ToReg.
replaceRegUsesAfterLoop(unsigned FromReg,unsigned ToReg,MachineBasicBlock * MBB,MachineRegisterInfo & MRI,LiveIntervals & LIS)3441d5ae102SDimitry Andric static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg,
3451d5ae102SDimitry Andric                                     MachineBasicBlock *MBB,
3461d5ae102SDimitry Andric                                     MachineRegisterInfo &MRI,
3471d5ae102SDimitry Andric                                     LiveIntervals &LIS) {
348c0981da4SDimitry Andric   for (MachineOperand &O :
349c0981da4SDimitry Andric        llvm::make_early_inc_range(MRI.use_operands(FromReg)))
3501d5ae102SDimitry Andric     if (O.getParent()->getParent() != MBB)
3511d5ae102SDimitry Andric       O.setReg(ToReg);
3521d5ae102SDimitry Andric   if (!LIS.hasInterval(ToReg))
3531d5ae102SDimitry Andric     LIS.createEmptyInterval(ToReg);
3541d5ae102SDimitry Andric }
3551d5ae102SDimitry Andric 
3561d5ae102SDimitry Andric /// Return true if the register has a use that occurs outside the
3571d5ae102SDimitry Andric /// specified loop.
hasUseAfterLoop(unsigned Reg,MachineBasicBlock * BB,MachineRegisterInfo & MRI)3581d5ae102SDimitry Andric static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB,
3591d5ae102SDimitry Andric                             MachineRegisterInfo &MRI) {
360c0981da4SDimitry Andric   for (const MachineOperand &MO : MRI.use_operands(Reg))
361c0981da4SDimitry Andric     if (MO.getParent()->getParent() != BB)
3621d5ae102SDimitry Andric       return true;
3631d5ae102SDimitry Andric   return false;
3641d5ae102SDimitry Andric }
3651d5ae102SDimitry Andric 
3661d5ae102SDimitry Andric /// Generate Phis for the specific block in the generated pipelined code.
3671d5ae102SDimitry Andric /// This function looks at the Phis from the original code to guide the
3681d5ae102SDimitry Andric /// creation of new Phis.
generateExistingPhis(MachineBasicBlock * NewBB,MachineBasicBlock * BB1,MachineBasicBlock * BB2,MachineBasicBlock * KernelBB,ValueMapTy * VRMap,InstrMapTy & InstrMap,unsigned LastStageNum,unsigned CurStageNum,bool IsLast)3691d5ae102SDimitry Andric void ModuloScheduleExpander::generateExistingPhis(
3701d5ae102SDimitry Andric     MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,
3711d5ae102SDimitry Andric     MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap,
3721d5ae102SDimitry Andric     unsigned LastStageNum, unsigned CurStageNum, bool IsLast) {
3731d5ae102SDimitry Andric   // Compute the stage number for the initial value of the Phi, which
3741d5ae102SDimitry Andric   // comes from the prolog. The prolog to use depends on to which kernel/
3751d5ae102SDimitry Andric   // epilog that we're adding the Phi.
3761d5ae102SDimitry Andric   unsigned PrologStage = 0;
3771d5ae102SDimitry Andric   unsigned PrevStage = 0;
3781d5ae102SDimitry Andric   bool InKernel = (LastStageNum == CurStageNum);
3791d5ae102SDimitry Andric   if (InKernel) {
3801d5ae102SDimitry Andric     PrologStage = LastStageNum - 1;
3811d5ae102SDimitry Andric     PrevStage = CurStageNum;
3821d5ae102SDimitry Andric   } else {
3831d5ae102SDimitry Andric     PrologStage = LastStageNum - (CurStageNum - LastStageNum);
3841d5ae102SDimitry Andric     PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1;
3851d5ae102SDimitry Andric   }
3861d5ae102SDimitry Andric 
3871d5ae102SDimitry Andric   for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
3881d5ae102SDimitry Andric                                    BBE = BB->getFirstNonPHI();
3891d5ae102SDimitry Andric        BBI != BBE; ++BBI) {
3901d5ae102SDimitry Andric     Register Def = BBI->getOperand(0).getReg();
3911d5ae102SDimitry Andric 
3921d5ae102SDimitry Andric     unsigned InitVal = 0;
3931d5ae102SDimitry Andric     unsigned LoopVal = 0;
3941d5ae102SDimitry Andric     getPhiRegs(*BBI, BB, InitVal, LoopVal);
3951d5ae102SDimitry Andric 
3961d5ae102SDimitry Andric     unsigned PhiOp1 = 0;
3971d5ae102SDimitry Andric     // The Phi value from the loop body typically is defined in the loop, but
3981d5ae102SDimitry Andric     // not always. So, we need to check if the value is defined in the loop.
3991d5ae102SDimitry Andric     unsigned PhiOp2 = LoopVal;
4001d5ae102SDimitry Andric     if (VRMap[LastStageNum].count(LoopVal))
4011d5ae102SDimitry Andric       PhiOp2 = VRMap[LastStageNum][LoopVal];
4021d5ae102SDimitry Andric 
4031d5ae102SDimitry Andric     int StageScheduled = Schedule.getStage(&*BBI);
4041d5ae102SDimitry Andric     int LoopValStage = Schedule.getStage(MRI.getVRegDef(LoopVal));
4051d5ae102SDimitry Andric     unsigned NumStages = getStagesForReg(Def, CurStageNum);
4061d5ae102SDimitry Andric     if (NumStages == 0) {
4071d5ae102SDimitry Andric       // We don't need to generate a Phi anymore, but we need to rename any uses
4081d5ae102SDimitry Andric       // of the Phi value.
4091d5ae102SDimitry Andric       unsigned NewReg = VRMap[PrevStage][LoopVal];
4101d5ae102SDimitry Andric       rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, 0, &*BBI, Def,
4111d5ae102SDimitry Andric                             InitVal, NewReg);
4121d5ae102SDimitry Andric       if (VRMap[CurStageNum].count(LoopVal))
4131d5ae102SDimitry Andric         VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal];
4141d5ae102SDimitry Andric     }
4151d5ae102SDimitry Andric     // Adjust the number of Phis needed depending on the number of prologs left,
4161d5ae102SDimitry Andric     // and the distance from where the Phi is first scheduled. The number of
4171d5ae102SDimitry Andric     // Phis cannot exceed the number of prolog stages. Each stage can
4181d5ae102SDimitry Andric     // potentially define two values.
4191d5ae102SDimitry Andric     unsigned MaxPhis = PrologStage + 2;
4201d5ae102SDimitry Andric     if (!InKernel && (int)PrologStage <= LoopValStage)
4211d5ae102SDimitry Andric       MaxPhis = std::max((int)MaxPhis - (int)LoopValStage, 1);
4221d5ae102SDimitry Andric     unsigned NumPhis = std::min(NumStages, MaxPhis);
4231d5ae102SDimitry Andric 
4241d5ae102SDimitry Andric     unsigned NewReg = 0;
4251d5ae102SDimitry Andric     unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;
4261d5ae102SDimitry Andric     // In the epilog, we may need to look back one stage to get the correct
427cfca06d7SDimitry Andric     // Phi name, because the epilog and prolog blocks execute the same stage.
4281d5ae102SDimitry Andric     // The correct name is from the previous block only when the Phi has
4291d5ae102SDimitry Andric     // been completely scheduled prior to the epilog, and Phi value is not
4301d5ae102SDimitry Andric     // needed in multiple stages.
4311d5ae102SDimitry Andric     int StageDiff = 0;
4321d5ae102SDimitry Andric     if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 &&
4331d5ae102SDimitry Andric         NumPhis == 1)
4341d5ae102SDimitry Andric       StageDiff = 1;
4351d5ae102SDimitry Andric     // Adjust the computations below when the phi and the loop definition
4361d5ae102SDimitry Andric     // are scheduled in different stages.
4371d5ae102SDimitry Andric     if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage)
4381d5ae102SDimitry Andric       StageDiff = StageScheduled - LoopValStage;
4391d5ae102SDimitry Andric     for (unsigned np = 0; np < NumPhis; ++np) {
4401d5ae102SDimitry Andric       // If the Phi hasn't been scheduled, then use the initial Phi operand
4411d5ae102SDimitry Andric       // value. Otherwise, use the scheduled version of the instruction. This
4421d5ae102SDimitry Andric       // is a little complicated when a Phi references another Phi.
4431d5ae102SDimitry Andric       if (np > PrologStage || StageScheduled >= (int)LastStageNum)
4441d5ae102SDimitry Andric         PhiOp1 = InitVal;
4451d5ae102SDimitry Andric       // Check if the Phi has already been scheduled in a prolog stage.
4461d5ae102SDimitry Andric       else if (PrologStage >= AccessStage + StageDiff + np &&
4471d5ae102SDimitry Andric                VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0)
4481d5ae102SDimitry Andric         PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];
4491d5ae102SDimitry Andric       // Check if the Phi has already been scheduled, but the loop instruction
4501d5ae102SDimitry Andric       // is either another Phi, or doesn't occur in the loop.
4511d5ae102SDimitry Andric       else if (PrologStage >= AccessStage + StageDiff + np) {
4521d5ae102SDimitry Andric         // If the Phi references another Phi, we need to examine the other
4531d5ae102SDimitry Andric         // Phi to get the correct value.
4541d5ae102SDimitry Andric         PhiOp1 = LoopVal;
4551d5ae102SDimitry Andric         MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1);
4561d5ae102SDimitry Andric         int Indirects = 1;
4571d5ae102SDimitry Andric         while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) {
4581d5ae102SDimitry Andric           int PhiStage = Schedule.getStage(InstOp1);
4591d5ae102SDimitry Andric           if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects)
4601d5ae102SDimitry Andric             PhiOp1 = getInitPhiReg(*InstOp1, BB);
4611d5ae102SDimitry Andric           else
4621d5ae102SDimitry Andric             PhiOp1 = getLoopPhiReg(*InstOp1, BB);
4631d5ae102SDimitry Andric           InstOp1 = MRI.getVRegDef(PhiOp1);
4641d5ae102SDimitry Andric           int PhiOpStage = Schedule.getStage(InstOp1);
4651d5ae102SDimitry Andric           int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0);
4661d5ae102SDimitry Andric           if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np &&
4671d5ae102SDimitry Andric               VRMap[PrologStage - StageAdj - Indirects - np].count(PhiOp1)) {
4681d5ae102SDimitry Andric             PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1];
4691d5ae102SDimitry Andric             break;
4701d5ae102SDimitry Andric           }
4711d5ae102SDimitry Andric           ++Indirects;
4721d5ae102SDimitry Andric         }
4731d5ae102SDimitry Andric       } else
4741d5ae102SDimitry Andric         PhiOp1 = InitVal;
4751d5ae102SDimitry Andric       // If this references a generated Phi in the kernel, get the Phi operand
4761d5ae102SDimitry Andric       // from the incoming block.
4771d5ae102SDimitry Andric       if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1))
4781d5ae102SDimitry Andric         if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
4791d5ae102SDimitry Andric           PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
4801d5ae102SDimitry Andric 
4811d5ae102SDimitry Andric       MachineInstr *PhiInst = MRI.getVRegDef(LoopVal);
4821d5ae102SDimitry Andric       bool LoopDefIsPhi = PhiInst && PhiInst->isPHI();
4831d5ae102SDimitry Andric       // In the epilog, a map lookup is needed to get the value from the kernel,
4841d5ae102SDimitry Andric       // or previous epilog block. How is does this depends on if the
4851d5ae102SDimitry Andric       // instruction is scheduled in the previous block.
4861d5ae102SDimitry Andric       if (!InKernel) {
4871d5ae102SDimitry Andric         int StageDiffAdj = 0;
4881d5ae102SDimitry Andric         if (LoopValStage != -1 && StageScheduled > LoopValStage)
4891d5ae102SDimitry Andric           StageDiffAdj = StageScheduled - LoopValStage;
4901d5ae102SDimitry Andric         // Use the loop value defined in the kernel, unless the kernel
4911d5ae102SDimitry Andric         // contains the last definition of the Phi.
4921d5ae102SDimitry Andric         if (np == 0 && PrevStage == LastStageNum &&
4931d5ae102SDimitry Andric             (StageScheduled != 0 || LoopValStage != 0) &&
4941d5ae102SDimitry Andric             VRMap[PrevStage - StageDiffAdj].count(LoopVal))
4951d5ae102SDimitry Andric           PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal];
4961d5ae102SDimitry Andric         // Use the value defined by the Phi. We add one because we switch
4971d5ae102SDimitry Andric         // from looking at the loop value to the Phi definition.
4981d5ae102SDimitry Andric         else if (np > 0 && PrevStage == LastStageNum &&
4991d5ae102SDimitry Andric                  VRMap[PrevStage - np + 1].count(Def))
5001d5ae102SDimitry Andric           PhiOp2 = VRMap[PrevStage - np + 1][Def];
5011d5ae102SDimitry Andric         // Use the loop value defined in the kernel.
5021d5ae102SDimitry Andric         else if (static_cast<unsigned>(LoopValStage) > PrologStage + 1 &&
5031d5ae102SDimitry Andric                  VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))
5041d5ae102SDimitry Andric           PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];
5051d5ae102SDimitry Andric         // Use the value defined by the Phi, unless we're generating the first
5061d5ae102SDimitry Andric         // epilog and the Phi refers to a Phi in a different stage.
5071d5ae102SDimitry Andric         else if (VRMap[PrevStage - np].count(Def) &&
5081d5ae102SDimitry Andric                  (!LoopDefIsPhi || (PrevStage != LastStageNum) ||
5091d5ae102SDimitry Andric                   (LoopValStage == StageScheduled)))
5101d5ae102SDimitry Andric           PhiOp2 = VRMap[PrevStage - np][Def];
5111d5ae102SDimitry Andric       }
5121d5ae102SDimitry Andric 
5131d5ae102SDimitry Andric       // Check if we can reuse an existing Phi. This occurs when a Phi
5141d5ae102SDimitry Andric       // references another Phi, and the other Phi is scheduled in an
5151d5ae102SDimitry Andric       // earlier stage. We can try to reuse an existing Phi up until the last
5161d5ae102SDimitry Andric       // stage of the current Phi.
5171d5ae102SDimitry Andric       if (LoopDefIsPhi) {
5181d5ae102SDimitry Andric         if (static_cast<int>(PrologStage - np) >= StageScheduled) {
5191d5ae102SDimitry Andric           int LVNumStages = getStagesForPhi(LoopVal);
5201d5ae102SDimitry Andric           int StageDiff = (StageScheduled - LoopValStage);
5211d5ae102SDimitry Andric           LVNumStages -= StageDiff;
5221d5ae102SDimitry Andric           // Make sure the loop value Phi has been processed already.
5231d5ae102SDimitry Andric           if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) {
5241d5ae102SDimitry Andric             NewReg = PhiOp2;
5251d5ae102SDimitry Andric             unsigned ReuseStage = CurStageNum;
5261d5ae102SDimitry Andric             if (isLoopCarried(*PhiInst))
5271d5ae102SDimitry Andric               ReuseStage -= LVNumStages;
5281d5ae102SDimitry Andric             // Check if the Phi to reuse has been generated yet. If not, then
5291d5ae102SDimitry Andric             // there is nothing to reuse.
5301d5ae102SDimitry Andric             if (VRMap[ReuseStage - np].count(LoopVal)) {
5311d5ae102SDimitry Andric               NewReg = VRMap[ReuseStage - np][LoopVal];
5321d5ae102SDimitry Andric 
5331d5ae102SDimitry Andric               rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI,
5341d5ae102SDimitry Andric                                     Def, NewReg);
5351d5ae102SDimitry Andric               // Update the map with the new Phi name.
5361d5ae102SDimitry Andric               VRMap[CurStageNum - np][Def] = NewReg;
5371d5ae102SDimitry Andric               PhiOp2 = NewReg;
5381d5ae102SDimitry Andric               if (VRMap[LastStageNum - np - 1].count(LoopVal))
5391d5ae102SDimitry Andric                 PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
5401d5ae102SDimitry Andric 
5411d5ae102SDimitry Andric               if (IsLast && np == NumPhis - 1)
5421d5ae102SDimitry Andric                 replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
5431d5ae102SDimitry Andric               continue;
5441d5ae102SDimitry Andric             }
5451d5ae102SDimitry Andric           }
5461d5ae102SDimitry Andric         }
5471d5ae102SDimitry Andric         if (InKernel && StageDiff > 0 &&
5481d5ae102SDimitry Andric             VRMap[CurStageNum - StageDiff - np].count(LoopVal))
5491d5ae102SDimitry Andric           PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
5501d5ae102SDimitry Andric       }
5511d5ae102SDimitry Andric 
5521d5ae102SDimitry Andric       const TargetRegisterClass *RC = MRI.getRegClass(Def);
5531d5ae102SDimitry Andric       NewReg = MRI.createVirtualRegister(RC);
5541d5ae102SDimitry Andric 
5551d5ae102SDimitry Andric       MachineInstrBuilder NewPhi =
5561d5ae102SDimitry Andric           BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
5571d5ae102SDimitry Andric                   TII->get(TargetOpcode::PHI), NewReg);
5581d5ae102SDimitry Andric       NewPhi.addReg(PhiOp1).addMBB(BB1);
5591d5ae102SDimitry Andric       NewPhi.addReg(PhiOp2).addMBB(BB2);
5601d5ae102SDimitry Andric       if (np == 0)
5611d5ae102SDimitry Andric         InstrMap[NewPhi] = &*BBI;
5621d5ae102SDimitry Andric 
5631d5ae102SDimitry Andric       // We define the Phis after creating the new pipelined code, so
5641d5ae102SDimitry Andric       // we need to rename the Phi values in scheduled instructions.
5651d5ae102SDimitry Andric 
5661d5ae102SDimitry Andric       unsigned PrevReg = 0;
5671d5ae102SDimitry Andric       if (InKernel && VRMap[PrevStage - np].count(LoopVal))
5681d5ae102SDimitry Andric         PrevReg = VRMap[PrevStage - np][LoopVal];
5691d5ae102SDimitry Andric       rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
5701d5ae102SDimitry Andric                             NewReg, PrevReg);
5711d5ae102SDimitry Andric       // If the Phi has been scheduled, use the new name for rewriting.
5721d5ae102SDimitry Andric       if (VRMap[CurStageNum - np].count(Def)) {
5731d5ae102SDimitry Andric         unsigned R = VRMap[CurStageNum - np][Def];
5741d5ae102SDimitry Andric         rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, R,
5751d5ae102SDimitry Andric                               NewReg);
5761d5ae102SDimitry Andric       }
5771d5ae102SDimitry Andric 
5781d5ae102SDimitry Andric       // Check if we need to rename any uses that occurs after the loop. The
5791d5ae102SDimitry Andric       // register to replace depends on whether the Phi is scheduled in the
5801d5ae102SDimitry Andric       // epilog.
5811d5ae102SDimitry Andric       if (IsLast && np == NumPhis - 1)
5821d5ae102SDimitry Andric         replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
5831d5ae102SDimitry Andric 
5841d5ae102SDimitry Andric       // In the kernel, a dependent Phi uses the value from this Phi.
5851d5ae102SDimitry Andric       if (InKernel)
5861d5ae102SDimitry Andric         PhiOp2 = NewReg;
5871d5ae102SDimitry Andric 
5881d5ae102SDimitry Andric       // Update the map with the new Phi name.
5891d5ae102SDimitry Andric       VRMap[CurStageNum - np][Def] = NewReg;
5901d5ae102SDimitry Andric     }
5911d5ae102SDimitry Andric 
5921d5ae102SDimitry Andric     while (NumPhis++ < NumStages) {
5931d5ae102SDimitry Andric       rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, NumPhis, &*BBI, Def,
5941d5ae102SDimitry Andric                             NewReg, 0);
5951d5ae102SDimitry Andric     }
5961d5ae102SDimitry Andric 
5971d5ae102SDimitry Andric     // Check if we need to rename a Phi that has been eliminated due to
5981d5ae102SDimitry Andric     // scheduling.
5991d5ae102SDimitry Andric     if (NumStages == 0 && IsLast && VRMap[CurStageNum].count(LoopVal))
6001d5ae102SDimitry Andric       replaceRegUsesAfterLoop(Def, VRMap[CurStageNum][LoopVal], BB, MRI, LIS);
6011d5ae102SDimitry Andric   }
6021d5ae102SDimitry Andric }
6031d5ae102SDimitry Andric 
6041d5ae102SDimitry Andric /// Generate Phis for the specified block in the generated pipelined code.
6051d5ae102SDimitry Andric /// These are new Phis needed because the definition is scheduled after the
6061d5ae102SDimitry Andric /// use in the pipelined sequence.
generatePhis(MachineBasicBlock * NewBB,MachineBasicBlock * BB1,MachineBasicBlock * BB2,MachineBasicBlock * KernelBB,ValueMapTy * VRMap,ValueMapTy * VRMapPhi,InstrMapTy & InstrMap,unsigned LastStageNum,unsigned CurStageNum,bool IsLast)6071d5ae102SDimitry Andric void ModuloScheduleExpander::generatePhis(
6081d5ae102SDimitry Andric     MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,
609e3b55780SDimitry Andric     MachineBasicBlock *KernelBB, ValueMapTy *VRMap, ValueMapTy *VRMapPhi,
610e3b55780SDimitry Andric     InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
611e3b55780SDimitry Andric     bool IsLast) {
6121d5ae102SDimitry Andric   // Compute the stage number that contains the initial Phi value, and
6131d5ae102SDimitry Andric   // the Phi from the previous stage.
6141d5ae102SDimitry Andric   unsigned PrologStage = 0;
6151d5ae102SDimitry Andric   unsigned PrevStage = 0;
6161d5ae102SDimitry Andric   unsigned StageDiff = CurStageNum - LastStageNum;
6171d5ae102SDimitry Andric   bool InKernel = (StageDiff == 0);
6181d5ae102SDimitry Andric   if (InKernel) {
6191d5ae102SDimitry Andric     PrologStage = LastStageNum - 1;
6201d5ae102SDimitry Andric     PrevStage = CurStageNum;
6211d5ae102SDimitry Andric   } else {
6221d5ae102SDimitry Andric     PrologStage = LastStageNum - StageDiff;
6231d5ae102SDimitry Andric     PrevStage = LastStageNum + StageDiff - 1;
6241d5ae102SDimitry Andric   }
6251d5ae102SDimitry Andric 
6261d5ae102SDimitry Andric   for (MachineBasicBlock::iterator BBI = BB->getFirstNonPHI(),
6271d5ae102SDimitry Andric                                    BBE = BB->instr_end();
6281d5ae102SDimitry Andric        BBI != BBE; ++BBI) {
6291d5ae102SDimitry Andric     for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) {
6301d5ae102SDimitry Andric       MachineOperand &MO = BBI->getOperand(i);
631e3b55780SDimitry Andric       if (!MO.isReg() || !MO.isDef() || !MO.getReg().isVirtual())
6321d5ae102SDimitry Andric         continue;
6331d5ae102SDimitry Andric 
6341d5ae102SDimitry Andric       int StageScheduled = Schedule.getStage(&*BBI);
6351d5ae102SDimitry Andric       assert(StageScheduled != -1 && "Expecting scheduled instruction.");
6361d5ae102SDimitry Andric       Register Def = MO.getReg();
6371d5ae102SDimitry Andric       unsigned NumPhis = getStagesForReg(Def, CurStageNum);
6381d5ae102SDimitry Andric       // An instruction scheduled in stage 0 and is used after the loop
6391d5ae102SDimitry Andric       // requires a phi in the epilog for the last definition from either
6401d5ae102SDimitry Andric       // the kernel or prolog.
6411d5ae102SDimitry Andric       if (!InKernel && NumPhis == 0 && StageScheduled == 0 &&
6421d5ae102SDimitry Andric           hasUseAfterLoop(Def, BB, MRI))
6431d5ae102SDimitry Andric         NumPhis = 1;
6441d5ae102SDimitry Andric       if (!InKernel && (unsigned)StageScheduled > PrologStage)
6451d5ae102SDimitry Andric         continue;
6461d5ae102SDimitry Andric 
647e3b55780SDimitry Andric       unsigned PhiOp2;
648e3b55780SDimitry Andric       if (InKernel) {
649e3b55780SDimitry Andric         PhiOp2 = VRMap[PrevStage][Def];
6501d5ae102SDimitry Andric         if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2))
6511d5ae102SDimitry Andric           if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
6521d5ae102SDimitry Andric             PhiOp2 = getLoopPhiReg(*InstOp2, BB2);
653e3b55780SDimitry Andric       }
6541d5ae102SDimitry Andric       // The number of Phis can't exceed the number of prolog stages. The
6551d5ae102SDimitry Andric       // prolog stage number is zero based.
6561d5ae102SDimitry Andric       if (NumPhis > PrologStage + 1 - StageScheduled)
6571d5ae102SDimitry Andric         NumPhis = PrologStage + 1 - StageScheduled;
6581d5ae102SDimitry Andric       for (unsigned np = 0; np < NumPhis; ++np) {
659e3b55780SDimitry Andric         // Example for
660e3b55780SDimitry Andric         // Org:
661e3b55780SDimitry Andric         //   %Org = ... (Scheduled at Stage#0, NumPhi = 2)
662e3b55780SDimitry Andric         //
663e3b55780SDimitry Andric         // Prolog0 (Stage0):
664e3b55780SDimitry Andric         //   %Clone0 = ...
665e3b55780SDimitry Andric         // Prolog1 (Stage1):
666e3b55780SDimitry Andric         //   %Clone1 = ...
667e3b55780SDimitry Andric         // Kernel (Stage2):
668e3b55780SDimitry Andric         //   %Phi0 = Phi %Clone1, Prolog1, %Clone2, Kernel
669e3b55780SDimitry Andric         //   %Phi1 = Phi %Clone0, Prolog1, %Phi0, Kernel
670e3b55780SDimitry Andric         //   %Clone2 = ...
671e3b55780SDimitry Andric         // Epilog0 (Stage3):
672e3b55780SDimitry Andric         //   %Phi2 = Phi %Clone1, Prolog1, %Clone2, Kernel
673e3b55780SDimitry Andric         //   %Phi3 = Phi %Clone0, Prolog1, %Phi0, Kernel
674e3b55780SDimitry Andric         // Epilog1 (Stage4):
675e3b55780SDimitry Andric         //   %Phi4 = Phi %Clone0, Prolog0, %Phi2, Epilog0
676e3b55780SDimitry Andric         //
677e3b55780SDimitry Andric         // VRMap = {0: %Clone0, 1: %Clone1, 2: %Clone2}
678e3b55780SDimitry Andric         // VRMapPhi (after Kernel) = {0: %Phi1, 1: %Phi0}
679e3b55780SDimitry Andric         // VRMapPhi (after Epilog0) = {0: %Phi3, 1: %Phi2}
680e3b55780SDimitry Andric 
6811d5ae102SDimitry Andric         unsigned PhiOp1 = VRMap[PrologStage][Def];
6821d5ae102SDimitry Andric         if (np <= PrologStage)
6831d5ae102SDimitry Andric           PhiOp1 = VRMap[PrologStage - np][Def];
684e3b55780SDimitry Andric         if (!InKernel) {
685e3b55780SDimitry Andric           if (PrevStage == LastStageNum && np == 0)
686e3b55780SDimitry Andric             PhiOp2 = VRMap[LastStageNum][Def];
687e3b55780SDimitry Andric           else
688e3b55780SDimitry Andric             PhiOp2 = VRMapPhi[PrevStage - np][Def];
6891d5ae102SDimitry Andric         }
6901d5ae102SDimitry Andric 
6911d5ae102SDimitry Andric         const TargetRegisterClass *RC = MRI.getRegClass(Def);
6921d5ae102SDimitry Andric         Register NewReg = MRI.createVirtualRegister(RC);
6931d5ae102SDimitry Andric 
6941d5ae102SDimitry Andric         MachineInstrBuilder NewPhi =
6951d5ae102SDimitry Andric             BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
6961d5ae102SDimitry Andric                     TII->get(TargetOpcode::PHI), NewReg);
6971d5ae102SDimitry Andric         NewPhi.addReg(PhiOp1).addMBB(BB1);
6981d5ae102SDimitry Andric         NewPhi.addReg(PhiOp2).addMBB(BB2);
6991d5ae102SDimitry Andric         if (np == 0)
7001d5ae102SDimitry Andric           InstrMap[NewPhi] = &*BBI;
7011d5ae102SDimitry Andric 
7021d5ae102SDimitry Andric         // Rewrite uses and update the map. The actions depend upon whether
7031d5ae102SDimitry Andric         // we generating code for the kernel or epilog blocks.
7041d5ae102SDimitry Andric         if (InKernel) {
7051d5ae102SDimitry Andric           rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp1,
7061d5ae102SDimitry Andric                                 NewReg);
7071d5ae102SDimitry Andric           rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp2,
7081d5ae102SDimitry Andric                                 NewReg);
7091d5ae102SDimitry Andric 
7101d5ae102SDimitry Andric           PhiOp2 = NewReg;
711e3b55780SDimitry Andric           VRMapPhi[PrevStage - np - 1][Def] = NewReg;
7121d5ae102SDimitry Andric         } else {
713e3b55780SDimitry Andric           VRMapPhi[CurStageNum - np][Def] = NewReg;
7141d5ae102SDimitry Andric           if (np == NumPhis - 1)
7151d5ae102SDimitry Andric             rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
7161d5ae102SDimitry Andric                                   NewReg);
7171d5ae102SDimitry Andric         }
7181d5ae102SDimitry Andric         if (IsLast && np == NumPhis - 1)
7191d5ae102SDimitry Andric           replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
7201d5ae102SDimitry Andric       }
7211d5ae102SDimitry Andric     }
7221d5ae102SDimitry Andric   }
7231d5ae102SDimitry Andric }
7241d5ae102SDimitry Andric 
7251d5ae102SDimitry Andric /// Remove instructions that generate values with no uses.
7261d5ae102SDimitry Andric /// Typically, these are induction variable operations that generate values
7271d5ae102SDimitry Andric /// used in the loop itself.  A dead instruction has a definition with
7281d5ae102SDimitry Andric /// no uses, or uses that occur in the original loop only.
removeDeadInstructions(MachineBasicBlock * KernelBB,MBBVectorTy & EpilogBBs)7291d5ae102SDimitry Andric void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB,
7301d5ae102SDimitry Andric                                                     MBBVectorTy &EpilogBBs) {
7311d5ae102SDimitry Andric   // For each epilog block, check that the value defined by each instruction
7321d5ae102SDimitry Andric   // is used.  If not, delete it.
733c0981da4SDimitry Andric   for (MachineBasicBlock *MBB : llvm::reverse(EpilogBBs))
734c0981da4SDimitry Andric     for (MachineBasicBlock::reverse_instr_iterator MI = MBB->instr_rbegin(),
735c0981da4SDimitry Andric                                                    ME = MBB->instr_rend();
7361d5ae102SDimitry Andric          MI != ME;) {
7371d5ae102SDimitry Andric       // From DeadMachineInstructionElem. Don't delete inline assembly.
7381d5ae102SDimitry Andric       if (MI->isInlineAsm()) {
7391d5ae102SDimitry Andric         ++MI;
7401d5ae102SDimitry Andric         continue;
7411d5ae102SDimitry Andric       }
7421d5ae102SDimitry Andric       bool SawStore = false;
7431d5ae102SDimitry Andric       // Check if it's safe to remove the instruction due to side effects.
7441d5ae102SDimitry Andric       // We can, and want to, remove Phis here.
7451d5ae102SDimitry Andric       if (!MI->isSafeToMove(nullptr, SawStore) && !MI->isPHI()) {
7461d5ae102SDimitry Andric         ++MI;
7471d5ae102SDimitry Andric         continue;
7481d5ae102SDimitry Andric       }
7491d5ae102SDimitry Andric       bool used = true;
7507fa27ce4SDimitry Andric       for (const MachineOperand &MO : MI->all_defs()) {
751c0981da4SDimitry Andric         Register reg = MO.getReg();
7521d5ae102SDimitry Andric         // Assume physical registers are used, unless they are marked dead.
753e3b55780SDimitry Andric         if (reg.isPhysical()) {
754c0981da4SDimitry Andric           used = !MO.isDead();
7551d5ae102SDimitry Andric           if (used)
7561d5ae102SDimitry Andric             break;
7571d5ae102SDimitry Andric           continue;
7581d5ae102SDimitry Andric         }
7591d5ae102SDimitry Andric         unsigned realUses = 0;
760c0981da4SDimitry Andric         for (const MachineOperand &U : MRI.use_operands(reg)) {
7611d5ae102SDimitry Andric           // Check if there are any uses that occur only in the original
7621d5ae102SDimitry Andric           // loop.  If so, that's not a real use.
763c0981da4SDimitry Andric           if (U.getParent()->getParent() != BB) {
7641d5ae102SDimitry Andric             realUses++;
7651d5ae102SDimitry Andric             used = true;
7661d5ae102SDimitry Andric             break;
7671d5ae102SDimitry Andric           }
7681d5ae102SDimitry Andric         }
7691d5ae102SDimitry Andric         if (realUses > 0)
7701d5ae102SDimitry Andric           break;
7711d5ae102SDimitry Andric         used = false;
7721d5ae102SDimitry Andric       }
7731d5ae102SDimitry Andric       if (!used) {
7741d5ae102SDimitry Andric         LIS.RemoveMachineInstrFromMaps(*MI);
7751d5ae102SDimitry Andric         MI++->eraseFromParent();
7761d5ae102SDimitry Andric         continue;
7771d5ae102SDimitry Andric       }
7781d5ae102SDimitry Andric       ++MI;
7791d5ae102SDimitry Andric     }
7801d5ae102SDimitry Andric   // In the kernel block, check if we can remove a Phi that generates a value
7811d5ae102SDimitry Andric   // used in an instruction removed in the epilog block.
782c0981da4SDimitry Andric   for (MachineInstr &MI : llvm::make_early_inc_range(KernelBB->phis())) {
783c0981da4SDimitry Andric     Register reg = MI.getOperand(0).getReg();
7841d5ae102SDimitry Andric     if (MRI.use_begin(reg) == MRI.use_end()) {
785c0981da4SDimitry Andric       LIS.RemoveMachineInstrFromMaps(MI);
786c0981da4SDimitry Andric       MI.eraseFromParent();
7871d5ae102SDimitry Andric     }
7881d5ae102SDimitry Andric   }
7891d5ae102SDimitry Andric }
7901d5ae102SDimitry Andric 
7911d5ae102SDimitry Andric /// For loop carried definitions, we split the lifetime of a virtual register
7921d5ae102SDimitry Andric /// that has uses past the definition in the next iteration. A copy with a new
7931d5ae102SDimitry Andric /// virtual register is inserted before the definition, which helps with
7941d5ae102SDimitry Andric /// generating a better register assignment.
7951d5ae102SDimitry Andric ///
7961d5ae102SDimitry Andric ///   v1 = phi(a, v2)     v1 = phi(a, v2)
7971d5ae102SDimitry Andric ///   v2 = phi(b, v3)     v2 = phi(b, v3)
7981d5ae102SDimitry Andric ///   v3 = ..             v4 = copy v1
7991d5ae102SDimitry Andric ///   .. = V1             v3 = ..
8001d5ae102SDimitry Andric ///                       .. = v4
splitLifetimes(MachineBasicBlock * KernelBB,MBBVectorTy & EpilogBBs)8011d5ae102SDimitry Andric void ModuloScheduleExpander::splitLifetimes(MachineBasicBlock *KernelBB,
8021d5ae102SDimitry Andric                                             MBBVectorTy &EpilogBBs) {
8031d5ae102SDimitry Andric   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
8041d5ae102SDimitry Andric   for (auto &PHI : KernelBB->phis()) {
8051d5ae102SDimitry Andric     Register Def = PHI.getOperand(0).getReg();
8061d5ae102SDimitry Andric     // Check for any Phi definition that used as an operand of another Phi
8071d5ae102SDimitry Andric     // in the same block.
8081d5ae102SDimitry Andric     for (MachineRegisterInfo::use_instr_iterator I = MRI.use_instr_begin(Def),
8091d5ae102SDimitry Andric                                                  E = MRI.use_instr_end();
8101d5ae102SDimitry Andric          I != E; ++I) {
8111d5ae102SDimitry Andric       if (I->isPHI() && I->getParent() == KernelBB) {
8121d5ae102SDimitry Andric         // Get the loop carried definition.
8131d5ae102SDimitry Andric         unsigned LCDef = getLoopPhiReg(PHI, KernelBB);
8141d5ae102SDimitry Andric         if (!LCDef)
8151d5ae102SDimitry Andric           continue;
8161d5ae102SDimitry Andric         MachineInstr *MI = MRI.getVRegDef(LCDef);
8171d5ae102SDimitry Andric         if (!MI || MI->getParent() != KernelBB || MI->isPHI())
8181d5ae102SDimitry Andric           continue;
8191d5ae102SDimitry Andric         // Search through the rest of the block looking for uses of the Phi
8201d5ae102SDimitry Andric         // definition. If one occurs, then split the lifetime.
8211d5ae102SDimitry Andric         unsigned SplitReg = 0;
8221d5ae102SDimitry Andric         for (auto &BBJ : make_range(MachineBasicBlock::instr_iterator(MI),
8231d5ae102SDimitry Andric                                     KernelBB->instr_end()))
824ac9a064cSDimitry Andric           if (BBJ.readsRegister(Def, /*TRI=*/nullptr)) {
8251d5ae102SDimitry Andric             // We split the lifetime when we find the first use.
8261d5ae102SDimitry Andric             if (SplitReg == 0) {
8271d5ae102SDimitry Andric               SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));
8281d5ae102SDimitry Andric               BuildMI(*KernelBB, MI, MI->getDebugLoc(),
8291d5ae102SDimitry Andric                       TII->get(TargetOpcode::COPY), SplitReg)
8301d5ae102SDimitry Andric                   .addReg(Def);
8311d5ae102SDimitry Andric             }
8321d5ae102SDimitry Andric             BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
8331d5ae102SDimitry Andric           }
8341d5ae102SDimitry Andric         if (!SplitReg)
8351d5ae102SDimitry Andric           continue;
8361d5ae102SDimitry Andric         // Search through each of the epilog blocks for any uses to be renamed.
8371d5ae102SDimitry Andric         for (auto &Epilog : EpilogBBs)
8381d5ae102SDimitry Andric           for (auto &I : *Epilog)
839ac9a064cSDimitry Andric             if (I.readsRegister(Def, /*TRI=*/nullptr))
8401d5ae102SDimitry Andric               I.substituteRegister(Def, SplitReg, 0, *TRI);
8411d5ae102SDimitry Andric         break;
8421d5ae102SDimitry Andric       }
8431d5ae102SDimitry Andric     }
8441d5ae102SDimitry Andric   }
8451d5ae102SDimitry Andric }
8461d5ae102SDimitry Andric 
8471d5ae102SDimitry Andric /// Remove the incoming block from the Phis in a basic block.
removePhis(MachineBasicBlock * BB,MachineBasicBlock * Incoming)8481d5ae102SDimitry Andric static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming) {
8491d5ae102SDimitry Andric   for (MachineInstr &MI : *BB) {
8501d5ae102SDimitry Andric     if (!MI.isPHI())
8511d5ae102SDimitry Andric       break;
8521d5ae102SDimitry Andric     for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2)
8531d5ae102SDimitry Andric       if (MI.getOperand(i + 1).getMBB() == Incoming) {
854145449b1SDimitry Andric         MI.removeOperand(i + 1);
855145449b1SDimitry Andric         MI.removeOperand(i);
8561d5ae102SDimitry Andric         break;
8571d5ae102SDimitry Andric       }
8581d5ae102SDimitry Andric   }
8591d5ae102SDimitry Andric }
8601d5ae102SDimitry Andric 
8611d5ae102SDimitry Andric /// Create branches from each prolog basic block to the appropriate epilog
8621d5ae102SDimitry Andric /// block.  These edges are needed if the loop ends before reaching the
8631d5ae102SDimitry Andric /// kernel.
addBranches(MachineBasicBlock & PreheaderBB,MBBVectorTy & PrologBBs,MachineBasicBlock * KernelBB,MBBVectorTy & EpilogBBs,ValueMapTy * VRMap)8641d5ae102SDimitry Andric void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
8651d5ae102SDimitry Andric                                          MBBVectorTy &PrologBBs,
8661d5ae102SDimitry Andric                                          MachineBasicBlock *KernelBB,
8671d5ae102SDimitry Andric                                          MBBVectorTy &EpilogBBs,
8681d5ae102SDimitry Andric                                          ValueMapTy *VRMap) {
8691d5ae102SDimitry Andric   assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch");
8701d5ae102SDimitry Andric   MachineBasicBlock *LastPro = KernelBB;
8711d5ae102SDimitry Andric   MachineBasicBlock *LastEpi = KernelBB;
8721d5ae102SDimitry Andric 
8731d5ae102SDimitry Andric   // Start from the blocks connected to the kernel and work "out"
8741d5ae102SDimitry Andric   // to the first prolog and the last epilog blocks.
8751d5ae102SDimitry Andric   SmallVector<MachineInstr *, 4> PrevInsts;
8761d5ae102SDimitry Andric   unsigned MaxIter = PrologBBs.size() - 1;
8771d5ae102SDimitry Andric   for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) {
8781d5ae102SDimitry Andric     // Add branches to the prolog that go to the corresponding
8791d5ae102SDimitry Andric     // epilog, and the fall-thru prolog/kernel block.
8801d5ae102SDimitry Andric     MachineBasicBlock *Prolog = PrologBBs[j];
8811d5ae102SDimitry Andric     MachineBasicBlock *Epilog = EpilogBBs[i];
8821d5ae102SDimitry Andric 
8831d5ae102SDimitry Andric     SmallVector<MachineOperand, 4> Cond;
884e3b55780SDimitry Andric     std::optional<bool> StaticallyGreater =
8851d5ae102SDimitry Andric         LoopInfo->createTripCountGreaterCondition(j + 1, *Prolog, Cond);
8861d5ae102SDimitry Andric     unsigned numAdded = 0;
887145449b1SDimitry Andric     if (!StaticallyGreater) {
8881d5ae102SDimitry Andric       Prolog->addSuccessor(Epilog);
8891d5ae102SDimitry Andric       numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc());
8901d5ae102SDimitry Andric     } else if (*StaticallyGreater == false) {
8911d5ae102SDimitry Andric       Prolog->addSuccessor(Epilog);
8921d5ae102SDimitry Andric       Prolog->removeSuccessor(LastPro);
8931d5ae102SDimitry Andric       LastEpi->removeSuccessor(Epilog);
8941d5ae102SDimitry Andric       numAdded = TII->insertBranch(*Prolog, Epilog, nullptr, Cond, DebugLoc());
8951d5ae102SDimitry Andric       removePhis(Epilog, LastEpi);
8961d5ae102SDimitry Andric       // Remove the blocks that are no longer referenced.
8971d5ae102SDimitry Andric       if (LastPro != LastEpi) {
8981d5ae102SDimitry Andric         LastEpi->clear();
8991d5ae102SDimitry Andric         LastEpi->eraseFromParent();
9001d5ae102SDimitry Andric       }
9011d5ae102SDimitry Andric       if (LastPro == KernelBB) {
9021d5ae102SDimitry Andric         LoopInfo->disposed();
9031d5ae102SDimitry Andric         NewKernel = nullptr;
9041d5ae102SDimitry Andric       }
9051d5ae102SDimitry Andric       LastPro->clear();
9061d5ae102SDimitry Andric       LastPro->eraseFromParent();
9071d5ae102SDimitry Andric     } else {
9081d5ae102SDimitry Andric       numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc());
9091d5ae102SDimitry Andric       removePhis(Epilog, Prolog);
9101d5ae102SDimitry Andric     }
9111d5ae102SDimitry Andric     LastPro = Prolog;
9121d5ae102SDimitry Andric     LastEpi = Epilog;
9131d5ae102SDimitry Andric     for (MachineBasicBlock::reverse_instr_iterator I = Prolog->instr_rbegin(),
9141d5ae102SDimitry Andric                                                    E = Prolog->instr_rend();
9151d5ae102SDimitry Andric          I != E && numAdded > 0; ++I, --numAdded)
9161d5ae102SDimitry Andric       updateInstruction(&*I, false, j, 0, VRMap);
9171d5ae102SDimitry Andric   }
9181d5ae102SDimitry Andric 
9191d5ae102SDimitry Andric   if (NewKernel) {
9201d5ae102SDimitry Andric     LoopInfo->setPreheader(PrologBBs[MaxIter]);
9211d5ae102SDimitry Andric     LoopInfo->adjustTripCount(-(MaxIter + 1));
9221d5ae102SDimitry Andric   }
9231d5ae102SDimitry Andric }
9241d5ae102SDimitry Andric 
9251d5ae102SDimitry Andric /// Return true if we can compute the amount the instruction changes
9261d5ae102SDimitry Andric /// during each iteration. Set Delta to the amount of the change.
computeDelta(MachineInstr & MI,unsigned & Delta)9271d5ae102SDimitry Andric bool ModuloScheduleExpander::computeDelta(MachineInstr &MI, unsigned &Delta) {
9281d5ae102SDimitry Andric   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
9291d5ae102SDimitry Andric   const MachineOperand *BaseOp;
9301d5ae102SDimitry Andric   int64_t Offset;
931cfca06d7SDimitry Andric   bool OffsetIsScalable;
932cfca06d7SDimitry Andric   if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI))
933cfca06d7SDimitry Andric     return false;
934cfca06d7SDimitry Andric 
935cfca06d7SDimitry Andric   // FIXME: This algorithm assumes instructions have fixed-size offsets.
936cfca06d7SDimitry Andric   if (OffsetIsScalable)
9371d5ae102SDimitry Andric     return false;
9381d5ae102SDimitry Andric 
9391d5ae102SDimitry Andric   if (!BaseOp->isReg())
9401d5ae102SDimitry Andric     return false;
9411d5ae102SDimitry Andric 
9421d5ae102SDimitry Andric   Register BaseReg = BaseOp->getReg();
9431d5ae102SDimitry Andric 
9441d5ae102SDimitry Andric   MachineRegisterInfo &MRI = MF.getRegInfo();
9451d5ae102SDimitry Andric   // Check if there is a Phi. If so, get the definition in the loop.
9461d5ae102SDimitry Andric   MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
9471d5ae102SDimitry Andric   if (BaseDef && BaseDef->isPHI()) {
9481d5ae102SDimitry Andric     BaseReg = getLoopPhiReg(*BaseDef, MI.getParent());
9491d5ae102SDimitry Andric     BaseDef = MRI.getVRegDef(BaseReg);
9501d5ae102SDimitry Andric   }
9511d5ae102SDimitry Andric   if (!BaseDef)
9521d5ae102SDimitry Andric     return false;
9531d5ae102SDimitry Andric 
9541d5ae102SDimitry Andric   int D = 0;
9551d5ae102SDimitry Andric   if (!TII->getIncrementValue(*BaseDef, D) && D >= 0)
9561d5ae102SDimitry Andric     return false;
9571d5ae102SDimitry Andric 
9581d5ae102SDimitry Andric   Delta = D;
9591d5ae102SDimitry Andric   return true;
9601d5ae102SDimitry Andric }
9611d5ae102SDimitry Andric 
9621d5ae102SDimitry Andric /// Update the memory operand with a new offset when the pipeliner
9631d5ae102SDimitry Andric /// generates a new copy of the instruction that refers to a
9641d5ae102SDimitry Andric /// different memory location.
updateMemOperands(MachineInstr & NewMI,MachineInstr & OldMI,unsigned Num)9651d5ae102SDimitry Andric void ModuloScheduleExpander::updateMemOperands(MachineInstr &NewMI,
9661d5ae102SDimitry Andric                                                MachineInstr &OldMI,
9671d5ae102SDimitry Andric                                                unsigned Num) {
9681d5ae102SDimitry Andric   if (Num == 0)
9691d5ae102SDimitry Andric     return;
9701d5ae102SDimitry Andric   // If the instruction has memory operands, then adjust the offset
9711d5ae102SDimitry Andric   // when the instruction appears in different stages.
9721d5ae102SDimitry Andric   if (NewMI.memoperands_empty())
9731d5ae102SDimitry Andric     return;
9741d5ae102SDimitry Andric   SmallVector<MachineMemOperand *, 2> NewMMOs;
9751d5ae102SDimitry Andric   for (MachineMemOperand *MMO : NewMI.memoperands()) {
9761d5ae102SDimitry Andric     // TODO: Figure out whether isAtomic is really necessary (see D57601).
9771d5ae102SDimitry Andric     if (MMO->isVolatile() || MMO->isAtomic() ||
9781d5ae102SDimitry Andric         (MMO->isInvariant() && MMO->isDereferenceable()) ||
9791d5ae102SDimitry Andric         (!MMO->getValue())) {
9801d5ae102SDimitry Andric       NewMMOs.push_back(MMO);
9811d5ae102SDimitry Andric       continue;
9821d5ae102SDimitry Andric     }
9831d5ae102SDimitry Andric     unsigned Delta;
9841d5ae102SDimitry Andric     if (Num != UINT_MAX && computeDelta(OldMI, Delta)) {
9851d5ae102SDimitry Andric       int64_t AdjOffset = Delta * Num;
9861d5ae102SDimitry Andric       NewMMOs.push_back(
9871d5ae102SDimitry Andric           MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize()));
9881d5ae102SDimitry Andric     } else {
989ac9a064cSDimitry Andric       NewMMOs.push_back(MF.getMachineMemOperand(
990ac9a064cSDimitry Andric           MMO, 0, LocationSize::beforeOrAfterPointer()));
9911d5ae102SDimitry Andric     }
9921d5ae102SDimitry Andric   }
9931d5ae102SDimitry Andric   NewMI.setMemRefs(MF, NewMMOs);
9941d5ae102SDimitry Andric }
9951d5ae102SDimitry Andric 
9961d5ae102SDimitry Andric /// Clone the instruction for the new pipelined loop and update the
9971d5ae102SDimitry Andric /// memory operands, if needed.
cloneInstr(MachineInstr * OldMI,unsigned CurStageNum,unsigned InstStageNum)9981d5ae102SDimitry Andric MachineInstr *ModuloScheduleExpander::cloneInstr(MachineInstr *OldMI,
9991d5ae102SDimitry Andric                                                  unsigned CurStageNum,
10001d5ae102SDimitry Andric                                                  unsigned InstStageNum) {
10011d5ae102SDimitry Andric   MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
10021d5ae102SDimitry Andric   updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
10031d5ae102SDimitry Andric   return NewMI;
10041d5ae102SDimitry Andric }
10051d5ae102SDimitry Andric 
10061d5ae102SDimitry Andric /// Clone the instruction for the new pipelined loop. If needed, this
10071d5ae102SDimitry Andric /// function updates the instruction using the values saved in the
10081d5ae102SDimitry Andric /// InstrChanges structure.
cloneAndChangeInstr(MachineInstr * OldMI,unsigned CurStageNum,unsigned InstStageNum)10091d5ae102SDimitry Andric MachineInstr *ModuloScheduleExpander::cloneAndChangeInstr(
10101d5ae102SDimitry Andric     MachineInstr *OldMI, unsigned CurStageNum, unsigned InstStageNum) {
10111d5ae102SDimitry Andric   MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
10121d5ae102SDimitry Andric   auto It = InstrChanges.find(OldMI);
10131d5ae102SDimitry Andric   if (It != InstrChanges.end()) {
10141d5ae102SDimitry Andric     std::pair<unsigned, int64_t> RegAndOffset = It->second;
10151d5ae102SDimitry Andric     unsigned BasePos, OffsetPos;
10161d5ae102SDimitry Andric     if (!TII->getBaseAndOffsetPosition(*OldMI, BasePos, OffsetPos))
10171d5ae102SDimitry Andric       return nullptr;
10181d5ae102SDimitry Andric     int64_t NewOffset = OldMI->getOperand(OffsetPos).getImm();
10191d5ae102SDimitry Andric     MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first);
10201d5ae102SDimitry Andric     if (Schedule.getStage(LoopDef) > (signed)InstStageNum)
10211d5ae102SDimitry Andric       NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum);
10221d5ae102SDimitry Andric     NewMI->getOperand(OffsetPos).setImm(NewOffset);
10231d5ae102SDimitry Andric   }
10241d5ae102SDimitry Andric   updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
10251d5ae102SDimitry Andric   return NewMI;
10261d5ae102SDimitry Andric }
10271d5ae102SDimitry Andric 
10281d5ae102SDimitry Andric /// Update the machine instruction with new virtual registers.  This
1029145449b1SDimitry Andric /// function may change the definitions and/or uses.
updateInstruction(MachineInstr * NewMI,bool LastDef,unsigned CurStageNum,unsigned InstrStageNum,ValueMapTy * VRMap)10301d5ae102SDimitry Andric void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI,
10311d5ae102SDimitry Andric                                                bool LastDef,
10321d5ae102SDimitry Andric                                                unsigned CurStageNum,
10331d5ae102SDimitry Andric                                                unsigned InstrStageNum,
10341d5ae102SDimitry Andric                                                ValueMapTy *VRMap) {
1035f65dcba8SDimitry Andric   for (MachineOperand &MO : NewMI->operands()) {
1036e3b55780SDimitry Andric     if (!MO.isReg() || !MO.getReg().isVirtual())
10371d5ae102SDimitry Andric       continue;
10381d5ae102SDimitry Andric     Register reg = MO.getReg();
10391d5ae102SDimitry Andric     if (MO.isDef()) {
10401d5ae102SDimitry Andric       // Create a new virtual register for the definition.
10411d5ae102SDimitry Andric       const TargetRegisterClass *RC = MRI.getRegClass(reg);
10421d5ae102SDimitry Andric       Register NewReg = MRI.createVirtualRegister(RC);
10431d5ae102SDimitry Andric       MO.setReg(NewReg);
10441d5ae102SDimitry Andric       VRMap[CurStageNum][reg] = NewReg;
10451d5ae102SDimitry Andric       if (LastDef)
10461d5ae102SDimitry Andric         replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS);
10471d5ae102SDimitry Andric     } else if (MO.isUse()) {
10481d5ae102SDimitry Andric       MachineInstr *Def = MRI.getVRegDef(reg);
10491d5ae102SDimitry Andric       // Compute the stage that contains the last definition for instruction.
10501d5ae102SDimitry Andric       int DefStageNum = Schedule.getStage(Def);
10511d5ae102SDimitry Andric       unsigned StageNum = CurStageNum;
10521d5ae102SDimitry Andric       if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) {
10531d5ae102SDimitry Andric         // Compute the difference in stages between the defintion and the use.
10541d5ae102SDimitry Andric         unsigned StageDiff = (InstrStageNum - DefStageNum);
10551d5ae102SDimitry Andric         // Make an adjustment to get the last definition.
10561d5ae102SDimitry Andric         StageNum -= StageDiff;
10571d5ae102SDimitry Andric       }
10581d5ae102SDimitry Andric       if (VRMap[StageNum].count(reg))
10591d5ae102SDimitry Andric         MO.setReg(VRMap[StageNum][reg]);
10601d5ae102SDimitry Andric     }
10611d5ae102SDimitry Andric   }
10621d5ae102SDimitry Andric }
10631d5ae102SDimitry Andric 
10641d5ae102SDimitry Andric /// Return the instruction in the loop that defines the register.
10651d5ae102SDimitry Andric /// If the definition is a Phi, then follow the Phi operand to
10661d5ae102SDimitry Andric /// the instruction in the loop.
findDefInLoop(unsigned Reg)10671d5ae102SDimitry Andric MachineInstr *ModuloScheduleExpander::findDefInLoop(unsigned Reg) {
10681d5ae102SDimitry Andric   SmallPtrSet<MachineInstr *, 8> Visited;
10691d5ae102SDimitry Andric   MachineInstr *Def = MRI.getVRegDef(Reg);
10701d5ae102SDimitry Andric   while (Def->isPHI()) {
10711d5ae102SDimitry Andric     if (!Visited.insert(Def).second)
10721d5ae102SDimitry Andric       break;
10731d5ae102SDimitry Andric     for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
10741d5ae102SDimitry Andric       if (Def->getOperand(i + 1).getMBB() == BB) {
10751d5ae102SDimitry Andric         Def = MRI.getVRegDef(Def->getOperand(i).getReg());
10761d5ae102SDimitry Andric         break;
10771d5ae102SDimitry Andric       }
10781d5ae102SDimitry Andric   }
10791d5ae102SDimitry Andric   return Def;
10801d5ae102SDimitry Andric }
10811d5ae102SDimitry Andric 
10821d5ae102SDimitry Andric /// Return the new name for the value from the previous stage.
getPrevMapVal(unsigned StageNum,unsigned PhiStage,unsigned LoopVal,unsigned LoopStage,ValueMapTy * VRMap,MachineBasicBlock * BB)10831d5ae102SDimitry Andric unsigned ModuloScheduleExpander::getPrevMapVal(
10841d5ae102SDimitry Andric     unsigned StageNum, unsigned PhiStage, unsigned LoopVal, unsigned LoopStage,
10851d5ae102SDimitry Andric     ValueMapTy *VRMap, MachineBasicBlock *BB) {
10861d5ae102SDimitry Andric   unsigned PrevVal = 0;
10871d5ae102SDimitry Andric   if (StageNum > PhiStage) {
10881d5ae102SDimitry Andric     MachineInstr *LoopInst = MRI.getVRegDef(LoopVal);
10891d5ae102SDimitry Andric     if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal))
10901d5ae102SDimitry Andric       // The name is defined in the previous stage.
10911d5ae102SDimitry Andric       PrevVal = VRMap[StageNum - 1][LoopVal];
10921d5ae102SDimitry Andric     else if (VRMap[StageNum].count(LoopVal))
10931d5ae102SDimitry Andric       // The previous name is defined in the current stage when the instruction
10941d5ae102SDimitry Andric       // order is swapped.
10951d5ae102SDimitry Andric       PrevVal = VRMap[StageNum][LoopVal];
10961d5ae102SDimitry Andric     else if (!LoopInst->isPHI() || LoopInst->getParent() != BB)
10971d5ae102SDimitry Andric       // The loop value hasn't yet been scheduled.
10981d5ae102SDimitry Andric       PrevVal = LoopVal;
10991d5ae102SDimitry Andric     else if (StageNum == PhiStage + 1)
11001d5ae102SDimitry Andric       // The loop value is another phi, which has not been scheduled.
11011d5ae102SDimitry Andric       PrevVal = getInitPhiReg(*LoopInst, BB);
11021d5ae102SDimitry Andric     else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB)
11031d5ae102SDimitry Andric       // The loop value is another phi, which has been scheduled.
11041d5ae102SDimitry Andric       PrevVal =
11051d5ae102SDimitry Andric           getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB),
11061d5ae102SDimitry Andric                         LoopStage, VRMap, BB);
11071d5ae102SDimitry Andric   }
11081d5ae102SDimitry Andric   return PrevVal;
11091d5ae102SDimitry Andric }
11101d5ae102SDimitry Andric 
11111d5ae102SDimitry Andric /// Rewrite the Phi values in the specified block to use the mappings
11121d5ae102SDimitry Andric /// from the initial operand. Once the Phi is scheduled, we switch
11131d5ae102SDimitry Andric /// to using the loop value instead of the Phi value, so those names
11141d5ae102SDimitry Andric /// do not need to be rewritten.
rewritePhiValues(MachineBasicBlock * NewBB,unsigned StageNum,ValueMapTy * VRMap,InstrMapTy & InstrMap)11151d5ae102SDimitry Andric void ModuloScheduleExpander::rewritePhiValues(MachineBasicBlock *NewBB,
11161d5ae102SDimitry Andric                                               unsigned StageNum,
11171d5ae102SDimitry Andric                                               ValueMapTy *VRMap,
11181d5ae102SDimitry Andric                                               InstrMapTy &InstrMap) {
11191d5ae102SDimitry Andric   for (auto &PHI : BB->phis()) {
11201d5ae102SDimitry Andric     unsigned InitVal = 0;
11211d5ae102SDimitry Andric     unsigned LoopVal = 0;
11221d5ae102SDimitry Andric     getPhiRegs(PHI, BB, InitVal, LoopVal);
11231d5ae102SDimitry Andric     Register PhiDef = PHI.getOperand(0).getReg();
11241d5ae102SDimitry Andric 
11251d5ae102SDimitry Andric     unsigned PhiStage = (unsigned)Schedule.getStage(MRI.getVRegDef(PhiDef));
11261d5ae102SDimitry Andric     unsigned LoopStage = (unsigned)Schedule.getStage(MRI.getVRegDef(LoopVal));
11271d5ae102SDimitry Andric     unsigned NumPhis = getStagesForPhi(PhiDef);
11281d5ae102SDimitry Andric     if (NumPhis > StageNum)
11291d5ae102SDimitry Andric       NumPhis = StageNum;
11301d5ae102SDimitry Andric     for (unsigned np = 0; np <= NumPhis; ++np) {
11311d5ae102SDimitry Andric       unsigned NewVal =
11321d5ae102SDimitry Andric           getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB);
11331d5ae102SDimitry Andric       if (!NewVal)
11341d5ae102SDimitry Andric         NewVal = InitVal;
11351d5ae102SDimitry Andric       rewriteScheduledInstr(NewBB, InstrMap, StageNum - np, np, &PHI, PhiDef,
11361d5ae102SDimitry Andric                             NewVal);
11371d5ae102SDimitry Andric     }
11381d5ae102SDimitry Andric   }
11391d5ae102SDimitry Andric }
11401d5ae102SDimitry Andric 
11411d5ae102SDimitry Andric /// Rewrite a previously scheduled instruction to use the register value
11421d5ae102SDimitry Andric /// from the new instruction. Make sure the instruction occurs in the
11431d5ae102SDimitry Andric /// basic block, and we don't change the uses in the new instruction.
rewriteScheduledInstr(MachineBasicBlock * BB,InstrMapTy & InstrMap,unsigned CurStageNum,unsigned PhiNum,MachineInstr * Phi,unsigned OldReg,unsigned NewReg,unsigned PrevReg)11441d5ae102SDimitry Andric void ModuloScheduleExpander::rewriteScheduledInstr(
11451d5ae102SDimitry Andric     MachineBasicBlock *BB, InstrMapTy &InstrMap, unsigned CurStageNum,
11461d5ae102SDimitry Andric     unsigned PhiNum, MachineInstr *Phi, unsigned OldReg, unsigned NewReg,
11471d5ae102SDimitry Andric     unsigned PrevReg) {
11481d5ae102SDimitry Andric   bool InProlog = (CurStageNum < (unsigned)Schedule.getNumStages() - 1);
11491d5ae102SDimitry Andric   int StagePhi = Schedule.getStage(Phi) + PhiNum;
11501d5ae102SDimitry Andric   // Rewrite uses that have been scheduled already to use the new
11511d5ae102SDimitry Andric   // Phi register.
1152c0981da4SDimitry Andric   for (MachineOperand &UseOp :
1153c0981da4SDimitry Andric        llvm::make_early_inc_range(MRI.use_operands(OldReg))) {
11541d5ae102SDimitry Andric     MachineInstr *UseMI = UseOp.getParent();
11551d5ae102SDimitry Andric     if (UseMI->getParent() != BB)
11561d5ae102SDimitry Andric       continue;
11571d5ae102SDimitry Andric     if (UseMI->isPHI()) {
11581d5ae102SDimitry Andric       if (!Phi->isPHI() && UseMI->getOperand(0).getReg() == NewReg)
11591d5ae102SDimitry Andric         continue;
11601d5ae102SDimitry Andric       if (getLoopPhiReg(*UseMI, BB) != OldReg)
11611d5ae102SDimitry Andric         continue;
11621d5ae102SDimitry Andric     }
11631d5ae102SDimitry Andric     InstrMapTy::iterator OrigInstr = InstrMap.find(UseMI);
11641d5ae102SDimitry Andric     assert(OrigInstr != InstrMap.end() && "Instruction not scheduled.");
11651d5ae102SDimitry Andric     MachineInstr *OrigMI = OrigInstr->second;
11661d5ae102SDimitry Andric     int StageSched = Schedule.getStage(OrigMI);
11671d5ae102SDimitry Andric     int CycleSched = Schedule.getCycle(OrigMI);
11681d5ae102SDimitry Andric     unsigned ReplaceReg = 0;
11691d5ae102SDimitry Andric     // This is the stage for the scheduled instruction.
11701d5ae102SDimitry Andric     if (StagePhi == StageSched && Phi->isPHI()) {
11711d5ae102SDimitry Andric       int CyclePhi = Schedule.getCycle(Phi);
11721d5ae102SDimitry Andric       if (PrevReg && InProlog)
11731d5ae102SDimitry Andric         ReplaceReg = PrevReg;
11741d5ae102SDimitry Andric       else if (PrevReg && !isLoopCarried(*Phi) &&
11751d5ae102SDimitry Andric                (CyclePhi <= CycleSched || OrigMI->isPHI()))
11761d5ae102SDimitry Andric         ReplaceReg = PrevReg;
11771d5ae102SDimitry Andric       else
11781d5ae102SDimitry Andric         ReplaceReg = NewReg;
11791d5ae102SDimitry Andric     }
11801d5ae102SDimitry Andric     // The scheduled instruction occurs before the scheduled Phi, and the
11811d5ae102SDimitry Andric     // Phi is not loop carried.
11821d5ae102SDimitry Andric     if (!InProlog && StagePhi + 1 == StageSched && !isLoopCarried(*Phi))
11831d5ae102SDimitry Andric       ReplaceReg = NewReg;
11841d5ae102SDimitry Andric     if (StagePhi > StageSched && Phi->isPHI())
11851d5ae102SDimitry Andric       ReplaceReg = NewReg;
11861d5ae102SDimitry Andric     if (!InProlog && !Phi->isPHI() && StagePhi < StageSched)
11871d5ae102SDimitry Andric       ReplaceReg = NewReg;
11881d5ae102SDimitry Andric     if (ReplaceReg) {
1189145449b1SDimitry Andric       const TargetRegisterClass *NRC =
11901d5ae102SDimitry Andric           MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg));
1191145449b1SDimitry Andric       if (NRC)
11921d5ae102SDimitry Andric         UseOp.setReg(ReplaceReg);
1193145449b1SDimitry Andric       else {
1194145449b1SDimitry Andric         Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
1195145449b1SDimitry Andric         BuildMI(*BB, UseMI, UseMI->getDebugLoc(), TII->get(TargetOpcode::COPY),
1196145449b1SDimitry Andric                 SplitReg)
1197145449b1SDimitry Andric             .addReg(ReplaceReg);
1198145449b1SDimitry Andric         UseOp.setReg(SplitReg);
1199145449b1SDimitry Andric       }
12001d5ae102SDimitry Andric     }
12011d5ae102SDimitry Andric   }
12021d5ae102SDimitry Andric }
12031d5ae102SDimitry Andric 
isLoopCarried(MachineInstr & Phi)12041d5ae102SDimitry Andric bool ModuloScheduleExpander::isLoopCarried(MachineInstr &Phi) {
12051d5ae102SDimitry Andric   if (!Phi.isPHI())
12061d5ae102SDimitry Andric     return false;
1207706b4fc4SDimitry Andric   int DefCycle = Schedule.getCycle(&Phi);
12081d5ae102SDimitry Andric   int DefStage = Schedule.getStage(&Phi);
12091d5ae102SDimitry Andric 
12101d5ae102SDimitry Andric   unsigned InitVal = 0;
12111d5ae102SDimitry Andric   unsigned LoopVal = 0;
12121d5ae102SDimitry Andric   getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
12131d5ae102SDimitry Andric   MachineInstr *Use = MRI.getVRegDef(LoopVal);
12141d5ae102SDimitry Andric   if (!Use || Use->isPHI())
12151d5ae102SDimitry Andric     return true;
1216706b4fc4SDimitry Andric   int LoopCycle = Schedule.getCycle(Use);
12171d5ae102SDimitry Andric   int LoopStage = Schedule.getStage(Use);
12181d5ae102SDimitry Andric   return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
12191d5ae102SDimitry Andric }
12201d5ae102SDimitry Andric 
12211d5ae102SDimitry Andric //===----------------------------------------------------------------------===//
12221d5ae102SDimitry Andric // PeelingModuloScheduleExpander implementation
12231d5ae102SDimitry Andric //===----------------------------------------------------------------------===//
12241d5ae102SDimitry Andric // This is a reimplementation of ModuloScheduleExpander that works by creating
12251d5ae102SDimitry Andric // a fully correct steady-state kernel and peeling off the prolog and epilogs.
12261d5ae102SDimitry Andric //===----------------------------------------------------------------------===//
12271d5ae102SDimitry Andric 
12281d5ae102SDimitry Andric namespace {
12291d5ae102SDimitry Andric // Remove any dead phis in MBB. Dead phis either have only one block as input
12301d5ae102SDimitry Andric // (in which case they are the identity) or have no uses.
EliminateDeadPhis(MachineBasicBlock * MBB,MachineRegisterInfo & MRI,LiveIntervals * LIS,bool KeepSingleSrcPhi=false)12311d5ae102SDimitry Andric void EliminateDeadPhis(MachineBasicBlock *MBB, MachineRegisterInfo &MRI,
1232706b4fc4SDimitry Andric                        LiveIntervals *LIS, bool KeepSingleSrcPhi = false) {
12331d5ae102SDimitry Andric   bool Changed = true;
12341d5ae102SDimitry Andric   while (Changed) {
12351d5ae102SDimitry Andric     Changed = false;
1236c0981da4SDimitry Andric     for (MachineInstr &MI : llvm::make_early_inc_range(MBB->phis())) {
12371d5ae102SDimitry Andric       assert(MI.isPHI());
12381d5ae102SDimitry Andric       if (MRI.use_empty(MI.getOperand(0).getReg())) {
12391d5ae102SDimitry Andric         if (LIS)
12401d5ae102SDimitry Andric           LIS->RemoveMachineInstrFromMaps(MI);
12411d5ae102SDimitry Andric         MI.eraseFromParent();
12421d5ae102SDimitry Andric         Changed = true;
1243706b4fc4SDimitry Andric       } else if (!KeepSingleSrcPhi && MI.getNumExplicitOperands() == 3) {
1244145449b1SDimitry Andric         const TargetRegisterClass *ConstrainRegClass =
12451d5ae102SDimitry Andric             MRI.constrainRegClass(MI.getOperand(1).getReg(),
12461d5ae102SDimitry Andric                                   MRI.getRegClass(MI.getOperand(0).getReg()));
1247145449b1SDimitry Andric         assert(ConstrainRegClass &&
1248145449b1SDimitry Andric                "Expected a valid constrained register class!");
1249145449b1SDimitry Andric         (void)ConstrainRegClass;
12501d5ae102SDimitry Andric         MRI.replaceRegWith(MI.getOperand(0).getReg(),
12511d5ae102SDimitry Andric                            MI.getOperand(1).getReg());
12521d5ae102SDimitry Andric         if (LIS)
12531d5ae102SDimitry Andric           LIS->RemoveMachineInstrFromMaps(MI);
12541d5ae102SDimitry Andric         MI.eraseFromParent();
12551d5ae102SDimitry Andric         Changed = true;
12561d5ae102SDimitry Andric       }
12571d5ae102SDimitry Andric     }
12581d5ae102SDimitry Andric   }
12591d5ae102SDimitry Andric }
12601d5ae102SDimitry Andric 
12611d5ae102SDimitry Andric /// Rewrites the kernel block in-place to adhere to the given schedule.
12621d5ae102SDimitry Andric /// KernelRewriter holds all of the state required to perform the rewriting.
12631d5ae102SDimitry Andric class KernelRewriter {
12641d5ae102SDimitry Andric   ModuloSchedule &S;
12651d5ae102SDimitry Andric   MachineBasicBlock *BB;
12661d5ae102SDimitry Andric   MachineBasicBlock *PreheaderBB, *ExitBB;
12671d5ae102SDimitry Andric   MachineRegisterInfo &MRI;
12681d5ae102SDimitry Andric   const TargetInstrInfo *TII;
12691d5ae102SDimitry Andric   LiveIntervals *LIS;
12701d5ae102SDimitry Andric 
12711d5ae102SDimitry Andric   // Map from register class to canonical undef register for that class.
12721d5ae102SDimitry Andric   DenseMap<const TargetRegisterClass *, Register> Undefs;
12731d5ae102SDimitry Andric   // Map from <LoopReg, InitReg> to phi register for all created phis. Note that
12741d5ae102SDimitry Andric   // this map is only used when InitReg is non-undef.
12751d5ae102SDimitry Andric   DenseMap<std::pair<unsigned, unsigned>, Register> Phis;
12761d5ae102SDimitry Andric   // Map from LoopReg to phi register where the InitReg is undef.
12771d5ae102SDimitry Andric   DenseMap<Register, Register> UndefPhis;
12781d5ae102SDimitry Andric 
12791d5ae102SDimitry Andric   // Reg is used by MI. Return the new register MI should use to adhere to the
12801d5ae102SDimitry Andric   // schedule. Insert phis as necessary.
12811d5ae102SDimitry Andric   Register remapUse(Register Reg, MachineInstr &MI);
12821d5ae102SDimitry Andric   // Insert a phi that carries LoopReg from the loop body and InitReg otherwise.
12831d5ae102SDimitry Andric   // If InitReg is not given it is chosen arbitrarily. It will either be undef
12841d5ae102SDimitry Andric   // or will be chosen so as to share another phi.
1285e3b55780SDimitry Andric   Register phi(Register LoopReg, std::optional<Register> InitReg = {},
12861d5ae102SDimitry Andric                const TargetRegisterClass *RC = nullptr);
12871d5ae102SDimitry Andric   // Create an undef register of the given register class.
12881d5ae102SDimitry Andric   Register undef(const TargetRegisterClass *RC);
12891d5ae102SDimitry Andric 
12901d5ae102SDimitry Andric public:
1291344a3780SDimitry Andric   KernelRewriter(MachineLoop &L, ModuloSchedule &S, MachineBasicBlock *LoopBB,
12921d5ae102SDimitry Andric                  LiveIntervals *LIS = nullptr);
12931d5ae102SDimitry Andric   void rewrite();
12941d5ae102SDimitry Andric };
12951d5ae102SDimitry Andric } // namespace
12961d5ae102SDimitry Andric 
KernelRewriter(MachineLoop & L,ModuloSchedule & S,MachineBasicBlock * LoopBB,LiveIntervals * LIS)12971d5ae102SDimitry Andric KernelRewriter::KernelRewriter(MachineLoop &L, ModuloSchedule &S,
1298344a3780SDimitry Andric                                MachineBasicBlock *LoopBB, LiveIntervals *LIS)
1299344a3780SDimitry Andric     : S(S), BB(LoopBB), PreheaderBB(L.getLoopPreheader()),
13001d5ae102SDimitry Andric       ExitBB(L.getExitBlock()), MRI(BB->getParent()->getRegInfo()),
13011d5ae102SDimitry Andric       TII(BB->getParent()->getSubtarget().getInstrInfo()), LIS(LIS) {
13021d5ae102SDimitry Andric   PreheaderBB = *BB->pred_begin();
13031d5ae102SDimitry Andric   if (PreheaderBB == BB)
13041d5ae102SDimitry Andric     PreheaderBB = *std::next(BB->pred_begin());
13051d5ae102SDimitry Andric }
13061d5ae102SDimitry Andric 
rewrite()13071d5ae102SDimitry Andric void KernelRewriter::rewrite() {
13081d5ae102SDimitry Andric   // Rearrange the loop to be in schedule order. Note that the schedule may
13091d5ae102SDimitry Andric   // contain instructions that are not owned by the loop block (InstrChanges and
13101d5ae102SDimitry Andric   // friends), so we gracefully handle unowned instructions and delete any
13111d5ae102SDimitry Andric   // instructions that weren't in the schedule.
13121d5ae102SDimitry Andric   auto InsertPt = BB->getFirstTerminator();
13131d5ae102SDimitry Andric   MachineInstr *FirstMI = nullptr;
13141d5ae102SDimitry Andric   for (MachineInstr *MI : S.getInstructions()) {
13151d5ae102SDimitry Andric     if (MI->isPHI())
13161d5ae102SDimitry Andric       continue;
13171d5ae102SDimitry Andric     if (MI->getParent())
13181d5ae102SDimitry Andric       MI->removeFromParent();
13191d5ae102SDimitry Andric     BB->insert(InsertPt, MI);
13201d5ae102SDimitry Andric     if (!FirstMI)
13211d5ae102SDimitry Andric       FirstMI = MI;
13221d5ae102SDimitry Andric   }
13231d5ae102SDimitry Andric   assert(FirstMI && "Failed to find first MI in schedule");
13241d5ae102SDimitry Andric 
13251d5ae102SDimitry Andric   // At this point all of the scheduled instructions are between FirstMI
13261d5ae102SDimitry Andric   // and the end of the block. Kill from the first non-phi to FirstMI.
13271d5ae102SDimitry Andric   for (auto I = BB->getFirstNonPHI(); I != FirstMI->getIterator();) {
13281d5ae102SDimitry Andric     if (LIS)
13291d5ae102SDimitry Andric       LIS->RemoveMachineInstrFromMaps(*I);
13301d5ae102SDimitry Andric     (I++)->eraseFromParent();
13311d5ae102SDimitry Andric   }
13321d5ae102SDimitry Andric 
13331d5ae102SDimitry Andric   // Now remap every instruction in the loop.
13341d5ae102SDimitry Andric   for (MachineInstr &MI : *BB) {
13351d5ae102SDimitry Andric     if (MI.isPHI() || MI.isTerminator())
13361d5ae102SDimitry Andric       continue;
13371d5ae102SDimitry Andric     for (MachineOperand &MO : MI.uses()) {
13381d5ae102SDimitry Andric       if (!MO.isReg() || MO.getReg().isPhysical() || MO.isImplicit())
13391d5ae102SDimitry Andric         continue;
13401d5ae102SDimitry Andric       Register Reg = remapUse(MO.getReg(), MI);
13411d5ae102SDimitry Andric       MO.setReg(Reg);
13421d5ae102SDimitry Andric     }
13431d5ae102SDimitry Andric   }
13441d5ae102SDimitry Andric   EliminateDeadPhis(BB, MRI, LIS);
13451d5ae102SDimitry Andric 
13461d5ae102SDimitry Andric   // Ensure a phi exists for all instructions that are either referenced by
13471d5ae102SDimitry Andric   // an illegal phi or by an instruction outside the loop. This allows us to
13481d5ae102SDimitry Andric   // treat remaps of these values the same as "normal" values that come from
13491d5ae102SDimitry Andric   // loop-carried phis.
13501d5ae102SDimitry Andric   for (auto MI = BB->getFirstNonPHI(); MI != BB->end(); ++MI) {
13511d5ae102SDimitry Andric     if (MI->isPHI()) {
13521d5ae102SDimitry Andric       Register R = MI->getOperand(0).getReg();
13531d5ae102SDimitry Andric       phi(R);
13541d5ae102SDimitry Andric       continue;
13551d5ae102SDimitry Andric     }
13561d5ae102SDimitry Andric 
13571d5ae102SDimitry Andric     for (MachineOperand &Def : MI->defs()) {
13581d5ae102SDimitry Andric       for (MachineInstr &MI : MRI.use_instructions(Def.getReg())) {
13591d5ae102SDimitry Andric         if (MI.getParent() != BB) {
13601d5ae102SDimitry Andric           phi(Def.getReg());
13611d5ae102SDimitry Andric           break;
13621d5ae102SDimitry Andric         }
13631d5ae102SDimitry Andric       }
13641d5ae102SDimitry Andric     }
13651d5ae102SDimitry Andric   }
13661d5ae102SDimitry Andric }
13671d5ae102SDimitry Andric 
remapUse(Register Reg,MachineInstr & MI)13681d5ae102SDimitry Andric Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) {
13691d5ae102SDimitry Andric   MachineInstr *Producer = MRI.getUniqueVRegDef(Reg);
13701d5ae102SDimitry Andric   if (!Producer)
13711d5ae102SDimitry Andric     return Reg;
13721d5ae102SDimitry Andric 
13731d5ae102SDimitry Andric   int ConsumerStage = S.getStage(&MI);
13741d5ae102SDimitry Andric   if (!Producer->isPHI()) {
13751d5ae102SDimitry Andric     // Non-phi producers are simple to remap. Insert as many phis as the
13761d5ae102SDimitry Andric     // difference between the consumer and producer stages.
13771d5ae102SDimitry Andric     if (Producer->getParent() != BB)
13781d5ae102SDimitry Andric       // Producer was not inside the loop. Use the register as-is.
13791d5ae102SDimitry Andric       return Reg;
13801d5ae102SDimitry Andric     int ProducerStage = S.getStage(Producer);
13811d5ae102SDimitry Andric     assert(ConsumerStage != -1 &&
13821d5ae102SDimitry Andric            "In-loop consumer should always be scheduled!");
13831d5ae102SDimitry Andric     assert(ConsumerStage >= ProducerStage);
13841d5ae102SDimitry Andric     unsigned StageDiff = ConsumerStage - ProducerStage;
13851d5ae102SDimitry Andric 
13861d5ae102SDimitry Andric     for (unsigned I = 0; I < StageDiff; ++I)
13871d5ae102SDimitry Andric       Reg = phi(Reg);
13881d5ae102SDimitry Andric     return Reg;
13891d5ae102SDimitry Andric   }
13901d5ae102SDimitry Andric 
13911d5ae102SDimitry Andric   // First, dive through the phi chain to find the defaults for the generated
13921d5ae102SDimitry Andric   // phis.
1393e3b55780SDimitry Andric   SmallVector<std::optional<Register>, 4> Defaults;
13941d5ae102SDimitry Andric   Register LoopReg = Reg;
13951d5ae102SDimitry Andric   auto LoopProducer = Producer;
13961d5ae102SDimitry Andric   while (LoopProducer->isPHI() && LoopProducer->getParent() == BB) {
13971d5ae102SDimitry Andric     LoopReg = getLoopPhiReg(*LoopProducer, BB);
13981d5ae102SDimitry Andric     Defaults.emplace_back(getInitPhiReg(*LoopProducer, BB));
13991d5ae102SDimitry Andric     LoopProducer = MRI.getUniqueVRegDef(LoopReg);
14001d5ae102SDimitry Andric     assert(LoopProducer);
14011d5ae102SDimitry Andric   }
14021d5ae102SDimitry Andric   int LoopProducerStage = S.getStage(LoopProducer);
14031d5ae102SDimitry Andric 
1404e3b55780SDimitry Andric   std::optional<Register> IllegalPhiDefault;
14051d5ae102SDimitry Andric 
14061d5ae102SDimitry Andric   if (LoopProducerStage == -1) {
14071d5ae102SDimitry Andric     // Do nothing.
14081d5ae102SDimitry Andric   } else if (LoopProducerStage > ConsumerStage) {
14091d5ae102SDimitry Andric     // This schedule is only representable if ProducerStage == ConsumerStage+1.
14101d5ae102SDimitry Andric     // In addition, Consumer's cycle must be scheduled after Producer in the
14111d5ae102SDimitry Andric     // rescheduled loop. This is enforced by the pipeliner's ASAP and ALAP
14121d5ae102SDimitry Andric     // functions.
14131d5ae102SDimitry Andric #ifndef NDEBUG // Silence unused variables in non-asserts mode.
14141d5ae102SDimitry Andric     int LoopProducerCycle = S.getCycle(LoopProducer);
14151d5ae102SDimitry Andric     int ConsumerCycle = S.getCycle(&MI);
14161d5ae102SDimitry Andric #endif
14171d5ae102SDimitry Andric     assert(LoopProducerCycle <= ConsumerCycle);
14181d5ae102SDimitry Andric     assert(LoopProducerStage == ConsumerStage + 1);
14191d5ae102SDimitry Andric     // Peel off the first phi from Defaults and insert a phi between producer
14201d5ae102SDimitry Andric     // and consumer. This phi will not be at the front of the block so we
14211d5ae102SDimitry Andric     // consider it illegal. It will only exist during the rewrite process; it
14221d5ae102SDimitry Andric     // needs to exist while we peel off prologs because these could take the
14231d5ae102SDimitry Andric     // default value. After that we can replace all uses with the loop producer
14241d5ae102SDimitry Andric     // value.
14251d5ae102SDimitry Andric     IllegalPhiDefault = Defaults.front();
14261d5ae102SDimitry Andric     Defaults.erase(Defaults.begin());
14271d5ae102SDimitry Andric   } else {
14281d5ae102SDimitry Andric     assert(ConsumerStage >= LoopProducerStage);
14291d5ae102SDimitry Andric     int StageDiff = ConsumerStage - LoopProducerStage;
14301d5ae102SDimitry Andric     if (StageDiff > 0) {
14311d5ae102SDimitry Andric       LLVM_DEBUG(dbgs() << " -- padding defaults array from " << Defaults.size()
14321d5ae102SDimitry Andric                         << " to " << (Defaults.size() + StageDiff) << "\n");
14331d5ae102SDimitry Andric       // If we need more phis than we have defaults for, pad out with undefs for
14341d5ae102SDimitry Andric       // the earliest phis, which are at the end of the defaults chain (the
14351d5ae102SDimitry Andric       // chain is in reverse order).
1436e3b55780SDimitry Andric       Defaults.resize(Defaults.size() + StageDiff,
1437e3b55780SDimitry Andric                       Defaults.empty() ? std::optional<Register>()
14381d5ae102SDimitry Andric                                        : Defaults.back());
14391d5ae102SDimitry Andric     }
14401d5ae102SDimitry Andric   }
14411d5ae102SDimitry Andric 
14421d5ae102SDimitry Andric   // Now we know the number of stages to jump back, insert the phi chain.
14431d5ae102SDimitry Andric   auto DefaultI = Defaults.rbegin();
14441d5ae102SDimitry Andric   while (DefaultI != Defaults.rend())
14451d5ae102SDimitry Andric     LoopReg = phi(LoopReg, *DefaultI++, MRI.getRegClass(Reg));
14461d5ae102SDimitry Andric 
1447145449b1SDimitry Andric   if (IllegalPhiDefault) {
14481d5ae102SDimitry Andric     // The consumer optionally consumes LoopProducer in the same iteration
14491d5ae102SDimitry Andric     // (because the producer is scheduled at an earlier cycle than the consumer)
14501d5ae102SDimitry Andric     // or the initial value. To facilitate this we create an illegal block here
14511d5ae102SDimitry Andric     // by embedding a phi in the middle of the block. We will fix this up
14521d5ae102SDimitry Andric     // immediately prior to pruning.
14531d5ae102SDimitry Andric     auto RC = MRI.getRegClass(Reg);
14541d5ae102SDimitry Andric     Register R = MRI.createVirtualRegister(RC);
1455cfca06d7SDimitry Andric     MachineInstr *IllegalPhi =
14561d5ae102SDimitry Andric         BuildMI(*BB, MI, DebugLoc(), TII->get(TargetOpcode::PHI), R)
1457145449b1SDimitry Andric             .addReg(*IllegalPhiDefault)
14581d5ae102SDimitry Andric             .addMBB(PreheaderBB) // Block choice is arbitrary and has no effect.
14591d5ae102SDimitry Andric             .addReg(LoopReg)
14601d5ae102SDimitry Andric             .addMBB(BB); // Block choice is arbitrary and has no effect.
1461cfca06d7SDimitry Andric     // Illegal phi should belong to the producer stage so that it can be
1462cfca06d7SDimitry Andric     // filtered correctly during peeling.
1463cfca06d7SDimitry Andric     S.setStage(IllegalPhi, LoopProducerStage);
14641d5ae102SDimitry Andric     return R;
14651d5ae102SDimitry Andric   }
14661d5ae102SDimitry Andric 
14671d5ae102SDimitry Andric   return LoopReg;
14681d5ae102SDimitry Andric }
14691d5ae102SDimitry Andric 
phi(Register LoopReg,std::optional<Register> InitReg,const TargetRegisterClass * RC)1470e3b55780SDimitry Andric Register KernelRewriter::phi(Register LoopReg, std::optional<Register> InitReg,
14711d5ae102SDimitry Andric                              const TargetRegisterClass *RC) {
14721d5ae102SDimitry Andric   // If the init register is not undef, try and find an existing phi.
1473145449b1SDimitry Andric   if (InitReg) {
1474e3b55780SDimitry Andric     auto I = Phis.find({LoopReg, *InitReg});
14751d5ae102SDimitry Andric     if (I != Phis.end())
14761d5ae102SDimitry Andric       return I->second;
14771d5ae102SDimitry Andric   } else {
14781d5ae102SDimitry Andric     for (auto &KV : Phis) {
14791d5ae102SDimitry Andric       if (KV.first.first == LoopReg)
14801d5ae102SDimitry Andric         return KV.second;
14811d5ae102SDimitry Andric     }
14821d5ae102SDimitry Andric   }
14831d5ae102SDimitry Andric 
14841d5ae102SDimitry Andric   // InitReg is either undef or no existing phi takes InitReg as input. Try and
14851d5ae102SDimitry Andric   // find a phi that takes undef as input.
14861d5ae102SDimitry Andric   auto I = UndefPhis.find(LoopReg);
14871d5ae102SDimitry Andric   if (I != UndefPhis.end()) {
14881d5ae102SDimitry Andric     Register R = I->second;
1489145449b1SDimitry Andric     if (!InitReg)
14901d5ae102SDimitry Andric       // Found a phi taking undef as input, and this input is undef so return
14911d5ae102SDimitry Andric       // without any more changes.
14921d5ae102SDimitry Andric       return R;
14931d5ae102SDimitry Andric     // Found a phi taking undef as input, so rewrite it to take InitReg.
14941d5ae102SDimitry Andric     MachineInstr *MI = MRI.getVRegDef(R);
1495e3b55780SDimitry Andric     MI->getOperand(1).setReg(*InitReg);
1496e3b55780SDimitry Andric     Phis.insert({{LoopReg, *InitReg}, R});
1497145449b1SDimitry Andric     const TargetRegisterClass *ConstrainRegClass =
1498e3b55780SDimitry Andric         MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));
1499145449b1SDimitry Andric     assert(ConstrainRegClass && "Expected a valid constrained register class!");
1500145449b1SDimitry Andric     (void)ConstrainRegClass;
15011d5ae102SDimitry Andric     UndefPhis.erase(I);
15021d5ae102SDimitry Andric     return R;
15031d5ae102SDimitry Andric   }
15041d5ae102SDimitry Andric 
15051d5ae102SDimitry Andric   // Failed to find any existing phi to reuse, so create a new one.
15061d5ae102SDimitry Andric   if (!RC)
15071d5ae102SDimitry Andric     RC = MRI.getRegClass(LoopReg);
15081d5ae102SDimitry Andric   Register R = MRI.createVirtualRegister(RC);
1509145449b1SDimitry Andric   if (InitReg) {
1510145449b1SDimitry Andric     const TargetRegisterClass *ConstrainRegClass =
15111d5ae102SDimitry Andric         MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));
1512145449b1SDimitry Andric     assert(ConstrainRegClass && "Expected a valid constrained register class!");
1513145449b1SDimitry Andric     (void)ConstrainRegClass;
1514145449b1SDimitry Andric   }
15151d5ae102SDimitry Andric   BuildMI(*BB, BB->getFirstNonPHI(), DebugLoc(), TII->get(TargetOpcode::PHI), R)
1516145449b1SDimitry Andric       .addReg(InitReg ? *InitReg : undef(RC))
15171d5ae102SDimitry Andric       .addMBB(PreheaderBB)
15181d5ae102SDimitry Andric       .addReg(LoopReg)
15191d5ae102SDimitry Andric       .addMBB(BB);
1520145449b1SDimitry Andric   if (!InitReg)
15211d5ae102SDimitry Andric     UndefPhis[LoopReg] = R;
15221d5ae102SDimitry Andric   else
15231d5ae102SDimitry Andric     Phis[{LoopReg, *InitReg}] = R;
15241d5ae102SDimitry Andric   return R;
15251d5ae102SDimitry Andric }
15261d5ae102SDimitry Andric 
undef(const TargetRegisterClass * RC)15271d5ae102SDimitry Andric Register KernelRewriter::undef(const TargetRegisterClass *RC) {
15281d5ae102SDimitry Andric   Register &R = Undefs[RC];
15291d5ae102SDimitry Andric   if (R == 0) {
15301d5ae102SDimitry Andric     // Create an IMPLICIT_DEF that defines this register if we need it.
15311d5ae102SDimitry Andric     // All uses of this should be removed by the time we have finished unrolling
15321d5ae102SDimitry Andric     // prologs and epilogs.
15331d5ae102SDimitry Andric     R = MRI.createVirtualRegister(RC);
15341d5ae102SDimitry Andric     auto *InsertBB = &PreheaderBB->getParent()->front();
15351d5ae102SDimitry Andric     BuildMI(*InsertBB, InsertBB->getFirstTerminator(), DebugLoc(),
15361d5ae102SDimitry Andric             TII->get(TargetOpcode::IMPLICIT_DEF), R);
15371d5ae102SDimitry Andric   }
15381d5ae102SDimitry Andric   return R;
15391d5ae102SDimitry Andric }
15401d5ae102SDimitry Andric 
15411d5ae102SDimitry Andric namespace {
15421d5ae102SDimitry Andric /// Describes an operand in the kernel of a pipelined loop. Characteristics of
15431d5ae102SDimitry Andric /// the operand are discovered, such as how many in-loop PHIs it has to jump
15441d5ae102SDimitry Andric /// through and defaults for these phis.
15451d5ae102SDimitry Andric class KernelOperandInfo {
15461d5ae102SDimitry Andric   MachineBasicBlock *BB;
15471d5ae102SDimitry Andric   MachineRegisterInfo &MRI;
15481d5ae102SDimitry Andric   SmallVector<Register, 4> PhiDefaults;
15491d5ae102SDimitry Andric   MachineOperand *Source;
15501d5ae102SDimitry Andric   MachineOperand *Target;
15511d5ae102SDimitry Andric 
15521d5ae102SDimitry Andric public:
KernelOperandInfo(MachineOperand * MO,MachineRegisterInfo & MRI,const SmallPtrSetImpl<MachineInstr * > & IllegalPhis)15531d5ae102SDimitry Andric   KernelOperandInfo(MachineOperand *MO, MachineRegisterInfo &MRI,
15541d5ae102SDimitry Andric                     const SmallPtrSetImpl<MachineInstr *> &IllegalPhis)
15551d5ae102SDimitry Andric       : MRI(MRI) {
15561d5ae102SDimitry Andric     Source = MO;
15571d5ae102SDimitry Andric     BB = MO->getParent()->getParent();
15581d5ae102SDimitry Andric     while (isRegInLoop(MO)) {
15591d5ae102SDimitry Andric       MachineInstr *MI = MRI.getVRegDef(MO->getReg());
15601d5ae102SDimitry Andric       if (MI->isFullCopy()) {
15611d5ae102SDimitry Andric         MO = &MI->getOperand(1);
15621d5ae102SDimitry Andric         continue;
15631d5ae102SDimitry Andric       }
15641d5ae102SDimitry Andric       if (!MI->isPHI())
15651d5ae102SDimitry Andric         break;
15661d5ae102SDimitry Andric       // If this is an illegal phi, don't count it in distance.
15671d5ae102SDimitry Andric       if (IllegalPhis.count(MI)) {
15681d5ae102SDimitry Andric         MO = &MI->getOperand(3);
15691d5ae102SDimitry Andric         continue;
15701d5ae102SDimitry Andric       }
15711d5ae102SDimitry Andric 
15721d5ae102SDimitry Andric       Register Default = getInitPhiReg(*MI, BB);
15731d5ae102SDimitry Andric       MO = MI->getOperand(2).getMBB() == BB ? &MI->getOperand(1)
15741d5ae102SDimitry Andric                                             : &MI->getOperand(3);
15751d5ae102SDimitry Andric       PhiDefaults.push_back(Default);
15761d5ae102SDimitry Andric     }
15771d5ae102SDimitry Andric     Target = MO;
15781d5ae102SDimitry Andric   }
15791d5ae102SDimitry Andric 
operator ==(const KernelOperandInfo & Other) const15801d5ae102SDimitry Andric   bool operator==(const KernelOperandInfo &Other) const {
15811d5ae102SDimitry Andric     return PhiDefaults.size() == Other.PhiDefaults.size();
15821d5ae102SDimitry Andric   }
15831d5ae102SDimitry Andric 
print(raw_ostream & OS) const15841d5ae102SDimitry Andric   void print(raw_ostream &OS) const {
15851d5ae102SDimitry Andric     OS << "use of " << *Source << ": distance(" << PhiDefaults.size() << ") in "
15861d5ae102SDimitry Andric        << *Source->getParent();
15871d5ae102SDimitry Andric   }
15881d5ae102SDimitry Andric 
15891d5ae102SDimitry Andric private:
isRegInLoop(MachineOperand * MO)15901d5ae102SDimitry Andric   bool isRegInLoop(MachineOperand *MO) {
15911d5ae102SDimitry Andric     return MO->isReg() && MO->getReg().isVirtual() &&
15921d5ae102SDimitry Andric            MRI.getVRegDef(MO->getReg())->getParent() == BB;
15931d5ae102SDimitry Andric   }
15941d5ae102SDimitry Andric };
15951d5ae102SDimitry Andric } // namespace
15961d5ae102SDimitry Andric 
15971d5ae102SDimitry Andric MachineBasicBlock *
peelKernel(LoopPeelDirection LPD)15981d5ae102SDimitry Andric PeelingModuloScheduleExpander::peelKernel(LoopPeelDirection LPD) {
15991d5ae102SDimitry Andric   MachineBasicBlock *NewBB = PeelSingleBlockLoop(LPD, BB, MRI, TII);
16001d5ae102SDimitry Andric   if (LPD == LPD_Front)
16011d5ae102SDimitry Andric     PeeledFront.push_back(NewBB);
16021d5ae102SDimitry Andric   else
16031d5ae102SDimitry Andric     PeeledBack.push_front(NewBB);
16041d5ae102SDimitry Andric   for (auto I = BB->begin(), NI = NewBB->begin(); !I->isTerminator();
16051d5ae102SDimitry Andric        ++I, ++NI) {
16061d5ae102SDimitry Andric     CanonicalMIs[&*I] = &*I;
16071d5ae102SDimitry Andric     CanonicalMIs[&*NI] = &*I;
16081d5ae102SDimitry Andric     BlockMIs[{NewBB, &*I}] = &*NI;
16091d5ae102SDimitry Andric     BlockMIs[{BB, &*I}] = &*I;
16101d5ae102SDimitry Andric   }
16111d5ae102SDimitry Andric   return NewBB;
16121d5ae102SDimitry Andric }
16131d5ae102SDimitry Andric 
filterInstructions(MachineBasicBlock * MB,int MinStage)1614706b4fc4SDimitry Andric void PeelingModuloScheduleExpander::filterInstructions(MachineBasicBlock *MB,
1615706b4fc4SDimitry Andric                                                        int MinStage) {
1616706b4fc4SDimitry Andric   for (auto I = MB->getFirstInstrTerminator()->getReverseIterator();
1617706b4fc4SDimitry Andric        I != std::next(MB->getFirstNonPHI()->getReverseIterator());) {
1618706b4fc4SDimitry Andric     MachineInstr *MI = &*I++;
1619706b4fc4SDimitry Andric     int Stage = getStage(MI);
1620706b4fc4SDimitry Andric     if (Stage == -1 || Stage >= MinStage)
1621706b4fc4SDimitry Andric       continue;
1622706b4fc4SDimitry Andric 
1623706b4fc4SDimitry Andric     for (MachineOperand &DefMO : MI->defs()) {
1624706b4fc4SDimitry Andric       SmallVector<std::pair<MachineInstr *, Register>, 4> Subs;
1625706b4fc4SDimitry Andric       for (MachineInstr &UseMI : MRI.use_instructions(DefMO.getReg())) {
1626706b4fc4SDimitry Andric         // Only PHIs can use values from this block by construction.
1627706b4fc4SDimitry Andric         // Match with the equivalent PHI in B.
1628706b4fc4SDimitry Andric         assert(UseMI.isPHI());
1629706b4fc4SDimitry Andric         Register Reg = getEquivalentRegisterIn(UseMI.getOperand(0).getReg(),
1630706b4fc4SDimitry Andric                                                MI->getParent());
1631706b4fc4SDimitry Andric         Subs.emplace_back(&UseMI, Reg);
1632706b4fc4SDimitry Andric       }
1633706b4fc4SDimitry Andric       for (auto &Sub : Subs)
1634706b4fc4SDimitry Andric         Sub.first->substituteRegister(DefMO.getReg(), Sub.second, /*SubIdx=*/0,
1635706b4fc4SDimitry Andric                                       *MRI.getTargetRegisterInfo());
1636706b4fc4SDimitry Andric     }
1637706b4fc4SDimitry Andric     if (LIS)
1638706b4fc4SDimitry Andric       LIS->RemoveMachineInstrFromMaps(*MI);
1639706b4fc4SDimitry Andric     MI->eraseFromParent();
1640706b4fc4SDimitry Andric   }
1641706b4fc4SDimitry Andric }
1642706b4fc4SDimitry Andric 
moveStageBetweenBlocks(MachineBasicBlock * DestBB,MachineBasicBlock * SourceBB,unsigned Stage)1643706b4fc4SDimitry Andric void PeelingModuloScheduleExpander::moveStageBetweenBlocks(
1644706b4fc4SDimitry Andric     MachineBasicBlock *DestBB, MachineBasicBlock *SourceBB, unsigned Stage) {
1645706b4fc4SDimitry Andric   auto InsertPt = DestBB->getFirstNonPHI();
1646706b4fc4SDimitry Andric   DenseMap<Register, Register> Remaps;
1647c0981da4SDimitry Andric   for (MachineInstr &MI : llvm::make_early_inc_range(
1648c0981da4SDimitry Andric            llvm::make_range(SourceBB->getFirstNonPHI(), SourceBB->end()))) {
1649c0981da4SDimitry Andric     if (MI.isPHI()) {
1650706b4fc4SDimitry Andric       // This is an illegal PHI. If we move any instructions using an illegal
1651cfca06d7SDimitry Andric       // PHI, we need to create a legal Phi.
1652c0981da4SDimitry Andric       if (getStage(&MI) != Stage) {
1653cfca06d7SDimitry Andric         // The legal Phi is not necessary if the illegal phi's stage
1654cfca06d7SDimitry Andric         // is being moved.
1655c0981da4SDimitry Andric         Register PhiR = MI.getOperand(0).getReg();
1656706b4fc4SDimitry Andric         auto RC = MRI.getRegClass(PhiR);
1657706b4fc4SDimitry Andric         Register NR = MRI.createVirtualRegister(RC);
1658cfca06d7SDimitry Andric         MachineInstr *NI = BuildMI(*DestBB, DestBB->getFirstNonPHI(),
1659cfca06d7SDimitry Andric                                    DebugLoc(), TII->get(TargetOpcode::PHI), NR)
1660706b4fc4SDimitry Andric                                .addReg(PhiR)
1661706b4fc4SDimitry Andric                                .addMBB(SourceBB);
1662c0981da4SDimitry Andric         BlockMIs[{DestBB, CanonicalMIs[&MI]}] = NI;
1663c0981da4SDimitry Andric         CanonicalMIs[NI] = CanonicalMIs[&MI];
1664706b4fc4SDimitry Andric         Remaps[PhiR] = NR;
1665cfca06d7SDimitry Andric       }
1666706b4fc4SDimitry Andric     }
1667c0981da4SDimitry Andric     if (getStage(&MI) != Stage)
1668706b4fc4SDimitry Andric       continue;
1669c0981da4SDimitry Andric     MI.removeFromParent();
1670c0981da4SDimitry Andric     DestBB->insert(InsertPt, &MI);
1671c0981da4SDimitry Andric     auto *KernelMI = CanonicalMIs[&MI];
1672c0981da4SDimitry Andric     BlockMIs[{DestBB, KernelMI}] = &MI;
1673706b4fc4SDimitry Andric     BlockMIs.erase({SourceBB, KernelMI});
1674706b4fc4SDimitry Andric   }
1675706b4fc4SDimitry Andric   SmallVector<MachineInstr *, 4> PhiToDelete;
1676706b4fc4SDimitry Andric   for (MachineInstr &MI : DestBB->phis()) {
1677706b4fc4SDimitry Andric     assert(MI.getNumOperands() == 3);
1678706b4fc4SDimitry Andric     MachineInstr *Def = MRI.getVRegDef(MI.getOperand(1).getReg());
1679706b4fc4SDimitry Andric     // If the instruction referenced by the phi is moved inside the block
1680706b4fc4SDimitry Andric     // we don't need the phi anymore.
1681706b4fc4SDimitry Andric     if (getStage(Def) == Stage) {
1682706b4fc4SDimitry Andric       Register PhiReg = MI.getOperand(0).getReg();
1683ac9a064cSDimitry Andric       assert(Def->findRegisterDefOperandIdx(MI.getOperand(1).getReg(),
1684ac9a064cSDimitry Andric                                             /*TRI=*/nullptr) != -1);
1685cfca06d7SDimitry Andric       MRI.replaceRegWith(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
1686706b4fc4SDimitry Andric       MI.getOperand(0).setReg(PhiReg);
1687706b4fc4SDimitry Andric       PhiToDelete.push_back(&MI);
1688706b4fc4SDimitry Andric     }
1689706b4fc4SDimitry Andric   }
1690706b4fc4SDimitry Andric   for (auto *P : PhiToDelete)
1691706b4fc4SDimitry Andric     P->eraseFromParent();
1692706b4fc4SDimitry Andric   InsertPt = DestBB->getFirstNonPHI();
1693706b4fc4SDimitry Andric   // Helper to clone Phi instructions into the destination block. We clone Phi
1694706b4fc4SDimitry Andric   // greedily to avoid combinatorial explosion of Phi instructions.
1695706b4fc4SDimitry Andric   auto clonePhi = [&](MachineInstr *Phi) {
1696706b4fc4SDimitry Andric     MachineInstr *NewMI = MF.CloneMachineInstr(Phi);
1697706b4fc4SDimitry Andric     DestBB->insert(InsertPt, NewMI);
1698706b4fc4SDimitry Andric     Register OrigR = Phi->getOperand(0).getReg();
1699706b4fc4SDimitry Andric     Register R = MRI.createVirtualRegister(MRI.getRegClass(OrigR));
1700706b4fc4SDimitry Andric     NewMI->getOperand(0).setReg(R);
1701706b4fc4SDimitry Andric     NewMI->getOperand(1).setReg(OrigR);
1702706b4fc4SDimitry Andric     NewMI->getOperand(2).setMBB(*DestBB->pred_begin());
1703706b4fc4SDimitry Andric     Remaps[OrigR] = R;
1704706b4fc4SDimitry Andric     CanonicalMIs[NewMI] = CanonicalMIs[Phi];
1705706b4fc4SDimitry Andric     BlockMIs[{DestBB, CanonicalMIs[Phi]}] = NewMI;
1706706b4fc4SDimitry Andric     PhiNodeLoopIteration[NewMI] = PhiNodeLoopIteration[Phi];
1707706b4fc4SDimitry Andric     return R;
1708706b4fc4SDimitry Andric   };
1709706b4fc4SDimitry Andric   for (auto I = DestBB->getFirstNonPHI(); I != DestBB->end(); ++I) {
1710706b4fc4SDimitry Andric     for (MachineOperand &MO : I->uses()) {
1711706b4fc4SDimitry Andric       if (!MO.isReg())
1712706b4fc4SDimitry Andric         continue;
1713706b4fc4SDimitry Andric       if (Remaps.count(MO.getReg()))
1714706b4fc4SDimitry Andric         MO.setReg(Remaps[MO.getReg()]);
1715706b4fc4SDimitry Andric       else {
1716706b4fc4SDimitry Andric         // If we are using a phi from the source block we need to add a new phi
1717706b4fc4SDimitry Andric         // pointing to the old one.
1718706b4fc4SDimitry Andric         MachineInstr *Use = MRI.getUniqueVRegDef(MO.getReg());
1719706b4fc4SDimitry Andric         if (Use && Use->isPHI() && Use->getParent() == SourceBB) {
1720706b4fc4SDimitry Andric           Register R = clonePhi(Use);
1721706b4fc4SDimitry Andric           MO.setReg(R);
1722706b4fc4SDimitry Andric         }
1723706b4fc4SDimitry Andric       }
1724706b4fc4SDimitry Andric     }
1725706b4fc4SDimitry Andric   }
1726706b4fc4SDimitry Andric }
1727706b4fc4SDimitry Andric 
1728706b4fc4SDimitry Andric Register
getPhiCanonicalReg(MachineInstr * CanonicalPhi,MachineInstr * Phi)1729706b4fc4SDimitry Andric PeelingModuloScheduleExpander::getPhiCanonicalReg(MachineInstr *CanonicalPhi,
1730706b4fc4SDimitry Andric                                                   MachineInstr *Phi) {
1731706b4fc4SDimitry Andric   unsigned distance = PhiNodeLoopIteration[Phi];
1732706b4fc4SDimitry Andric   MachineInstr *CanonicalUse = CanonicalPhi;
1733cfca06d7SDimitry Andric   Register CanonicalUseReg = CanonicalUse->getOperand(0).getReg();
1734706b4fc4SDimitry Andric   for (unsigned I = 0; I < distance; ++I) {
1735706b4fc4SDimitry Andric     assert(CanonicalUse->isPHI());
1736706b4fc4SDimitry Andric     assert(CanonicalUse->getNumOperands() == 5);
1737706b4fc4SDimitry Andric     unsigned LoopRegIdx = 3, InitRegIdx = 1;
1738706b4fc4SDimitry Andric     if (CanonicalUse->getOperand(2).getMBB() == CanonicalUse->getParent())
1739706b4fc4SDimitry Andric       std::swap(LoopRegIdx, InitRegIdx);
1740cfca06d7SDimitry Andric     CanonicalUseReg = CanonicalUse->getOperand(LoopRegIdx).getReg();
1741cfca06d7SDimitry Andric     CanonicalUse = MRI.getVRegDef(CanonicalUseReg);
1742706b4fc4SDimitry Andric   }
1743cfca06d7SDimitry Andric   return CanonicalUseReg;
1744706b4fc4SDimitry Andric }
1745706b4fc4SDimitry Andric 
peelPrologAndEpilogs()17461d5ae102SDimitry Andric void PeelingModuloScheduleExpander::peelPrologAndEpilogs() {
17471d5ae102SDimitry Andric   BitVector LS(Schedule.getNumStages(), true);
17481d5ae102SDimitry Andric   BitVector AS(Schedule.getNumStages(), true);
17491d5ae102SDimitry Andric   LiveStages[BB] = LS;
17501d5ae102SDimitry Andric   AvailableStages[BB] = AS;
17511d5ae102SDimitry Andric 
17521d5ae102SDimitry Andric   // Peel out the prologs.
17531d5ae102SDimitry Andric   LS.reset();
17541d5ae102SDimitry Andric   for (int I = 0; I < Schedule.getNumStages() - 1; ++I) {
17556f8fc217SDimitry Andric     LS[I] = true;
17561d5ae102SDimitry Andric     Prologs.push_back(peelKernel(LPD_Front));
17571d5ae102SDimitry Andric     LiveStages[Prologs.back()] = LS;
17581d5ae102SDimitry Andric     AvailableStages[Prologs.back()] = LS;
17591d5ae102SDimitry Andric   }
17601d5ae102SDimitry Andric 
17611d5ae102SDimitry Andric   // Create a block that will end up as the new loop exiting block (dominated by
17621d5ae102SDimitry Andric   // all prologs and epilogs). It will only contain PHIs, in the same order as
17631d5ae102SDimitry Andric   // BB's PHIs. This gives us a poor-man's LCSSA with the inductive property
17641d5ae102SDimitry Andric   // that the exiting block is a (sub) clone of BB. This in turn gives us the
17651d5ae102SDimitry Andric   // property that any value deffed in BB but used outside of BB is used by a
17661d5ae102SDimitry Andric   // PHI in the exiting block.
17671d5ae102SDimitry Andric   MachineBasicBlock *ExitingBB = CreateLCSSAExitingBlock();
1768706b4fc4SDimitry Andric   EliminateDeadPhis(ExitingBB, MRI, LIS, /*KeepSingleSrcPhi=*/true);
17691d5ae102SDimitry Andric   // Push out the epilogs, again in reverse order.
17701d5ae102SDimitry Andric   // We can't assume anything about the minumum loop trip count at this point,
1771706b4fc4SDimitry Andric   // so emit a fairly complex epilog.
1772706b4fc4SDimitry Andric 
1773706b4fc4SDimitry Andric   // We first peel number of stages minus one epilogue. Then we remove dead
1774706b4fc4SDimitry Andric   // stages and reorder instructions based on their stage. If we have 3 stages
1775706b4fc4SDimitry Andric   // we generate first:
1776706b4fc4SDimitry Andric   // E0[3, 2, 1]
1777706b4fc4SDimitry Andric   // E1[3', 2']
1778706b4fc4SDimitry Andric   // E2[3'']
1779706b4fc4SDimitry Andric   // And then we move instructions based on their stages to have:
1780706b4fc4SDimitry Andric   // E0[3]
1781706b4fc4SDimitry Andric   // E1[2, 3']
1782706b4fc4SDimitry Andric   // E2[1, 2', 3'']
1783706b4fc4SDimitry Andric   // The transformation is legal because we only move instructions past
1784706b4fc4SDimitry Andric   // instructions of a previous loop iteration.
17851d5ae102SDimitry Andric   for (int I = 1; I <= Schedule.getNumStages() - 1; ++I) {
1786706b4fc4SDimitry Andric     Epilogs.push_back(peelKernel(LPD_Back));
1787706b4fc4SDimitry Andric     MachineBasicBlock *B = Epilogs.back();
1788706b4fc4SDimitry Andric     filterInstructions(B, Schedule.getNumStages() - I);
1789706b4fc4SDimitry Andric     // Keep track at which iteration each phi belongs to. We need it to know
1790706b4fc4SDimitry Andric     // what version of the variable to use during prologue/epilogue stitching.
1791706b4fc4SDimitry Andric     EliminateDeadPhis(B, MRI, LIS, /*KeepSingleSrcPhi=*/true);
1792c0981da4SDimitry Andric     for (MachineInstr &Phi : B->phis())
1793c0981da4SDimitry Andric       PhiNodeLoopIteration[&Phi] = Schedule.getNumStages() - I;
17941d5ae102SDimitry Andric   }
1795706b4fc4SDimitry Andric   for (size_t I = 0; I < Epilogs.size(); I++) {
1796706b4fc4SDimitry Andric     LS.reset();
1797706b4fc4SDimitry Andric     for (size_t J = I; J < Epilogs.size(); J++) {
1798706b4fc4SDimitry Andric       int Iteration = J;
1799706b4fc4SDimitry Andric       unsigned Stage = Schedule.getNumStages() - 1 + I - J;
1800706b4fc4SDimitry Andric       // Move stage one block at a time so that Phi nodes are updated correctly.
1801706b4fc4SDimitry Andric       for (size_t K = Iteration; K > I; K--)
1802706b4fc4SDimitry Andric         moveStageBetweenBlocks(Epilogs[K - 1], Epilogs[K], Stage);
18036f8fc217SDimitry Andric       LS[Stage] = true;
1804706b4fc4SDimitry Andric     }
1805706b4fc4SDimitry Andric     LiveStages[Epilogs[I]] = LS;
1806706b4fc4SDimitry Andric     AvailableStages[Epilogs[I]] = AS;
18071d5ae102SDimitry Andric   }
18081d5ae102SDimitry Andric 
18091d5ae102SDimitry Andric   // Now we've defined all the prolog and epilog blocks as a fallthrough
18101d5ae102SDimitry Andric   // sequence, add the edges that will be followed if the loop trip count is
18111d5ae102SDimitry Andric   // lower than the number of stages (connecting prologs directly with epilogs).
18121d5ae102SDimitry Andric   auto PI = Prologs.begin();
18131d5ae102SDimitry Andric   auto EI = Epilogs.begin();
18141d5ae102SDimitry Andric   assert(Prologs.size() == Epilogs.size());
18151d5ae102SDimitry Andric   for (; PI != Prologs.end(); ++PI, ++EI) {
18161d5ae102SDimitry Andric     MachineBasicBlock *Pred = *(*EI)->pred_begin();
18171d5ae102SDimitry Andric     (*PI)->addSuccessor(*EI);
18181d5ae102SDimitry Andric     for (MachineInstr &MI : (*EI)->phis()) {
18191d5ae102SDimitry Andric       Register Reg = MI.getOperand(1).getReg();
18201d5ae102SDimitry Andric       MachineInstr *Use = MRI.getUniqueVRegDef(Reg);
1821706b4fc4SDimitry Andric       if (Use && Use->getParent() == Pred) {
1822706b4fc4SDimitry Andric         MachineInstr *CanonicalUse = CanonicalMIs[Use];
1823706b4fc4SDimitry Andric         if (CanonicalUse->isPHI()) {
1824706b4fc4SDimitry Andric           // If the use comes from a phi we need to skip as many phi as the
1825706b4fc4SDimitry Andric           // distance between the epilogue and the kernel. Trace through the phi
1826706b4fc4SDimitry Andric           // chain to find the right value.
1827706b4fc4SDimitry Andric           Reg = getPhiCanonicalReg(CanonicalUse, Use);
1828706b4fc4SDimitry Andric         }
18291d5ae102SDimitry Andric         Reg = getEquivalentRegisterIn(Reg, *PI);
1830706b4fc4SDimitry Andric       }
18311d5ae102SDimitry Andric       MI.addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/false));
18321d5ae102SDimitry Andric       MI.addOperand(MachineOperand::CreateMBB(*PI));
18331d5ae102SDimitry Andric     }
18341d5ae102SDimitry Andric   }
18351d5ae102SDimitry Andric 
18361d5ae102SDimitry Andric   // Create a list of all blocks in order.
18371d5ae102SDimitry Andric   SmallVector<MachineBasicBlock *, 8> Blocks;
18381d5ae102SDimitry Andric   llvm::copy(PeeledFront, std::back_inserter(Blocks));
18391d5ae102SDimitry Andric   Blocks.push_back(BB);
18401d5ae102SDimitry Andric   llvm::copy(PeeledBack, std::back_inserter(Blocks));
18411d5ae102SDimitry Andric 
18421d5ae102SDimitry Andric   // Iterate in reverse order over all instructions, remapping as we go.
18431d5ae102SDimitry Andric   for (MachineBasicBlock *B : reverse(Blocks)) {
1844145449b1SDimitry Andric     for (auto I = B->instr_rbegin();
18451d5ae102SDimitry Andric          I != std::next(B->getFirstNonPHI()->getReverseIterator());) {
1846145449b1SDimitry Andric       MachineBasicBlock::reverse_instr_iterator MI = I++;
1847145449b1SDimitry Andric       rewriteUsesOf(&*MI);
18481d5ae102SDimitry Andric     }
18491d5ae102SDimitry Andric   }
1850706b4fc4SDimitry Andric   for (auto *MI : IllegalPhisToDelete) {
1851706b4fc4SDimitry Andric     if (LIS)
1852706b4fc4SDimitry Andric       LIS->RemoveMachineInstrFromMaps(*MI);
1853706b4fc4SDimitry Andric     MI->eraseFromParent();
1854706b4fc4SDimitry Andric   }
1855706b4fc4SDimitry Andric   IllegalPhisToDelete.clear();
1856706b4fc4SDimitry Andric 
18571d5ae102SDimitry Andric   // Now all remapping has been done, we're free to optimize the generated code.
18581d5ae102SDimitry Andric   for (MachineBasicBlock *B : reverse(Blocks))
18591d5ae102SDimitry Andric     EliminateDeadPhis(B, MRI, LIS);
18601d5ae102SDimitry Andric   EliminateDeadPhis(ExitingBB, MRI, LIS);
18611d5ae102SDimitry Andric }
18621d5ae102SDimitry Andric 
CreateLCSSAExitingBlock()18631d5ae102SDimitry Andric MachineBasicBlock *PeelingModuloScheduleExpander::CreateLCSSAExitingBlock() {
18641d5ae102SDimitry Andric   MachineFunction &MF = *BB->getParent();
18651d5ae102SDimitry Andric   MachineBasicBlock *Exit = *BB->succ_begin();
18661d5ae102SDimitry Andric   if (Exit == BB)
18671d5ae102SDimitry Andric     Exit = *std::next(BB->succ_begin());
18681d5ae102SDimitry Andric 
18691d5ae102SDimitry Andric   MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
18701d5ae102SDimitry Andric   MF.insert(std::next(BB->getIterator()), NewBB);
18711d5ae102SDimitry Andric 
18721d5ae102SDimitry Andric   // Clone all phis in BB into NewBB and rewrite.
18731d5ae102SDimitry Andric   for (MachineInstr &MI : BB->phis()) {
18741d5ae102SDimitry Andric     auto RC = MRI.getRegClass(MI.getOperand(0).getReg());
18751d5ae102SDimitry Andric     Register OldR = MI.getOperand(3).getReg();
18761d5ae102SDimitry Andric     Register R = MRI.createVirtualRegister(RC);
18771d5ae102SDimitry Andric     SmallVector<MachineInstr *, 4> Uses;
18781d5ae102SDimitry Andric     for (MachineInstr &Use : MRI.use_instructions(OldR))
18791d5ae102SDimitry Andric       if (Use.getParent() != BB)
18801d5ae102SDimitry Andric         Uses.push_back(&Use);
18811d5ae102SDimitry Andric     for (MachineInstr *Use : Uses)
18821d5ae102SDimitry Andric       Use->substituteRegister(OldR, R, /*SubIdx=*/0,
18831d5ae102SDimitry Andric                               *MRI.getTargetRegisterInfo());
18841d5ae102SDimitry Andric     MachineInstr *NI = BuildMI(NewBB, DebugLoc(), TII->get(TargetOpcode::PHI), R)
18851d5ae102SDimitry Andric         .addReg(OldR)
18861d5ae102SDimitry Andric         .addMBB(BB);
18871d5ae102SDimitry Andric     BlockMIs[{NewBB, &MI}] = NI;
18881d5ae102SDimitry Andric     CanonicalMIs[NI] = &MI;
18891d5ae102SDimitry Andric   }
18901d5ae102SDimitry Andric   BB->replaceSuccessor(Exit, NewBB);
18911d5ae102SDimitry Andric   Exit->replacePhiUsesWith(BB, NewBB);
18921d5ae102SDimitry Andric   NewBB->addSuccessor(Exit);
18931d5ae102SDimitry Andric 
18941d5ae102SDimitry Andric   MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
18951d5ae102SDimitry Andric   SmallVector<MachineOperand, 4> Cond;
18961d5ae102SDimitry Andric   bool CanAnalyzeBr = !TII->analyzeBranch(*BB, TBB, FBB, Cond);
18971d5ae102SDimitry Andric   (void)CanAnalyzeBr;
18981d5ae102SDimitry Andric   assert(CanAnalyzeBr && "Must be able to analyze the loop branch!");
18991d5ae102SDimitry Andric   TII->removeBranch(*BB);
19001d5ae102SDimitry Andric   TII->insertBranch(*BB, TBB == Exit ? NewBB : TBB, FBB == Exit ? NewBB : FBB,
19011d5ae102SDimitry Andric                     Cond, DebugLoc());
19021d5ae102SDimitry Andric   TII->insertUnconditionalBranch(*NewBB, Exit, DebugLoc());
19031d5ae102SDimitry Andric   return NewBB;
19041d5ae102SDimitry Andric }
19051d5ae102SDimitry Andric 
19061d5ae102SDimitry Andric Register
getEquivalentRegisterIn(Register Reg,MachineBasicBlock * BB)19071d5ae102SDimitry Andric PeelingModuloScheduleExpander::getEquivalentRegisterIn(Register Reg,
19081d5ae102SDimitry Andric                                                        MachineBasicBlock *BB) {
19091d5ae102SDimitry Andric   MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
1910ac9a064cSDimitry Andric   unsigned OpIdx = MI->findRegisterDefOperandIdx(Reg, /*TRI=*/nullptr);
19111d5ae102SDimitry Andric   return BlockMIs[{BB, CanonicalMIs[MI]}]->getOperand(OpIdx).getReg();
19121d5ae102SDimitry Andric }
19131d5ae102SDimitry Andric 
rewriteUsesOf(MachineInstr * MI)19141d5ae102SDimitry Andric void PeelingModuloScheduleExpander::rewriteUsesOf(MachineInstr *MI) {
19151d5ae102SDimitry Andric   if (MI->isPHI()) {
19161d5ae102SDimitry Andric     // This is an illegal PHI. The loop-carried (desired) value is operand 3,
19171d5ae102SDimitry Andric     // and it is produced by this block.
19181d5ae102SDimitry Andric     Register PhiR = MI->getOperand(0).getReg();
19191d5ae102SDimitry Andric     Register R = MI->getOperand(3).getReg();
19201d5ae102SDimitry Andric     int RMIStage = getStage(MRI.getUniqueVRegDef(R));
19211d5ae102SDimitry Andric     if (RMIStage != -1 && !AvailableStages[MI->getParent()].test(RMIStage))
19221d5ae102SDimitry Andric       R = MI->getOperand(1).getReg();
19231d5ae102SDimitry Andric     MRI.setRegClass(R, MRI.getRegClass(PhiR));
19241d5ae102SDimitry Andric     MRI.replaceRegWith(PhiR, R);
1925706b4fc4SDimitry Andric     // Postpone deleting the Phi as it may be referenced by BlockMIs and used
1926706b4fc4SDimitry Andric     // later to figure out how to remap registers.
1927706b4fc4SDimitry Andric     MI->getOperand(0).setReg(PhiR);
1928706b4fc4SDimitry Andric     IllegalPhisToDelete.push_back(MI);
19291d5ae102SDimitry Andric     return;
19301d5ae102SDimitry Andric   }
19311d5ae102SDimitry Andric 
19321d5ae102SDimitry Andric   int Stage = getStage(MI);
19331d5ae102SDimitry Andric   if (Stage == -1 || LiveStages.count(MI->getParent()) == 0 ||
19341d5ae102SDimitry Andric       LiveStages[MI->getParent()].test(Stage))
19351d5ae102SDimitry Andric     // Instruction is live, no rewriting to do.
19361d5ae102SDimitry Andric     return;
19371d5ae102SDimitry Andric 
19381d5ae102SDimitry Andric   for (MachineOperand &DefMO : MI->defs()) {
19391d5ae102SDimitry Andric     SmallVector<std::pair<MachineInstr *, Register>, 4> Subs;
19401d5ae102SDimitry Andric     for (MachineInstr &UseMI : MRI.use_instructions(DefMO.getReg())) {
19411d5ae102SDimitry Andric       // Only PHIs can use values from this block by construction.
19421d5ae102SDimitry Andric       // Match with the equivalent PHI in B.
19431d5ae102SDimitry Andric       assert(UseMI.isPHI());
19441d5ae102SDimitry Andric       Register Reg = getEquivalentRegisterIn(UseMI.getOperand(0).getReg(),
19451d5ae102SDimitry Andric                                              MI->getParent());
19461d5ae102SDimitry Andric       Subs.emplace_back(&UseMI, Reg);
19471d5ae102SDimitry Andric     }
19481d5ae102SDimitry Andric     for (auto &Sub : Subs)
19491d5ae102SDimitry Andric       Sub.first->substituteRegister(DefMO.getReg(), Sub.second, /*SubIdx=*/0,
19501d5ae102SDimitry Andric                                     *MRI.getTargetRegisterInfo());
19511d5ae102SDimitry Andric   }
19521d5ae102SDimitry Andric   if (LIS)
19531d5ae102SDimitry Andric     LIS->RemoveMachineInstrFromMaps(*MI);
19541d5ae102SDimitry Andric   MI->eraseFromParent();
19551d5ae102SDimitry Andric }
19561d5ae102SDimitry Andric 
fixupBranches()19571d5ae102SDimitry Andric void PeelingModuloScheduleExpander::fixupBranches() {
19581d5ae102SDimitry Andric   // Work outwards from the kernel.
19591d5ae102SDimitry Andric   bool KernelDisposed = false;
19601d5ae102SDimitry Andric   int TC = Schedule.getNumStages() - 1;
19611d5ae102SDimitry Andric   for (auto PI = Prologs.rbegin(), EI = Epilogs.rbegin(); PI != Prologs.rend();
19621d5ae102SDimitry Andric        ++PI, ++EI, --TC) {
19631d5ae102SDimitry Andric     MachineBasicBlock *Prolog = *PI;
19641d5ae102SDimitry Andric     MachineBasicBlock *Fallthrough = *Prolog->succ_begin();
19651d5ae102SDimitry Andric     MachineBasicBlock *Epilog = *EI;
19661d5ae102SDimitry Andric     SmallVector<MachineOperand, 4> Cond;
19671d5ae102SDimitry Andric     TII->removeBranch(*Prolog);
1968e3b55780SDimitry Andric     std::optional<bool> StaticallyGreater =
1969cfca06d7SDimitry Andric         LoopInfo->createTripCountGreaterCondition(TC, *Prolog, Cond);
1970145449b1SDimitry Andric     if (!StaticallyGreater) {
19711d5ae102SDimitry Andric       LLVM_DEBUG(dbgs() << "Dynamic: TC > " << TC << "\n");
19721d5ae102SDimitry Andric       // Dynamically branch based on Cond.
19731d5ae102SDimitry Andric       TII->insertBranch(*Prolog, Epilog, Fallthrough, Cond, DebugLoc());
19741d5ae102SDimitry Andric     } else if (*StaticallyGreater == false) {
19751d5ae102SDimitry Andric       LLVM_DEBUG(dbgs() << "Static-false: TC > " << TC << "\n");
19761d5ae102SDimitry Andric       // Prolog never falls through; branch to epilog and orphan interior
19771d5ae102SDimitry Andric       // blocks. Leave it to unreachable-block-elim to clean up.
19781d5ae102SDimitry Andric       Prolog->removeSuccessor(Fallthrough);
19791d5ae102SDimitry Andric       for (MachineInstr &P : Fallthrough->phis()) {
1980145449b1SDimitry Andric         P.removeOperand(2);
1981145449b1SDimitry Andric         P.removeOperand(1);
19821d5ae102SDimitry Andric       }
19831d5ae102SDimitry Andric       TII->insertUnconditionalBranch(*Prolog, Epilog, DebugLoc());
19841d5ae102SDimitry Andric       KernelDisposed = true;
19851d5ae102SDimitry Andric     } else {
19861d5ae102SDimitry Andric       LLVM_DEBUG(dbgs() << "Static-true: TC > " << TC << "\n");
19871d5ae102SDimitry Andric       // Prolog always falls through; remove incoming values in epilog.
19881d5ae102SDimitry Andric       Prolog->removeSuccessor(Epilog);
19891d5ae102SDimitry Andric       for (MachineInstr &P : Epilog->phis()) {
1990145449b1SDimitry Andric         P.removeOperand(4);
1991145449b1SDimitry Andric         P.removeOperand(3);
19921d5ae102SDimitry Andric       }
19931d5ae102SDimitry Andric     }
19941d5ae102SDimitry Andric   }
19951d5ae102SDimitry Andric 
19961d5ae102SDimitry Andric   if (!KernelDisposed) {
1997cfca06d7SDimitry Andric     LoopInfo->adjustTripCount(-(Schedule.getNumStages() - 1));
1998cfca06d7SDimitry Andric     LoopInfo->setPreheader(Prologs.back());
19991d5ae102SDimitry Andric   } else {
2000cfca06d7SDimitry Andric     LoopInfo->disposed();
20011d5ae102SDimitry Andric   }
20021d5ae102SDimitry Andric }
20031d5ae102SDimitry Andric 
rewriteKernel()20041d5ae102SDimitry Andric void PeelingModuloScheduleExpander::rewriteKernel() {
2005344a3780SDimitry Andric   KernelRewriter KR(*Schedule.getLoop(), Schedule, BB);
20061d5ae102SDimitry Andric   KR.rewrite();
20071d5ae102SDimitry Andric }
20081d5ae102SDimitry Andric 
expand()20091d5ae102SDimitry Andric void PeelingModuloScheduleExpander::expand() {
20101d5ae102SDimitry Andric   BB = Schedule.getLoop()->getTopBlock();
20111d5ae102SDimitry Andric   Preheader = Schedule.getLoop()->getLoopPreheader();
20121d5ae102SDimitry Andric   LLVM_DEBUG(Schedule.dump());
2013cfca06d7SDimitry Andric   LoopInfo = TII->analyzeLoopForPipelining(BB);
2014cfca06d7SDimitry Andric   assert(LoopInfo);
20151d5ae102SDimitry Andric 
20161d5ae102SDimitry Andric   rewriteKernel();
20171d5ae102SDimitry Andric   peelPrologAndEpilogs();
20181d5ae102SDimitry Andric   fixupBranches();
20191d5ae102SDimitry Andric }
20201d5ae102SDimitry Andric 
validateAgainstModuloScheduleExpander()20211d5ae102SDimitry Andric void PeelingModuloScheduleExpander::validateAgainstModuloScheduleExpander() {
20221d5ae102SDimitry Andric   BB = Schedule.getLoop()->getTopBlock();
20231d5ae102SDimitry Andric   Preheader = Schedule.getLoop()->getLoopPreheader();
20241d5ae102SDimitry Andric 
20251d5ae102SDimitry Andric   // Dump the schedule before we invalidate and remap all its instructions.
20261d5ae102SDimitry Andric   // Stash it in a string so we can print it if we found an error.
20271d5ae102SDimitry Andric   std::string ScheduleDump;
20281d5ae102SDimitry Andric   raw_string_ostream OS(ScheduleDump);
20291d5ae102SDimitry Andric   Schedule.print(OS);
20301d5ae102SDimitry Andric   OS.flush();
20311d5ae102SDimitry Andric 
20321d5ae102SDimitry Andric   // First, run the normal ModuleScheduleExpander. We don't support any
20331d5ae102SDimitry Andric   // InstrChanges.
20341d5ae102SDimitry Andric   assert(LIS && "Requires LiveIntervals!");
20351d5ae102SDimitry Andric   ModuloScheduleExpander MSE(MF, Schedule, *LIS,
20361d5ae102SDimitry Andric                              ModuloScheduleExpander::InstrChangesTy());
20371d5ae102SDimitry Andric   MSE.expand();
20381d5ae102SDimitry Andric   MachineBasicBlock *ExpandedKernel = MSE.getRewrittenKernel();
20391d5ae102SDimitry Andric   if (!ExpandedKernel) {
20401d5ae102SDimitry Andric     // The expander optimized away the kernel. We can't do any useful checking.
20411d5ae102SDimitry Andric     MSE.cleanup();
20421d5ae102SDimitry Andric     return;
20431d5ae102SDimitry Andric   }
20441d5ae102SDimitry Andric   // Before running the KernelRewriter, re-add BB into the CFG.
20451d5ae102SDimitry Andric   Preheader->addSuccessor(BB);
20461d5ae102SDimitry Andric 
20471d5ae102SDimitry Andric   // Now run the new expansion algorithm.
2048344a3780SDimitry Andric   KernelRewriter KR(*Schedule.getLoop(), Schedule, BB);
20491d5ae102SDimitry Andric   KR.rewrite();
20501d5ae102SDimitry Andric   peelPrologAndEpilogs();
20511d5ae102SDimitry Andric 
20521d5ae102SDimitry Andric   // Collect all illegal phis that the new algorithm created. We'll give these
20531d5ae102SDimitry Andric   // to KernelOperandInfo.
20541d5ae102SDimitry Andric   SmallPtrSet<MachineInstr *, 4> IllegalPhis;
20551d5ae102SDimitry Andric   for (auto NI = BB->getFirstNonPHI(); NI != BB->end(); ++NI) {
20561d5ae102SDimitry Andric     if (NI->isPHI())
20571d5ae102SDimitry Andric       IllegalPhis.insert(&*NI);
20581d5ae102SDimitry Andric   }
20591d5ae102SDimitry Andric 
20601d5ae102SDimitry Andric   // Co-iterate across both kernels. We expect them to be identical apart from
20611d5ae102SDimitry Andric   // phis and full COPYs (we look through both).
20621d5ae102SDimitry Andric   SmallVector<std::pair<KernelOperandInfo, KernelOperandInfo>, 8> KOIs;
20631d5ae102SDimitry Andric   auto OI = ExpandedKernel->begin();
20641d5ae102SDimitry Andric   auto NI = BB->begin();
20651d5ae102SDimitry Andric   for (; !OI->isTerminator() && !NI->isTerminator(); ++OI, ++NI) {
20661d5ae102SDimitry Andric     while (OI->isPHI() || OI->isFullCopy())
20671d5ae102SDimitry Andric       ++OI;
20681d5ae102SDimitry Andric     while (NI->isPHI() || NI->isFullCopy())
20691d5ae102SDimitry Andric       ++NI;
20701d5ae102SDimitry Andric     assert(OI->getOpcode() == NI->getOpcode() && "Opcodes don't match?!");
20711d5ae102SDimitry Andric     // Analyze every operand separately.
20721d5ae102SDimitry Andric     for (auto OOpI = OI->operands_begin(), NOpI = NI->operands_begin();
20731d5ae102SDimitry Andric          OOpI != OI->operands_end(); ++OOpI, ++NOpI)
20741d5ae102SDimitry Andric       KOIs.emplace_back(KernelOperandInfo(&*OOpI, MRI, IllegalPhis),
20751d5ae102SDimitry Andric                         KernelOperandInfo(&*NOpI, MRI, IllegalPhis));
20761d5ae102SDimitry Andric   }
20771d5ae102SDimitry Andric 
20781d5ae102SDimitry Andric   bool Failed = false;
20791d5ae102SDimitry Andric   for (auto &OldAndNew : KOIs) {
20801d5ae102SDimitry Andric     if (OldAndNew.first == OldAndNew.second)
20811d5ae102SDimitry Andric       continue;
20821d5ae102SDimitry Andric     Failed = true;
20831d5ae102SDimitry Andric     errs() << "Modulo kernel validation error: [\n";
20841d5ae102SDimitry Andric     errs() << " [golden] ";
20851d5ae102SDimitry Andric     OldAndNew.first.print(errs());
20861d5ae102SDimitry Andric     errs() << "          ";
20871d5ae102SDimitry Andric     OldAndNew.second.print(errs());
20881d5ae102SDimitry Andric     errs() << "]\n";
20891d5ae102SDimitry Andric   }
20901d5ae102SDimitry Andric 
20911d5ae102SDimitry Andric   if (Failed) {
20921d5ae102SDimitry Andric     errs() << "Golden reference kernel:\n";
20931d5ae102SDimitry Andric     ExpandedKernel->print(errs());
20941d5ae102SDimitry Andric     errs() << "New kernel:\n";
20951d5ae102SDimitry Andric     BB->print(errs());
20961d5ae102SDimitry Andric     errs() << ScheduleDump;
20971d5ae102SDimitry Andric     report_fatal_error(
20981d5ae102SDimitry Andric         "Modulo kernel validation (-pipeliner-experimental-cg) failed");
20991d5ae102SDimitry Andric   }
21001d5ae102SDimitry Andric 
21011d5ae102SDimitry Andric   // Cleanup by removing BB from the CFG again as the original
21021d5ae102SDimitry Andric   // ModuloScheduleExpander intended.
21031d5ae102SDimitry Andric   Preheader->removeSuccessor(BB);
21041d5ae102SDimitry Andric   MSE.cleanup();
21051d5ae102SDimitry Andric }
21061d5ae102SDimitry Andric 
cloneInstr(MachineInstr * OldMI)2107ac9a064cSDimitry Andric MachineInstr *ModuloScheduleExpanderMVE::cloneInstr(MachineInstr *OldMI) {
2108ac9a064cSDimitry Andric   MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
2109ac9a064cSDimitry Andric 
2110ac9a064cSDimitry Andric   // TODO: Offset information needs to be corrected.
2111ac9a064cSDimitry Andric   NewMI->dropMemRefs(MF);
2112ac9a064cSDimitry Andric 
2113ac9a064cSDimitry Andric   return NewMI;
2114ac9a064cSDimitry Andric }
2115ac9a064cSDimitry Andric 
2116ac9a064cSDimitry Andric /// Create a dedicated exit for Loop. Exit is the original exit for Loop.
2117ac9a064cSDimitry Andric /// If it is already dedicated exit, return it. Otherwise, insert a new
2118ac9a064cSDimitry Andric /// block between them and return the new block.
createDedicatedExit(MachineBasicBlock * Loop,MachineBasicBlock * Exit)2119ac9a064cSDimitry Andric static MachineBasicBlock *createDedicatedExit(MachineBasicBlock *Loop,
2120ac9a064cSDimitry Andric                                               MachineBasicBlock *Exit) {
2121ac9a064cSDimitry Andric   if (Exit->pred_size() == 1)
2122ac9a064cSDimitry Andric     return Exit;
2123ac9a064cSDimitry Andric 
2124ac9a064cSDimitry Andric   MachineFunction *MF = Loop->getParent();
2125ac9a064cSDimitry Andric   const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
2126ac9a064cSDimitry Andric 
2127ac9a064cSDimitry Andric   MachineBasicBlock *NewExit =
2128ac9a064cSDimitry Andric       MF->CreateMachineBasicBlock(Loop->getBasicBlock());
2129ac9a064cSDimitry Andric   MF->insert(Loop->getIterator(), NewExit);
2130ac9a064cSDimitry Andric 
2131ac9a064cSDimitry Andric   MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
2132ac9a064cSDimitry Andric   SmallVector<MachineOperand, 4> Cond;
2133ac9a064cSDimitry Andric   TII->analyzeBranch(*Loop, TBB, FBB, Cond);
2134ac9a064cSDimitry Andric   if (TBB == Loop)
2135ac9a064cSDimitry Andric     FBB = NewExit;
2136ac9a064cSDimitry Andric   else if (FBB == Loop)
2137ac9a064cSDimitry Andric     TBB = NewExit;
2138ac9a064cSDimitry Andric   else
2139ac9a064cSDimitry Andric     llvm_unreachable("unexpected loop structure");
2140ac9a064cSDimitry Andric   TII->removeBranch(*Loop);
2141ac9a064cSDimitry Andric   TII->insertBranch(*Loop, TBB, FBB, Cond, DebugLoc());
2142ac9a064cSDimitry Andric   Loop->replaceSuccessor(Exit, NewExit);
2143ac9a064cSDimitry Andric   TII->insertUnconditionalBranch(*NewExit, Exit, DebugLoc());
2144ac9a064cSDimitry Andric   NewExit->addSuccessor(Exit);
2145ac9a064cSDimitry Andric 
2146ac9a064cSDimitry Andric   Exit->replacePhiUsesWith(Loop, NewExit);
2147ac9a064cSDimitry Andric 
2148ac9a064cSDimitry Andric   return NewExit;
2149ac9a064cSDimitry Andric }
2150ac9a064cSDimitry Andric 
2151ac9a064cSDimitry Andric /// Insert branch code into the end of MBB. It branches to GreaterThan if the
2152ac9a064cSDimitry Andric /// remaining trip count for instructions in LastStage0Insts is greater than
2153ac9a064cSDimitry Andric /// RequiredTC, and to Otherwise otherwise.
insertCondBranch(MachineBasicBlock & MBB,int RequiredTC,InstrMapTy & LastStage0Insts,MachineBasicBlock & GreaterThan,MachineBasicBlock & Otherwise)2154ac9a064cSDimitry Andric void ModuloScheduleExpanderMVE::insertCondBranch(MachineBasicBlock &MBB,
2155ac9a064cSDimitry Andric                                                  int RequiredTC,
2156ac9a064cSDimitry Andric                                                  InstrMapTy &LastStage0Insts,
2157ac9a064cSDimitry Andric                                                  MachineBasicBlock &GreaterThan,
2158ac9a064cSDimitry Andric                                                  MachineBasicBlock &Otherwise) {
2159ac9a064cSDimitry Andric   SmallVector<MachineOperand, 4> Cond;
2160ac9a064cSDimitry Andric   LoopInfo->createRemainingIterationsGreaterCondition(RequiredTC, MBB, Cond,
2161ac9a064cSDimitry Andric                                                       LastStage0Insts);
2162ac9a064cSDimitry Andric 
2163ac9a064cSDimitry Andric   if (SwapBranchTargetsMVE) {
2164ac9a064cSDimitry Andric     // Set SwapBranchTargetsMVE to true if a target prefers to replace TBB and
2165ac9a064cSDimitry Andric     // FBB for optimal performance.
2166ac9a064cSDimitry Andric     if (TII->reverseBranchCondition(Cond))
2167ac9a064cSDimitry Andric       llvm_unreachable("can not reverse branch condition");
2168ac9a064cSDimitry Andric     TII->insertBranch(MBB, &Otherwise, &GreaterThan, Cond, DebugLoc());
2169ac9a064cSDimitry Andric   } else {
2170ac9a064cSDimitry Andric     TII->insertBranch(MBB, &GreaterThan, &Otherwise, Cond, DebugLoc());
2171ac9a064cSDimitry Andric   }
2172ac9a064cSDimitry Andric }
2173ac9a064cSDimitry Andric 
2174ac9a064cSDimitry Andric /// Generate a pipelined loop that is unrolled by using MVE algorithm and any
2175ac9a064cSDimitry Andric /// other necessary blocks. The control flow is modified to execute the
2176ac9a064cSDimitry Andric /// pipelined loop if the trip count satisfies the condition, otherwise the
2177ac9a064cSDimitry Andric /// original loop. The original loop is also used to execute the remainder
2178ac9a064cSDimitry Andric /// iterations which occur due to unrolling.
generatePipelinedLoop()2179ac9a064cSDimitry Andric void ModuloScheduleExpanderMVE::generatePipelinedLoop() {
2180ac9a064cSDimitry Andric   // The control flow for pipelining with MVE:
2181ac9a064cSDimitry Andric   //
2182ac9a064cSDimitry Andric   // OrigPreheader:
2183ac9a064cSDimitry Andric   //   // The block that is originally the loop preheader
2184ac9a064cSDimitry Andric   //   goto Check
2185ac9a064cSDimitry Andric   //
2186ac9a064cSDimitry Andric   // Check:
2187ac9a064cSDimitry Andric   //   // Check whether the trip count satisfies the requirements to pipeline.
2188ac9a064cSDimitry Andric   //   if (LoopCounter > NumStages + NumUnroll - 2)
2189ac9a064cSDimitry Andric   //     // The minimum number of iterations to pipeline =
2190ac9a064cSDimitry Andric   //     //   iterations executed in prolog/epilog (NumStages-1) +
2191ac9a064cSDimitry Andric   //     //   iterations executed in one kernel run (NumUnroll)
2192ac9a064cSDimitry Andric   //     goto Prolog
2193ac9a064cSDimitry Andric   //   // fallback to the original loop
2194ac9a064cSDimitry Andric   //   goto NewPreheader
2195ac9a064cSDimitry Andric   //
2196ac9a064cSDimitry Andric   // Prolog:
2197ac9a064cSDimitry Andric   //   // All prolog stages. There are no direct branches to the epilogue.
2198ac9a064cSDimitry Andric   //   goto NewKernel
2199ac9a064cSDimitry Andric   //
2200ac9a064cSDimitry Andric   // NewKernel:
2201ac9a064cSDimitry Andric   //   // NumUnroll copies of the kernel
2202ac9a064cSDimitry Andric   //   if (LoopCounter > MVE-1)
2203ac9a064cSDimitry Andric   //     goto NewKernel
2204ac9a064cSDimitry Andric   //   goto Epilog
2205ac9a064cSDimitry Andric   //
2206ac9a064cSDimitry Andric   // Epilog:
2207ac9a064cSDimitry Andric   //   // All epilog stages.
2208ac9a064cSDimitry Andric   //   if (LoopCounter > 0)
2209ac9a064cSDimitry Andric   //     // The remainder is executed in the original loop
2210ac9a064cSDimitry Andric   //     goto NewPreheader
2211ac9a064cSDimitry Andric   //   goto NewExit
2212ac9a064cSDimitry Andric   //
2213ac9a064cSDimitry Andric   // NewPreheader:
2214ac9a064cSDimitry Andric   //   // Newly created preheader for the original loop.
2215ac9a064cSDimitry Andric   //   // The initial values of the phis in the loop are merged from two paths.
2216ac9a064cSDimitry Andric   //   NewInitVal = Phi OrigInitVal, Check, PipelineLastVal, Epilog
2217ac9a064cSDimitry Andric   //   goto OrigKernel
2218ac9a064cSDimitry Andric   //
2219ac9a064cSDimitry Andric   // OrigKernel:
2220ac9a064cSDimitry Andric   //   // The original loop block.
2221ac9a064cSDimitry Andric   //   if (LoopCounter != 0)
2222ac9a064cSDimitry Andric   //     goto OrigKernel
2223ac9a064cSDimitry Andric   //   goto NewExit
2224ac9a064cSDimitry Andric   //
2225ac9a064cSDimitry Andric   // NewExit:
2226ac9a064cSDimitry Andric   //   // Newly created dedicated exit for the original loop.
2227ac9a064cSDimitry Andric   //   // Merge values which are referenced after the loop
2228ac9a064cSDimitry Andric   //   Merged = Phi OrigVal, OrigKernel, PipelineVal, Epilog
2229ac9a064cSDimitry Andric   //   goto OrigExit
2230ac9a064cSDimitry Andric   //
2231ac9a064cSDimitry Andric   // OrigExit:
2232ac9a064cSDimitry Andric   //   // The block that is originally the loop exit.
2233ac9a064cSDimitry Andric   //   // If it is already deicated exit, NewExit is not created.
2234ac9a064cSDimitry Andric 
2235ac9a064cSDimitry Andric   // An example of where each stage is executed:
2236ac9a064cSDimitry Andric   // Assume #Stages 3, #MVE 4, #Iterations 12
2237ac9a064cSDimitry Andric   // Iter   0 1 2 3 4 5 6 7 8 9 10-11
2238ac9a064cSDimitry Andric   // -------------------------------------------------
2239ac9a064cSDimitry Andric   // Stage  0                          Prolog#0
2240ac9a064cSDimitry Andric   // Stage  1 0                        Prolog#1
2241ac9a064cSDimitry Andric   // Stage  2 1 0                      Kernel Unroll#0 Iter#0
2242ac9a064cSDimitry Andric   // Stage    2 1 0                    Kernel Unroll#1 Iter#0
2243ac9a064cSDimitry Andric   // Stage      2 1 0                  Kernel Unroll#2 Iter#0
2244ac9a064cSDimitry Andric   // Stage        2 1 0                Kernel Unroll#3 Iter#0
2245ac9a064cSDimitry Andric   // Stage          2 1 0              Kernel Unroll#0 Iter#1
2246ac9a064cSDimitry Andric   // Stage            2 1 0            Kernel Unroll#1 Iter#1
2247ac9a064cSDimitry Andric   // Stage              2 1 0          Kernel Unroll#2 Iter#1
2248ac9a064cSDimitry Andric   // Stage                2 1 0        Kernel Unroll#3 Iter#1
2249ac9a064cSDimitry Andric   // Stage                  2 1        Epilog#0
2250ac9a064cSDimitry Andric   // Stage                    2        Epilog#1
2251ac9a064cSDimitry Andric   // Stage                      0-2    OrigKernel
2252ac9a064cSDimitry Andric 
2253ac9a064cSDimitry Andric   LoopInfo = TII->analyzeLoopForPipelining(OrigKernel);
2254ac9a064cSDimitry Andric   assert(LoopInfo && "Must be able to analyze loop!");
2255ac9a064cSDimitry Andric 
2256ac9a064cSDimitry Andric   calcNumUnroll();
2257ac9a064cSDimitry Andric 
2258ac9a064cSDimitry Andric   Check = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
2259ac9a064cSDimitry Andric   Prolog = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
2260ac9a064cSDimitry Andric   NewKernel = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
2261ac9a064cSDimitry Andric   Epilog = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
2262ac9a064cSDimitry Andric   NewPreheader = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
2263ac9a064cSDimitry Andric 
2264ac9a064cSDimitry Andric   MF.insert(OrigKernel->getIterator(), Check);
2265ac9a064cSDimitry Andric   MF.insert(OrigKernel->getIterator(), Prolog);
2266ac9a064cSDimitry Andric   MF.insert(OrigKernel->getIterator(), NewKernel);
2267ac9a064cSDimitry Andric   MF.insert(OrigKernel->getIterator(), Epilog);
2268ac9a064cSDimitry Andric   MF.insert(OrigKernel->getIterator(), NewPreheader);
2269ac9a064cSDimitry Andric 
2270ac9a064cSDimitry Andric   NewExit = createDedicatedExit(OrigKernel, OrigExit);
2271ac9a064cSDimitry Andric 
2272ac9a064cSDimitry Andric   NewPreheader->transferSuccessorsAndUpdatePHIs(OrigPreheader);
2273ac9a064cSDimitry Andric   TII->insertUnconditionalBranch(*NewPreheader, OrigKernel, DebugLoc());
2274ac9a064cSDimitry Andric 
2275ac9a064cSDimitry Andric   OrigPreheader->addSuccessor(Check);
2276ac9a064cSDimitry Andric   TII->removeBranch(*OrigPreheader);
2277ac9a064cSDimitry Andric   TII->insertUnconditionalBranch(*OrigPreheader, Check, DebugLoc());
2278ac9a064cSDimitry Andric 
2279ac9a064cSDimitry Andric   Check->addSuccessor(Prolog);
2280ac9a064cSDimitry Andric   Check->addSuccessor(NewPreheader);
2281ac9a064cSDimitry Andric 
2282ac9a064cSDimitry Andric   Prolog->addSuccessor(NewKernel);
2283ac9a064cSDimitry Andric 
2284ac9a064cSDimitry Andric   NewKernel->addSuccessor(NewKernel);
2285ac9a064cSDimitry Andric   NewKernel->addSuccessor(Epilog);
2286ac9a064cSDimitry Andric 
2287ac9a064cSDimitry Andric   Epilog->addSuccessor(NewPreheader);
2288ac9a064cSDimitry Andric   Epilog->addSuccessor(NewExit);
2289ac9a064cSDimitry Andric 
2290ac9a064cSDimitry Andric   InstrMapTy LastStage0Insts;
2291ac9a064cSDimitry Andric   insertCondBranch(*Check, Schedule.getNumStages() + NumUnroll - 2,
2292ac9a064cSDimitry Andric                    LastStage0Insts, *Prolog, *NewPreheader);
2293ac9a064cSDimitry Andric 
2294ac9a064cSDimitry Andric   // VRMaps map (prolog/kernel/epilog phase#, original register#) to new
2295ac9a064cSDimitry Andric   // register#
2296ac9a064cSDimitry Andric   SmallVector<ValueMapTy> PrologVRMap, KernelVRMap, EpilogVRMap;
2297ac9a064cSDimitry Andric   generateProlog(PrologVRMap);
2298ac9a064cSDimitry Andric   generateKernel(PrologVRMap, KernelVRMap, LastStage0Insts);
2299ac9a064cSDimitry Andric   generateEpilog(KernelVRMap, EpilogVRMap, LastStage0Insts);
2300ac9a064cSDimitry Andric }
2301ac9a064cSDimitry Andric 
2302ac9a064cSDimitry Andric /// Replace MI's use operands according to the maps.
updateInstrUse(MachineInstr * MI,int StageNum,int PhaseNum,SmallVectorImpl<ValueMapTy> & CurVRMap,SmallVectorImpl<ValueMapTy> * PrevVRMap)2303ac9a064cSDimitry Andric void ModuloScheduleExpanderMVE::updateInstrUse(
2304ac9a064cSDimitry Andric     MachineInstr *MI, int StageNum, int PhaseNum,
2305ac9a064cSDimitry Andric     SmallVectorImpl<ValueMapTy> &CurVRMap,
2306ac9a064cSDimitry Andric     SmallVectorImpl<ValueMapTy> *PrevVRMap) {
2307ac9a064cSDimitry Andric   // If MI is in the prolog/kernel/epilog block, CurVRMap is
2308ac9a064cSDimitry Andric   // PrologVRMap/KernelVRMap/EpilogVRMap respectively.
2309ac9a064cSDimitry Andric   // PrevVRMap is nullptr/PhiVRMap/KernelVRMap respectively.
2310ac9a064cSDimitry Andric   // Refer to the appropriate map according to the stage difference between
2311ac9a064cSDimitry Andric   // MI and the definition of an operand.
2312ac9a064cSDimitry Andric 
2313ac9a064cSDimitry Andric   for (MachineOperand &UseMO : MI->uses()) {
2314ac9a064cSDimitry Andric     if (!UseMO.isReg() || !UseMO.getReg().isVirtual())
2315ac9a064cSDimitry Andric       continue;
2316ac9a064cSDimitry Andric     int DiffStage = 0;
2317ac9a064cSDimitry Andric     Register OrigReg = UseMO.getReg();
2318ac9a064cSDimitry Andric     MachineInstr *DefInst = MRI.getVRegDef(OrigReg);
2319ac9a064cSDimitry Andric     if (!DefInst || DefInst->getParent() != OrigKernel)
2320ac9a064cSDimitry Andric       continue;
2321ac9a064cSDimitry Andric     unsigned InitReg = 0;
2322ac9a064cSDimitry Andric     unsigned DefReg = OrigReg;
2323ac9a064cSDimitry Andric     if (DefInst->isPHI()) {
2324ac9a064cSDimitry Andric       ++DiffStage;
2325ac9a064cSDimitry Andric       unsigned LoopReg;
2326ac9a064cSDimitry Andric       getPhiRegs(*DefInst, OrigKernel, InitReg, LoopReg);
2327ac9a064cSDimitry Andric       // LoopReg is guaranteed to be defined within the loop by canApply()
2328ac9a064cSDimitry Andric       DefReg = LoopReg;
2329ac9a064cSDimitry Andric       DefInst = MRI.getVRegDef(LoopReg);
2330ac9a064cSDimitry Andric     }
2331ac9a064cSDimitry Andric     unsigned DefStageNum = Schedule.getStage(DefInst);
2332ac9a064cSDimitry Andric     DiffStage += StageNum - DefStageNum;
2333ac9a064cSDimitry Andric     Register NewReg;
2334ac9a064cSDimitry Andric     if (PhaseNum >= DiffStage && CurVRMap[PhaseNum - DiffStage].count(DefReg))
2335ac9a064cSDimitry Andric       // NewReg is defined in a previous phase of the same block
2336ac9a064cSDimitry Andric       NewReg = CurVRMap[PhaseNum - DiffStage][DefReg];
2337ac9a064cSDimitry Andric     else if (!PrevVRMap)
2338ac9a064cSDimitry Andric       // Since this is the first iteration, refer the initial register of the
2339ac9a064cSDimitry Andric       // loop
2340ac9a064cSDimitry Andric       NewReg = InitReg;
2341ac9a064cSDimitry Andric     else
2342ac9a064cSDimitry Andric       // Cases where DiffStage is larger than PhaseNum.
2343ac9a064cSDimitry Andric       // If MI is in the kernel block, the value is defined by the previous
2344ac9a064cSDimitry Andric       // iteration and PhiVRMap is referenced. If MI is in the epilog block, the
2345ac9a064cSDimitry Andric       // value is defined in the kernel block and KernelVRMap is referenced.
2346ac9a064cSDimitry Andric       NewReg = (*PrevVRMap)[PrevVRMap->size() - (DiffStage - PhaseNum)][DefReg];
2347ac9a064cSDimitry Andric 
2348ac9a064cSDimitry Andric     const TargetRegisterClass *NRC =
2349ac9a064cSDimitry Andric         MRI.constrainRegClass(NewReg, MRI.getRegClass(OrigReg));
2350ac9a064cSDimitry Andric     if (NRC)
2351ac9a064cSDimitry Andric       UseMO.setReg(NewReg);
2352ac9a064cSDimitry Andric     else {
2353ac9a064cSDimitry Andric       Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));
2354ac9a064cSDimitry Andric       BuildMI(*OrigKernel, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY),
2355ac9a064cSDimitry Andric               SplitReg)
2356ac9a064cSDimitry Andric           .addReg(NewReg);
2357ac9a064cSDimitry Andric       UseMO.setReg(SplitReg);
2358ac9a064cSDimitry Andric     }
2359ac9a064cSDimitry Andric   }
2360ac9a064cSDimitry Andric }
2361ac9a064cSDimitry Andric 
2362ac9a064cSDimitry Andric /// Return a phi if Reg is referenced by the phi.
2363ac9a064cSDimitry Andric /// canApply() guarantees that at most only one such phi exists.
getLoopPhiUser(Register Reg,MachineBasicBlock * Loop)2364ac9a064cSDimitry Andric static MachineInstr *getLoopPhiUser(Register Reg, MachineBasicBlock *Loop) {
2365ac9a064cSDimitry Andric   for (MachineInstr &Phi : Loop->phis()) {
2366ac9a064cSDimitry Andric     unsigned InitVal, LoopVal;
2367ac9a064cSDimitry Andric     getPhiRegs(Phi, Loop, InitVal, LoopVal);
2368ac9a064cSDimitry Andric     if (LoopVal == Reg)
2369ac9a064cSDimitry Andric       return &Phi;
2370ac9a064cSDimitry Andric   }
2371ac9a064cSDimitry Andric   return nullptr;
2372ac9a064cSDimitry Andric }
2373ac9a064cSDimitry Andric 
2374ac9a064cSDimitry Andric /// Generate phis for registers defined by OrigMI.
generatePhi(MachineInstr * OrigMI,int UnrollNum,SmallVectorImpl<ValueMapTy> & PrologVRMap,SmallVectorImpl<ValueMapTy> & KernelVRMap,SmallVectorImpl<ValueMapTy> & PhiVRMap)2375ac9a064cSDimitry Andric void ModuloScheduleExpanderMVE::generatePhi(
2376ac9a064cSDimitry Andric     MachineInstr *OrigMI, int UnrollNum,
2377ac9a064cSDimitry Andric     SmallVectorImpl<ValueMapTy> &PrologVRMap,
2378ac9a064cSDimitry Andric     SmallVectorImpl<ValueMapTy> &KernelVRMap,
2379ac9a064cSDimitry Andric     SmallVectorImpl<ValueMapTy> &PhiVRMap) {
2380ac9a064cSDimitry Andric   int StageNum = Schedule.getStage(OrigMI);
2381ac9a064cSDimitry Andric   bool UsePrologReg;
2382ac9a064cSDimitry Andric   if (Schedule.getNumStages() - NumUnroll + UnrollNum - 1 >= StageNum)
2383ac9a064cSDimitry Andric     UsePrologReg = true;
2384ac9a064cSDimitry Andric   else if (Schedule.getNumStages() - NumUnroll + UnrollNum == StageNum)
2385ac9a064cSDimitry Andric     UsePrologReg = false;
2386ac9a064cSDimitry Andric   else
2387ac9a064cSDimitry Andric     return;
2388ac9a064cSDimitry Andric 
2389ac9a064cSDimitry Andric   // Examples that show which stages are merged by phi.
2390ac9a064cSDimitry Andric   // Meaning of the symbol following the stage number:
2391ac9a064cSDimitry Andric   //   a/b: Stages with the same letter are merged (UsePrologReg == true)
2392ac9a064cSDimitry Andric   //   +: Merged with the initial value (UsePrologReg == false)
2393ac9a064cSDimitry Andric   //   *: No phis required
2394ac9a064cSDimitry Andric   //
2395ac9a064cSDimitry Andric   // #Stages 3, #MVE 4
2396ac9a064cSDimitry Andric   // Iter   0 1 2 3 4 5 6 7 8
2397ac9a064cSDimitry Andric   // -----------------------------------------
2398ac9a064cSDimitry Andric   // Stage  0a                 Prolog#0
2399ac9a064cSDimitry Andric   // Stage  1a 0b              Prolog#1
2400ac9a064cSDimitry Andric   // Stage  2* 1* 0*           Kernel Unroll#0
2401ac9a064cSDimitry Andric   // Stage     2* 1* 0+        Kernel Unroll#1
2402ac9a064cSDimitry Andric   // Stage        2* 1+ 0a     Kernel Unroll#2
2403ac9a064cSDimitry Andric   // Stage           2+ 1a 0b  Kernel Unroll#3
2404ac9a064cSDimitry Andric   //
2405ac9a064cSDimitry Andric   // #Stages 3, #MVE 2
2406ac9a064cSDimitry Andric   // Iter   0 1 2 3 4 5 6 7 8
2407ac9a064cSDimitry Andric   // -----------------------------------------
2408ac9a064cSDimitry Andric   // Stage  0a                 Prolog#0
2409ac9a064cSDimitry Andric   // Stage  1a 0b              Prolog#1
2410ac9a064cSDimitry Andric   // Stage  2* 1+ 0a           Kernel Unroll#0
2411ac9a064cSDimitry Andric   // Stage     2+ 1a 0b        Kernel Unroll#1
2412ac9a064cSDimitry Andric   //
2413ac9a064cSDimitry Andric   // #Stages 3, #MVE 1
2414ac9a064cSDimitry Andric   // Iter   0 1 2 3 4 5 6 7 8
2415ac9a064cSDimitry Andric   // -----------------------------------------
2416ac9a064cSDimitry Andric   // Stage  0*                 Prolog#0
2417ac9a064cSDimitry Andric   // Stage  1a 0b              Prolog#1
2418ac9a064cSDimitry Andric   // Stage  2+ 1a 0b           Kernel Unroll#0
2419ac9a064cSDimitry Andric 
2420ac9a064cSDimitry Andric   for (MachineOperand &DefMO : OrigMI->defs()) {
2421ac9a064cSDimitry Andric     if (!DefMO.isReg() || DefMO.isDead())
2422ac9a064cSDimitry Andric       continue;
2423ac9a064cSDimitry Andric     Register OrigReg = DefMO.getReg();
2424ac9a064cSDimitry Andric     auto NewReg = KernelVRMap[UnrollNum].find(OrigReg);
2425ac9a064cSDimitry Andric     if (NewReg == KernelVRMap[UnrollNum].end())
2426ac9a064cSDimitry Andric       continue;
2427ac9a064cSDimitry Andric     Register CorrespondReg;
2428ac9a064cSDimitry Andric     if (UsePrologReg) {
2429ac9a064cSDimitry Andric       int PrologNum = Schedule.getNumStages() - NumUnroll + UnrollNum - 1;
2430ac9a064cSDimitry Andric       CorrespondReg = PrologVRMap[PrologNum][OrigReg];
2431ac9a064cSDimitry Andric     } else {
2432ac9a064cSDimitry Andric       MachineInstr *Phi = getLoopPhiUser(OrigReg, OrigKernel);
2433ac9a064cSDimitry Andric       if (!Phi)
2434ac9a064cSDimitry Andric         continue;
2435ac9a064cSDimitry Andric       CorrespondReg = getInitPhiReg(*Phi, OrigKernel);
2436ac9a064cSDimitry Andric     }
2437ac9a064cSDimitry Andric 
2438ac9a064cSDimitry Andric     assert(CorrespondReg.isValid());
2439ac9a064cSDimitry Andric     Register PhiReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));
2440ac9a064cSDimitry Andric     BuildMI(*NewKernel, NewKernel->getFirstNonPHI(), DebugLoc(),
2441ac9a064cSDimitry Andric             TII->get(TargetOpcode::PHI), PhiReg)
2442ac9a064cSDimitry Andric         .addReg(NewReg->second)
2443ac9a064cSDimitry Andric         .addMBB(NewKernel)
2444ac9a064cSDimitry Andric         .addReg(CorrespondReg)
2445ac9a064cSDimitry Andric         .addMBB(Prolog);
2446ac9a064cSDimitry Andric     PhiVRMap[UnrollNum][OrigReg] = PhiReg;
2447ac9a064cSDimitry Andric   }
2448ac9a064cSDimitry Andric }
2449ac9a064cSDimitry Andric 
replacePhiSrc(MachineInstr & Phi,Register OrigReg,Register NewReg,MachineBasicBlock * NewMBB)2450ac9a064cSDimitry Andric static void replacePhiSrc(MachineInstr &Phi, Register OrigReg, Register NewReg,
2451ac9a064cSDimitry Andric                           MachineBasicBlock *NewMBB) {
2452ac9a064cSDimitry Andric   for (unsigned Idx = 1; Idx < Phi.getNumOperands(); Idx += 2) {
2453ac9a064cSDimitry Andric     if (Phi.getOperand(Idx).getReg() == OrigReg) {
2454ac9a064cSDimitry Andric       Phi.getOperand(Idx).setReg(NewReg);
2455ac9a064cSDimitry Andric       Phi.getOperand(Idx + 1).setMBB(NewMBB);
2456ac9a064cSDimitry Andric       return;
2457ac9a064cSDimitry Andric     }
2458ac9a064cSDimitry Andric   }
2459ac9a064cSDimitry Andric }
2460ac9a064cSDimitry Andric 
2461ac9a064cSDimitry Andric /// Generate phis that merge values from multiple routes
mergeRegUsesAfterPipeline(Register OrigReg,Register NewReg)2462ac9a064cSDimitry Andric void ModuloScheduleExpanderMVE::mergeRegUsesAfterPipeline(Register OrigReg,
2463ac9a064cSDimitry Andric                                                           Register NewReg) {
2464ac9a064cSDimitry Andric   SmallVector<MachineOperand *> UsesAfterLoop;
2465ac9a064cSDimitry Andric   SmallVector<MachineInstr *> LoopPhis;
2466ac9a064cSDimitry Andric   for (MachineRegisterInfo::use_iterator I = MRI.use_begin(OrigReg),
2467ac9a064cSDimitry Andric                                          E = MRI.use_end();
2468ac9a064cSDimitry Andric        I != E; ++I) {
2469ac9a064cSDimitry Andric     MachineOperand &O = *I;
2470ac9a064cSDimitry Andric     if (O.getParent()->getParent() != OrigKernel &&
2471ac9a064cSDimitry Andric         O.getParent()->getParent() != Prolog &&
2472ac9a064cSDimitry Andric         O.getParent()->getParent() != NewKernel &&
2473ac9a064cSDimitry Andric         O.getParent()->getParent() != Epilog)
2474ac9a064cSDimitry Andric       UsesAfterLoop.push_back(&O);
2475ac9a064cSDimitry Andric     if (O.getParent()->getParent() == OrigKernel && O.getParent()->isPHI())
2476ac9a064cSDimitry Andric       LoopPhis.push_back(O.getParent());
2477ac9a064cSDimitry Andric   }
2478ac9a064cSDimitry Andric 
2479ac9a064cSDimitry Andric   // Merge the route that only execute the pipelined loop (when there are no
2480ac9a064cSDimitry Andric   // remaining iterations) with the route that execute the original loop.
2481ac9a064cSDimitry Andric   if (!UsesAfterLoop.empty()) {
2482ac9a064cSDimitry Andric     Register PhiReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));
2483ac9a064cSDimitry Andric     BuildMI(*NewExit, NewExit->getFirstNonPHI(), DebugLoc(),
2484ac9a064cSDimitry Andric             TII->get(TargetOpcode::PHI), PhiReg)
2485ac9a064cSDimitry Andric         .addReg(OrigReg)
2486ac9a064cSDimitry Andric         .addMBB(OrigKernel)
2487ac9a064cSDimitry Andric         .addReg(NewReg)
2488ac9a064cSDimitry Andric         .addMBB(Epilog);
2489ac9a064cSDimitry Andric 
2490ac9a064cSDimitry Andric     for (MachineOperand *MO : UsesAfterLoop)
2491ac9a064cSDimitry Andric       MO->setReg(PhiReg);
2492ac9a064cSDimitry Andric 
2493ac9a064cSDimitry Andric     if (!LIS.hasInterval(PhiReg))
2494ac9a064cSDimitry Andric       LIS.createEmptyInterval(PhiReg);
2495ac9a064cSDimitry Andric   }
2496ac9a064cSDimitry Andric 
2497ac9a064cSDimitry Andric   // Merge routes from the pipelined loop and the bypassed route before the
2498ac9a064cSDimitry Andric   // original loop
2499ac9a064cSDimitry Andric   if (!LoopPhis.empty()) {
2500ac9a064cSDimitry Andric     for (MachineInstr *Phi : LoopPhis) {
2501ac9a064cSDimitry Andric       unsigned InitReg, LoopReg;
2502ac9a064cSDimitry Andric       getPhiRegs(*Phi, OrigKernel, InitReg, LoopReg);
2503ac9a064cSDimitry Andric       Register NewInit = MRI.createVirtualRegister(MRI.getRegClass(InitReg));
2504ac9a064cSDimitry Andric       BuildMI(*NewPreheader, NewPreheader->getFirstNonPHI(), Phi->getDebugLoc(),
2505ac9a064cSDimitry Andric               TII->get(TargetOpcode::PHI), NewInit)
2506ac9a064cSDimitry Andric           .addReg(InitReg)
2507ac9a064cSDimitry Andric           .addMBB(Check)
2508ac9a064cSDimitry Andric           .addReg(NewReg)
2509ac9a064cSDimitry Andric           .addMBB(Epilog);
2510ac9a064cSDimitry Andric       replacePhiSrc(*Phi, InitReg, NewInit, NewPreheader);
2511ac9a064cSDimitry Andric     }
2512ac9a064cSDimitry Andric   }
2513ac9a064cSDimitry Andric }
2514ac9a064cSDimitry Andric 
generateProlog(SmallVectorImpl<ValueMapTy> & PrologVRMap)2515ac9a064cSDimitry Andric void ModuloScheduleExpanderMVE::generateProlog(
2516ac9a064cSDimitry Andric     SmallVectorImpl<ValueMapTy> &PrologVRMap) {
2517ac9a064cSDimitry Andric   PrologVRMap.clear();
2518ac9a064cSDimitry Andric   PrologVRMap.resize(Schedule.getNumStages() - 1);
2519ac9a064cSDimitry Andric   DenseMap<MachineInstr *, std::pair<int, int>> NewMIMap;
2520ac9a064cSDimitry Andric   for (int PrologNum = 0; PrologNum < Schedule.getNumStages() - 1;
2521ac9a064cSDimitry Andric        ++PrologNum) {
2522ac9a064cSDimitry Andric     for (MachineInstr *MI : Schedule.getInstructions()) {
2523ac9a064cSDimitry Andric       if (MI->isPHI())
2524ac9a064cSDimitry Andric         continue;
2525ac9a064cSDimitry Andric       int StageNum = Schedule.getStage(MI);
2526ac9a064cSDimitry Andric       if (StageNum > PrologNum)
2527ac9a064cSDimitry Andric         continue;
2528ac9a064cSDimitry Andric       MachineInstr *NewMI = cloneInstr(MI);
2529ac9a064cSDimitry Andric       updateInstrDef(NewMI, PrologVRMap[PrologNum], false);
2530ac9a064cSDimitry Andric       NewMIMap[NewMI] = {PrologNum, StageNum};
2531ac9a064cSDimitry Andric       Prolog->push_back(NewMI);
2532ac9a064cSDimitry Andric     }
2533ac9a064cSDimitry Andric   }
2534ac9a064cSDimitry Andric 
2535ac9a064cSDimitry Andric   for (auto I : NewMIMap) {
2536ac9a064cSDimitry Andric     MachineInstr *MI = I.first;
2537ac9a064cSDimitry Andric     int PrologNum = I.second.first;
2538ac9a064cSDimitry Andric     int StageNum = I.second.second;
2539ac9a064cSDimitry Andric     updateInstrUse(MI, StageNum, PrologNum, PrologVRMap, nullptr);
2540ac9a064cSDimitry Andric   }
2541ac9a064cSDimitry Andric 
2542ac9a064cSDimitry Andric   LLVM_DEBUG({
2543ac9a064cSDimitry Andric     dbgs() << "prolog:\n";
2544ac9a064cSDimitry Andric     Prolog->dump();
2545ac9a064cSDimitry Andric   });
2546ac9a064cSDimitry Andric }
2547ac9a064cSDimitry Andric 
generateKernel(SmallVectorImpl<ValueMapTy> & PrologVRMap,SmallVectorImpl<ValueMapTy> & KernelVRMap,InstrMapTy & LastStage0Insts)2548ac9a064cSDimitry Andric void ModuloScheduleExpanderMVE::generateKernel(
2549ac9a064cSDimitry Andric     SmallVectorImpl<ValueMapTy> &PrologVRMap,
2550ac9a064cSDimitry Andric     SmallVectorImpl<ValueMapTy> &KernelVRMap, InstrMapTy &LastStage0Insts) {
2551ac9a064cSDimitry Andric   KernelVRMap.clear();
2552ac9a064cSDimitry Andric   KernelVRMap.resize(NumUnroll);
2553ac9a064cSDimitry Andric   SmallVector<ValueMapTy> PhiVRMap;
2554ac9a064cSDimitry Andric   PhiVRMap.resize(NumUnroll);
2555ac9a064cSDimitry Andric   DenseMap<MachineInstr *, std::pair<int, int>> NewMIMap;
2556ac9a064cSDimitry Andric   DenseMap<MachineInstr *, MachineInstr *> MIMapLastStage0;
2557ac9a064cSDimitry Andric   for (int UnrollNum = 0; UnrollNum < NumUnroll; ++UnrollNum) {
2558ac9a064cSDimitry Andric     for (MachineInstr *MI : Schedule.getInstructions()) {
2559ac9a064cSDimitry Andric       if (MI->isPHI())
2560ac9a064cSDimitry Andric         continue;
2561ac9a064cSDimitry Andric       int StageNum = Schedule.getStage(MI);
2562ac9a064cSDimitry Andric       MachineInstr *NewMI = cloneInstr(MI);
2563ac9a064cSDimitry Andric       if (UnrollNum == NumUnroll - 1)
2564ac9a064cSDimitry Andric         LastStage0Insts[MI] = NewMI;
2565ac9a064cSDimitry Andric       updateInstrDef(NewMI, KernelVRMap[UnrollNum],
2566ac9a064cSDimitry Andric                      (UnrollNum == NumUnroll - 1 && StageNum == 0));
2567ac9a064cSDimitry Andric       generatePhi(MI, UnrollNum, PrologVRMap, KernelVRMap, PhiVRMap);
2568ac9a064cSDimitry Andric       NewMIMap[NewMI] = {UnrollNum, StageNum};
2569ac9a064cSDimitry Andric       NewKernel->push_back(NewMI);
2570ac9a064cSDimitry Andric     }
2571ac9a064cSDimitry Andric   }
2572ac9a064cSDimitry Andric 
2573ac9a064cSDimitry Andric   for (auto I : NewMIMap) {
2574ac9a064cSDimitry Andric     MachineInstr *MI = I.first;
2575ac9a064cSDimitry Andric     int UnrollNum = I.second.first;
2576ac9a064cSDimitry Andric     int StageNum = I.second.second;
2577ac9a064cSDimitry Andric     updateInstrUse(MI, StageNum, UnrollNum, KernelVRMap, &PhiVRMap);
2578ac9a064cSDimitry Andric   }
2579ac9a064cSDimitry Andric 
2580ac9a064cSDimitry Andric   // If remaining trip count is greater than NumUnroll-1, loop continues
2581ac9a064cSDimitry Andric   insertCondBranch(*NewKernel, NumUnroll - 1, LastStage0Insts, *NewKernel,
2582ac9a064cSDimitry Andric                    *Epilog);
2583ac9a064cSDimitry Andric 
2584ac9a064cSDimitry Andric   LLVM_DEBUG({
2585ac9a064cSDimitry Andric     dbgs() << "kernel:\n";
2586ac9a064cSDimitry Andric     NewKernel->dump();
2587ac9a064cSDimitry Andric   });
2588ac9a064cSDimitry Andric }
2589ac9a064cSDimitry Andric 
generateEpilog(SmallVectorImpl<ValueMapTy> & KernelVRMap,SmallVectorImpl<ValueMapTy> & EpilogVRMap,InstrMapTy & LastStage0Insts)2590ac9a064cSDimitry Andric void ModuloScheduleExpanderMVE::generateEpilog(
2591ac9a064cSDimitry Andric     SmallVectorImpl<ValueMapTy> &KernelVRMap,
2592ac9a064cSDimitry Andric     SmallVectorImpl<ValueMapTy> &EpilogVRMap, InstrMapTy &LastStage0Insts) {
2593ac9a064cSDimitry Andric   EpilogVRMap.clear();
2594ac9a064cSDimitry Andric   EpilogVRMap.resize(Schedule.getNumStages() - 1);
2595ac9a064cSDimitry Andric   DenseMap<MachineInstr *, std::pair<int, int>> NewMIMap;
2596ac9a064cSDimitry Andric   for (int EpilogNum = 0; EpilogNum < Schedule.getNumStages() - 1;
2597ac9a064cSDimitry Andric        ++EpilogNum) {
2598ac9a064cSDimitry Andric     for (MachineInstr *MI : Schedule.getInstructions()) {
2599ac9a064cSDimitry Andric       if (MI->isPHI())
2600ac9a064cSDimitry Andric         continue;
2601ac9a064cSDimitry Andric       int StageNum = Schedule.getStage(MI);
2602ac9a064cSDimitry Andric       if (StageNum <= EpilogNum)
2603ac9a064cSDimitry Andric         continue;
2604ac9a064cSDimitry Andric       MachineInstr *NewMI = cloneInstr(MI);
2605ac9a064cSDimitry Andric       updateInstrDef(NewMI, EpilogVRMap[EpilogNum], StageNum - 1 == EpilogNum);
2606ac9a064cSDimitry Andric       NewMIMap[NewMI] = {EpilogNum, StageNum};
2607ac9a064cSDimitry Andric       Epilog->push_back(NewMI);
2608ac9a064cSDimitry Andric     }
2609ac9a064cSDimitry Andric   }
2610ac9a064cSDimitry Andric 
2611ac9a064cSDimitry Andric   for (auto I : NewMIMap) {
2612ac9a064cSDimitry Andric     MachineInstr *MI = I.first;
2613ac9a064cSDimitry Andric     int EpilogNum = I.second.first;
2614ac9a064cSDimitry Andric     int StageNum = I.second.second;
2615ac9a064cSDimitry Andric     updateInstrUse(MI, StageNum, EpilogNum, EpilogVRMap, &KernelVRMap);
2616ac9a064cSDimitry Andric   }
2617ac9a064cSDimitry Andric 
2618ac9a064cSDimitry Andric   // If there are remaining iterations, they are executed in the original loop.
2619ac9a064cSDimitry Andric   // Instructions related to loop control, such as loop counter comparison,
2620ac9a064cSDimitry Andric   // are indicated by shouldIgnoreForPipelining() and are assumed to be placed
2621ac9a064cSDimitry Andric   // in stage 0. Thus, the map is for the last one in the kernel.
2622ac9a064cSDimitry Andric   insertCondBranch(*Epilog, 0, LastStage0Insts, *NewPreheader, *NewExit);
2623ac9a064cSDimitry Andric 
2624ac9a064cSDimitry Andric   LLVM_DEBUG({
2625ac9a064cSDimitry Andric     dbgs() << "epilog:\n";
2626ac9a064cSDimitry Andric     Epilog->dump();
2627ac9a064cSDimitry Andric   });
2628ac9a064cSDimitry Andric }
2629ac9a064cSDimitry Andric 
2630ac9a064cSDimitry Andric /// Calculate the number of unroll required and set it to NumUnroll
calcNumUnroll()2631ac9a064cSDimitry Andric void ModuloScheduleExpanderMVE::calcNumUnroll() {
2632ac9a064cSDimitry Andric   DenseMap<MachineInstr *, unsigned> Inst2Idx;
2633ac9a064cSDimitry Andric   NumUnroll = 1;
2634ac9a064cSDimitry Andric   for (unsigned I = 0; I < Schedule.getInstructions().size(); ++I)
2635ac9a064cSDimitry Andric     Inst2Idx[Schedule.getInstructions()[I]] = I;
2636ac9a064cSDimitry Andric 
2637ac9a064cSDimitry Andric   for (MachineInstr *MI : Schedule.getInstructions()) {
2638ac9a064cSDimitry Andric     if (MI->isPHI())
2639ac9a064cSDimitry Andric       continue;
2640ac9a064cSDimitry Andric     int StageNum = Schedule.getStage(MI);
2641ac9a064cSDimitry Andric     for (const MachineOperand &MO : MI->uses()) {
2642ac9a064cSDimitry Andric       if (!MO.isReg() || !MO.getReg().isVirtual())
2643ac9a064cSDimitry Andric         continue;
2644ac9a064cSDimitry Andric       MachineInstr *DefMI = MRI.getVRegDef(MO.getReg());
2645ac9a064cSDimitry Andric       if (DefMI->getParent() != OrigKernel)
2646ac9a064cSDimitry Andric         continue;
2647ac9a064cSDimitry Andric 
2648ac9a064cSDimitry Andric       int NumUnrollLocal = 1;
2649ac9a064cSDimitry Andric       if (DefMI->isPHI()) {
2650ac9a064cSDimitry Andric         ++NumUnrollLocal;
2651ac9a064cSDimitry Andric         // canApply() guarantees that DefMI is not phi and is an instruction in
2652ac9a064cSDimitry Andric         // the loop
2653ac9a064cSDimitry Andric         DefMI = MRI.getVRegDef(getLoopPhiReg(*DefMI, OrigKernel));
2654ac9a064cSDimitry Andric       }
2655ac9a064cSDimitry Andric       NumUnrollLocal += StageNum - Schedule.getStage(DefMI);
2656ac9a064cSDimitry Andric       if (Inst2Idx[MI] <= Inst2Idx[DefMI])
2657ac9a064cSDimitry Andric         --NumUnrollLocal;
2658ac9a064cSDimitry Andric       NumUnroll = std::max(NumUnroll, NumUnrollLocal);
2659ac9a064cSDimitry Andric     }
2660ac9a064cSDimitry Andric   }
2661ac9a064cSDimitry Andric   LLVM_DEBUG(dbgs() << "NumUnroll: " << NumUnroll << "\n");
2662ac9a064cSDimitry Andric }
2663ac9a064cSDimitry Andric 
2664ac9a064cSDimitry Andric /// Create new virtual registers for definitions of NewMI and update NewMI.
2665ac9a064cSDimitry Andric /// If the definitions are referenced after the pipelined loop, phis are
2666ac9a064cSDimitry Andric /// created to merge with other routes.
updateInstrDef(MachineInstr * NewMI,ValueMapTy & VRMap,bool LastDef)2667ac9a064cSDimitry Andric void ModuloScheduleExpanderMVE::updateInstrDef(MachineInstr *NewMI,
2668ac9a064cSDimitry Andric                                                ValueMapTy &VRMap,
2669ac9a064cSDimitry Andric                                                bool LastDef) {
2670ac9a064cSDimitry Andric   for (MachineOperand &MO : NewMI->operands()) {
2671ac9a064cSDimitry Andric     if (!MO.isReg() || !MO.getReg().isVirtual() || !MO.isDef())
2672ac9a064cSDimitry Andric       continue;
2673ac9a064cSDimitry Andric     Register Reg = MO.getReg();
2674ac9a064cSDimitry Andric     const TargetRegisterClass *RC = MRI.getRegClass(Reg);
2675ac9a064cSDimitry Andric     Register NewReg = MRI.createVirtualRegister(RC);
2676ac9a064cSDimitry Andric     MO.setReg(NewReg);
2677ac9a064cSDimitry Andric     VRMap[Reg] = NewReg;
2678ac9a064cSDimitry Andric     if (LastDef)
2679ac9a064cSDimitry Andric       mergeRegUsesAfterPipeline(Reg, NewReg);
2680ac9a064cSDimitry Andric   }
2681ac9a064cSDimitry Andric }
2682ac9a064cSDimitry Andric 
expand()2683ac9a064cSDimitry Andric void ModuloScheduleExpanderMVE::expand() {
2684ac9a064cSDimitry Andric   OrigKernel = Schedule.getLoop()->getTopBlock();
2685ac9a064cSDimitry Andric   OrigPreheader = Schedule.getLoop()->getLoopPreheader();
2686ac9a064cSDimitry Andric   OrigExit = Schedule.getLoop()->getExitBlock();
2687ac9a064cSDimitry Andric 
2688ac9a064cSDimitry Andric   LLVM_DEBUG(Schedule.dump());
2689ac9a064cSDimitry Andric 
2690ac9a064cSDimitry Andric   generatePipelinedLoop();
2691ac9a064cSDimitry Andric }
2692ac9a064cSDimitry Andric 
2693ac9a064cSDimitry Andric /// Check if ModuloScheduleExpanderMVE can be applied to L
canApply(MachineLoop & L)2694ac9a064cSDimitry Andric bool ModuloScheduleExpanderMVE::canApply(MachineLoop &L) {
2695ac9a064cSDimitry Andric   if (!L.getExitBlock()) {
2696ac9a064cSDimitry Andric     LLVM_DEBUG(
2697ac9a064cSDimitry Andric         dbgs() << "Can not apply MVE expander: No single exit block.\n";);
2698ac9a064cSDimitry Andric     return false;
2699ac9a064cSDimitry Andric   }
2700ac9a064cSDimitry Andric 
2701ac9a064cSDimitry Andric   MachineBasicBlock *BB = L.getTopBlock();
2702ac9a064cSDimitry Andric   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
2703ac9a064cSDimitry Andric 
2704ac9a064cSDimitry Andric   // Put some constraints on the operands of the phis to simplify the
2705ac9a064cSDimitry Andric   // transformation
2706ac9a064cSDimitry Andric   DenseSet<unsigned> UsedByPhi;
2707ac9a064cSDimitry Andric   for (MachineInstr &MI : BB->phis()) {
2708ac9a064cSDimitry Andric     // Registers defined by phis must be used only inside the loop and be never
2709ac9a064cSDimitry Andric     // used by phis.
2710ac9a064cSDimitry Andric     for (MachineOperand &MO : MI.defs())
2711ac9a064cSDimitry Andric       if (MO.isReg())
2712ac9a064cSDimitry Andric         for (MachineInstr &Ref : MRI.use_instructions(MO.getReg()))
2713ac9a064cSDimitry Andric           if (Ref.getParent() != BB || Ref.isPHI()) {
2714ac9a064cSDimitry Andric             LLVM_DEBUG(dbgs()
2715ac9a064cSDimitry Andric                            << "Can not apply MVE expander: A phi result is "
2716ac9a064cSDimitry Andric                               "referenced outside of the loop or by phi.\n";);
2717ac9a064cSDimitry Andric             return false;
2718ac9a064cSDimitry Andric           }
2719ac9a064cSDimitry Andric 
2720ac9a064cSDimitry Andric     // A source register from the loop block must be defined inside the loop.
2721ac9a064cSDimitry Andric     // A register defined inside the loop must be referenced by only one phi at
2722ac9a064cSDimitry Andric     // most.
2723ac9a064cSDimitry Andric     unsigned InitVal, LoopVal;
2724ac9a064cSDimitry Andric     getPhiRegs(MI, MI.getParent(), InitVal, LoopVal);
2725ac9a064cSDimitry Andric     if (!Register(LoopVal).isVirtual() ||
2726ac9a064cSDimitry Andric         MRI.getVRegDef(LoopVal)->getParent() != BB) {
2727ac9a064cSDimitry Andric       LLVM_DEBUG(
2728ac9a064cSDimitry Andric           dbgs() << "Can not apply MVE expander: A phi source value coming "
2729ac9a064cSDimitry Andric                     "from the loop is not defined in the loop.\n";);
2730ac9a064cSDimitry Andric       return false;
2731ac9a064cSDimitry Andric     }
2732ac9a064cSDimitry Andric     if (UsedByPhi.count(LoopVal)) {
2733ac9a064cSDimitry Andric       LLVM_DEBUG(dbgs() << "Can not apply MVE expander: A value defined in the "
2734ac9a064cSDimitry Andric                            "loop is referenced by two or more phis.\n";);
2735ac9a064cSDimitry Andric       return false;
2736ac9a064cSDimitry Andric     }
2737ac9a064cSDimitry Andric     UsedByPhi.insert(LoopVal);
2738ac9a064cSDimitry Andric   }
2739ac9a064cSDimitry Andric 
2740ac9a064cSDimitry Andric   return true;
2741ac9a064cSDimitry Andric }
2742ac9a064cSDimitry Andric 
27431d5ae102SDimitry Andric //===----------------------------------------------------------------------===//
27441d5ae102SDimitry Andric // ModuloScheduleTestPass implementation
27451d5ae102SDimitry Andric //===----------------------------------------------------------------------===//
27461d5ae102SDimitry Andric // This pass constructs a ModuloSchedule from its module and runs
27471d5ae102SDimitry Andric // ModuloScheduleExpander.
27481d5ae102SDimitry Andric //
27491d5ae102SDimitry Andric // The module is expected to contain a single-block analyzable loop.
27501d5ae102SDimitry Andric // The total order of instructions is taken from the loop as-is.
27511d5ae102SDimitry Andric // Instructions are expected to be annotated with a PostInstrSymbol.
27521d5ae102SDimitry Andric // This PostInstrSymbol must have the following format:
27531d5ae102SDimitry Andric //  "Stage=%d Cycle=%d".
27541d5ae102SDimitry Andric //===----------------------------------------------------------------------===//
27551d5ae102SDimitry Andric 
27561d5ae102SDimitry Andric namespace {
27571d5ae102SDimitry Andric class ModuloScheduleTest : public MachineFunctionPass {
27581d5ae102SDimitry Andric public:
27591d5ae102SDimitry Andric   static char ID;
27601d5ae102SDimitry Andric 
ModuloScheduleTest()27611d5ae102SDimitry Andric   ModuloScheduleTest() : MachineFunctionPass(ID) {
27621d5ae102SDimitry Andric     initializeModuloScheduleTestPass(*PassRegistry::getPassRegistry());
27631d5ae102SDimitry Andric   }
27641d5ae102SDimitry Andric 
27651d5ae102SDimitry Andric   bool runOnMachineFunction(MachineFunction &MF) override;
27661d5ae102SDimitry Andric   void runOnLoop(MachineFunction &MF, MachineLoop &L);
27671d5ae102SDimitry Andric 
getAnalysisUsage(AnalysisUsage & AU) const27681d5ae102SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
2769ac9a064cSDimitry Andric     AU.addRequired<MachineLoopInfoWrapperPass>();
2770ac9a064cSDimitry Andric     AU.addRequired<LiveIntervalsWrapperPass>();
27711d5ae102SDimitry Andric     MachineFunctionPass::getAnalysisUsage(AU);
27721d5ae102SDimitry Andric   }
27731d5ae102SDimitry Andric };
27741d5ae102SDimitry Andric } // namespace
27751d5ae102SDimitry Andric 
27761d5ae102SDimitry Andric char ModuloScheduleTest::ID = 0;
27771d5ae102SDimitry Andric 
27781d5ae102SDimitry Andric INITIALIZE_PASS_BEGIN(ModuloScheduleTest, "modulo-schedule-test",
27791d5ae102SDimitry Andric                       "Modulo Schedule test pass", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)2780ac9a064cSDimitry Andric INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
2781ac9a064cSDimitry Andric INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
27821d5ae102SDimitry Andric INITIALIZE_PASS_END(ModuloScheduleTest, "modulo-schedule-test",
27831d5ae102SDimitry Andric                     "Modulo Schedule test pass", false, false)
27841d5ae102SDimitry Andric 
27851d5ae102SDimitry Andric bool ModuloScheduleTest::runOnMachineFunction(MachineFunction &MF) {
2786ac9a064cSDimitry Andric   MachineLoopInfo &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
27871d5ae102SDimitry Andric   for (auto *L : MLI) {
27881d5ae102SDimitry Andric     if (L->getTopBlock() != L->getBottomBlock())
27891d5ae102SDimitry Andric       continue;
27901d5ae102SDimitry Andric     runOnLoop(MF, *L);
27911d5ae102SDimitry Andric     return false;
27921d5ae102SDimitry Andric   }
27931d5ae102SDimitry Andric   return false;
27941d5ae102SDimitry Andric }
27951d5ae102SDimitry Andric 
parseSymbolString(StringRef S,int & Cycle,int & Stage)27961d5ae102SDimitry Andric static void parseSymbolString(StringRef S, int &Cycle, int &Stage) {
27971d5ae102SDimitry Andric   std::pair<StringRef, StringRef> StageAndCycle = getToken(S, "_");
27981d5ae102SDimitry Andric   std::pair<StringRef, StringRef> StageTokenAndValue =
27991d5ae102SDimitry Andric       getToken(StageAndCycle.first, "-");
28001d5ae102SDimitry Andric   std::pair<StringRef, StringRef> CycleTokenAndValue =
28011d5ae102SDimitry Andric       getToken(StageAndCycle.second, "-");
28021d5ae102SDimitry Andric   if (StageTokenAndValue.first != "Stage" ||
28031d5ae102SDimitry Andric       CycleTokenAndValue.first != "_Cycle") {
28041d5ae102SDimitry Andric     llvm_unreachable(
28051d5ae102SDimitry Andric         "Bad post-instr symbol syntax: see comment in ModuloScheduleTest");
28061d5ae102SDimitry Andric     return;
28071d5ae102SDimitry Andric   }
28081d5ae102SDimitry Andric 
28091d5ae102SDimitry Andric   StageTokenAndValue.second.drop_front().getAsInteger(10, Stage);
28101d5ae102SDimitry Andric   CycleTokenAndValue.second.drop_front().getAsInteger(10, Cycle);
28111d5ae102SDimitry Andric 
28121d5ae102SDimitry Andric   dbgs() << "  Stage=" << Stage << ", Cycle=" << Cycle << "\n";
28131d5ae102SDimitry Andric }
28141d5ae102SDimitry Andric 
runOnLoop(MachineFunction & MF,MachineLoop & L)28151d5ae102SDimitry Andric void ModuloScheduleTest::runOnLoop(MachineFunction &MF, MachineLoop &L) {
2816ac9a064cSDimitry Andric   LiveIntervals &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS();
28171d5ae102SDimitry Andric   MachineBasicBlock *BB = L.getTopBlock();
28181d5ae102SDimitry Andric   dbgs() << "--- ModuloScheduleTest running on BB#" << BB->getNumber() << "\n";
28191d5ae102SDimitry Andric 
28201d5ae102SDimitry Andric   DenseMap<MachineInstr *, int> Cycle, Stage;
28211d5ae102SDimitry Andric   std::vector<MachineInstr *> Instrs;
28221d5ae102SDimitry Andric   for (MachineInstr &MI : *BB) {
28231d5ae102SDimitry Andric     if (MI.isTerminator())
28241d5ae102SDimitry Andric       continue;
28251d5ae102SDimitry Andric     Instrs.push_back(&MI);
28261d5ae102SDimitry Andric     if (MCSymbol *Sym = MI.getPostInstrSymbol()) {
28271d5ae102SDimitry Andric       dbgs() << "Parsing post-instr symbol for " << MI;
28281d5ae102SDimitry Andric       parseSymbolString(Sym->getName(), Cycle[&MI], Stage[&MI]);
28291d5ae102SDimitry Andric     }
28301d5ae102SDimitry Andric   }
28311d5ae102SDimitry Andric 
28321d5ae102SDimitry Andric   ModuloSchedule MS(MF, &L, std::move(Instrs), std::move(Cycle),
28331d5ae102SDimitry Andric                     std::move(Stage));
28341d5ae102SDimitry Andric   ModuloScheduleExpander MSE(
28351d5ae102SDimitry Andric       MF, MS, LIS, /*InstrChanges=*/ModuloScheduleExpander::InstrChangesTy());
28361d5ae102SDimitry Andric   MSE.expand();
28371d5ae102SDimitry Andric   MSE.cleanup();
28381d5ae102SDimitry Andric }
28391d5ae102SDimitry Andric 
28401d5ae102SDimitry Andric //===----------------------------------------------------------------------===//
28411d5ae102SDimitry Andric // ModuloScheduleTestAnnotater implementation
28421d5ae102SDimitry Andric //===----------------------------------------------------------------------===//
28431d5ae102SDimitry Andric 
annotate()28441d5ae102SDimitry Andric void ModuloScheduleTestAnnotater::annotate() {
28451d5ae102SDimitry Andric   for (MachineInstr *MI : S.getInstructions()) {
28461d5ae102SDimitry Andric     SmallVector<char, 16> SV;
28471d5ae102SDimitry Andric     raw_svector_ostream OS(SV);
28481d5ae102SDimitry Andric     OS << "Stage-" << S.getStage(MI) << "_Cycle-" << S.getCycle(MI);
28491d5ae102SDimitry Andric     MCSymbol *Sym = MF.getContext().getOrCreateSymbol(OS.str());
28501d5ae102SDimitry Andric     MI->setPostInstrSymbol(MF, Sym);
28511d5ae102SDimitry Andric   }
28521d5ae102SDimitry Andric }
2853