xref: /src/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1c0981da4SDimitry Andric //===- LoadStoreOpt.cpp ----------- Generic memory optimizations -*- C++ -*-==//
2c0981da4SDimitry Andric //
3c0981da4SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4c0981da4SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5c0981da4SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6c0981da4SDimitry Andric //
7c0981da4SDimitry Andric //===----------------------------------------------------------------------===//
8c0981da4SDimitry Andric /// \file
9c0981da4SDimitry Andric /// This file implements the LoadStoreOpt optimization pass.
10c0981da4SDimitry Andric //===----------------------------------------------------------------------===//
11c0981da4SDimitry Andric 
12c0981da4SDimitry Andric #include "llvm/CodeGen/GlobalISel/LoadStoreOpt.h"
137fa27ce4SDimitry Andric #include "llvm/ADT/STLExtras.h"
147fa27ce4SDimitry Andric #include "llvm/ADT/SmallPtrSet.h"
15c0981da4SDimitry Andric #include "llvm/ADT/Statistic.h"
16c0981da4SDimitry Andric #include "llvm/Analysis/AliasAnalysis.h"
17c0981da4SDimitry Andric #include "llvm/Analysis/MemoryLocation.h"
18c0981da4SDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h"
19c0981da4SDimitry Andric #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
20c0981da4SDimitry Andric #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
21c0981da4SDimitry Andric #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
22c0981da4SDimitry Andric #include "llvm/CodeGen/GlobalISel/Utils.h"
237fa27ce4SDimitry Andric #include "llvm/CodeGen/LowLevelTypeUtils.h"
24c0981da4SDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h"
25c0981da4SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h"
26c0981da4SDimitry Andric #include "llvm/CodeGen/MachineFunction.h"
27c0981da4SDimitry Andric #include "llvm/CodeGen/MachineInstr.h"
28c0981da4SDimitry Andric #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
29c0981da4SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
30c0981da4SDimitry Andric #include "llvm/CodeGen/Register.h"
31c0981da4SDimitry Andric #include "llvm/CodeGen/TargetLowering.h"
32c0981da4SDimitry Andric #include "llvm/CodeGen/TargetOpcodes.h"
33c0981da4SDimitry Andric #include "llvm/IR/DebugInfoMetadata.h"
34c0981da4SDimitry Andric #include "llvm/InitializePasses.h"
35c0981da4SDimitry Andric #include "llvm/Support/AtomicOrdering.h"
36c0981da4SDimitry Andric #include "llvm/Support/Casting.h"
37c0981da4SDimitry Andric #include "llvm/Support/Debug.h"
38c0981da4SDimitry Andric #include "llvm/Support/ErrorHandling.h"
39c0981da4SDimitry Andric #include "llvm/Support/MathExtras.h"
40c0981da4SDimitry Andric #include <algorithm>
41c0981da4SDimitry Andric 
#define DEBUG_TYPE "loadstore-opt"

using namespace llvm;
using namespace ore;
using namespace MIPatternMatch;

// Pass-wide counter, reported under -stats.
STATISTIC(NumStoresMerged, "Number of stores merged");

// Cap on the size of a merged store we will attempt to form.
// NOTE(review): presumably in bits (LegalStoreSizes is indexed by bit width
// elsewhere in this file) — not referenced in this chunk; confirm at use site.
const unsigned MaxStoreSizeToForm = 128;

// Standard legacy-PM pass registration boilerplate.
char LoadStoreOpt::ID = 0;
INITIALIZE_PASS_BEGIN(LoadStoreOpt, DEBUG_TYPE, "Generic memory optimizations",
                      false, false)
INITIALIZE_PASS_END(LoadStoreOpt, DEBUG_TYPE, "Generic memory optimizations",
                    false, false)
57c0981da4SDimitry Andric 
LoadStoreOpt(std::function<bool (const MachineFunction &)> F)58c0981da4SDimitry Andric LoadStoreOpt::LoadStoreOpt(std::function<bool(const MachineFunction &)> F)
59c0981da4SDimitry Andric     : MachineFunctionPass(ID), DoNotRunPass(F) {}
60c0981da4SDimitry Andric 
LoadStoreOpt()61c0981da4SDimitry Andric LoadStoreOpt::LoadStoreOpt()
62c0981da4SDimitry Andric     : LoadStoreOpt([](const MachineFunction &) { return false; }) {}
63c0981da4SDimitry Andric 
init(MachineFunction & MF)64c0981da4SDimitry Andric void LoadStoreOpt::init(MachineFunction &MF) {
65c0981da4SDimitry Andric   this->MF = &MF;
66c0981da4SDimitry Andric   MRI = &MF.getRegInfo();
67c0981da4SDimitry Andric   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
68c0981da4SDimitry Andric   TLI = MF.getSubtarget().getTargetLowering();
69c0981da4SDimitry Andric   LI = MF.getSubtarget().getLegalizerInfo();
70c0981da4SDimitry Andric   Builder.setMF(MF);
71c0981da4SDimitry Andric   IsPreLegalizer = !MF.getProperties().hasProperty(
72c0981da4SDimitry Andric       MachineFunctionProperties::Property::Legalized);
73c0981da4SDimitry Andric   InstsToErase.clear();
74c0981da4SDimitry Andric }
75c0981da4SDimitry Andric 
getAnalysisUsage(AnalysisUsage & AU) const76c0981da4SDimitry Andric void LoadStoreOpt::getAnalysisUsage(AnalysisUsage &AU) const {
77c0981da4SDimitry Andric   AU.addRequired<AAResultsWrapperPass>();
78145449b1SDimitry Andric   AU.setPreservesAll();
79c0981da4SDimitry Andric   getSelectionDAGFallbackAnalysisUsage(AU);
80c0981da4SDimitry Andric   MachineFunctionPass::getAnalysisUsage(AU);
81c0981da4SDimitry Andric }
82c0981da4SDimitry Andric 
getPointerInfo(Register Ptr,MachineRegisterInfo & MRI)83c0981da4SDimitry Andric BaseIndexOffset GISelAddressing::getPointerInfo(Register Ptr,
84c0981da4SDimitry Andric                                                 MachineRegisterInfo &MRI) {
85c0981da4SDimitry Andric   BaseIndexOffset Info;
86c0981da4SDimitry Andric   Register PtrAddRHS;
87ac9a064cSDimitry Andric   Register BaseReg;
88ac9a064cSDimitry Andric   if (!mi_match(Ptr, MRI, m_GPtrAdd(m_Reg(BaseReg), m_Reg(PtrAddRHS)))) {
89ac9a064cSDimitry Andric     Info.setBase(Ptr);
90ac9a064cSDimitry Andric     Info.setOffset(0);
91c0981da4SDimitry Andric     return Info;
92c0981da4SDimitry Andric   }
93ac9a064cSDimitry Andric   Info.setBase(BaseReg);
94c0981da4SDimitry Andric   auto RHSCst = getIConstantVRegValWithLookThrough(PtrAddRHS, MRI);
95c0981da4SDimitry Andric   if (RHSCst)
96ac9a064cSDimitry Andric     Info.setOffset(RHSCst->Value.getSExtValue());
97c0981da4SDimitry Andric 
98c0981da4SDimitry Andric   // Just recognize a simple case for now. In future we'll need to match
99c0981da4SDimitry Andric   // indexing patterns for base + index + constant.
100ac9a064cSDimitry Andric   Info.setIndex(PtrAddRHS);
101c0981da4SDimitry Andric   return Info;
102c0981da4SDimitry Andric }
103c0981da4SDimitry Andric 
aliasIsKnownForLoadStore(const MachineInstr & MI1,const MachineInstr & MI2,bool & IsAlias,MachineRegisterInfo & MRI)104c0981da4SDimitry Andric bool GISelAddressing::aliasIsKnownForLoadStore(const MachineInstr &MI1,
105c0981da4SDimitry Andric                                                const MachineInstr &MI2,
106c0981da4SDimitry Andric                                                bool &IsAlias,
107c0981da4SDimitry Andric                                                MachineRegisterInfo &MRI) {
108c0981da4SDimitry Andric   auto *LdSt1 = dyn_cast<GLoadStore>(&MI1);
109c0981da4SDimitry Andric   auto *LdSt2 = dyn_cast<GLoadStore>(&MI2);
110c0981da4SDimitry Andric   if (!LdSt1 || !LdSt2)
111c0981da4SDimitry Andric     return false;
112c0981da4SDimitry Andric 
113c0981da4SDimitry Andric   BaseIndexOffset BasePtr0 = getPointerInfo(LdSt1->getPointerReg(), MRI);
114c0981da4SDimitry Andric   BaseIndexOffset BasePtr1 = getPointerInfo(LdSt2->getPointerReg(), MRI);
115c0981da4SDimitry Andric 
116ac9a064cSDimitry Andric   if (!BasePtr0.getBase().isValid() || !BasePtr1.getBase().isValid())
117c0981da4SDimitry Andric     return false;
118c0981da4SDimitry Andric 
119ac9a064cSDimitry Andric   LocationSize Size1 = LdSt1->getMemSize();
120ac9a064cSDimitry Andric   LocationSize Size2 = LdSt2->getMemSize();
121c0981da4SDimitry Andric 
122c0981da4SDimitry Andric   int64_t PtrDiff;
123ac9a064cSDimitry Andric   if (BasePtr0.getBase() == BasePtr1.getBase() && BasePtr0.hasValidOffset() &&
124ac9a064cSDimitry Andric       BasePtr1.hasValidOffset()) {
125ac9a064cSDimitry Andric     PtrDiff = BasePtr1.getOffset() - BasePtr0.getOffset();
126c0981da4SDimitry Andric     // If the size of memory access is unknown, do not use it to do analysis.
127c0981da4SDimitry Andric     // One example of unknown size memory access is to load/store scalable
128c0981da4SDimitry Andric     // vector objects on the stack.
129c0981da4SDimitry Andric     // BasePtr1 is PtrDiff away from BasePtr0. They alias if none of the
130c0981da4SDimitry Andric     // following situations arise:
131ac9a064cSDimitry Andric     if (PtrDiff >= 0 && Size1.hasValue() && !Size1.isScalable()) {
132c0981da4SDimitry Andric       // [----BasePtr0----]
133c0981da4SDimitry Andric       //                         [---BasePtr1--]
134c0981da4SDimitry Andric       // ========PtrDiff========>
135ac9a064cSDimitry Andric       IsAlias = !((int64_t)Size1.getValue() <= PtrDiff);
136c0981da4SDimitry Andric       return true;
137c0981da4SDimitry Andric     }
138ac9a064cSDimitry Andric     if (PtrDiff < 0 && Size2.hasValue() && !Size2.isScalable()) {
139c0981da4SDimitry Andric       //                     [----BasePtr0----]
140c0981da4SDimitry Andric       // [---BasePtr1--]
141c0981da4SDimitry Andric       // =====(-PtrDiff)====>
142ac9a064cSDimitry Andric       IsAlias = !((PtrDiff + (int64_t)Size2.getValue()) <= 0);
143c0981da4SDimitry Andric       return true;
144c0981da4SDimitry Andric     }
145c0981da4SDimitry Andric     return false;
146c0981da4SDimitry Andric   }
147c0981da4SDimitry Andric 
148c0981da4SDimitry Andric   // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
149c0981da4SDimitry Andric   // able to calculate their relative offset if at least one arises
150c0981da4SDimitry Andric   // from an alloca. However, these allocas cannot overlap and we
151c0981da4SDimitry Andric   // can infer there is no alias.
152ac9a064cSDimitry Andric   auto *Base0Def = getDefIgnoringCopies(BasePtr0.getBase(), MRI);
153ac9a064cSDimitry Andric   auto *Base1Def = getDefIgnoringCopies(BasePtr1.getBase(), MRI);
154c0981da4SDimitry Andric   if (!Base0Def || !Base1Def)
155c0981da4SDimitry Andric     return false; // Couldn't tell anything.
156c0981da4SDimitry Andric 
157c0981da4SDimitry Andric 
158c0981da4SDimitry Andric   if (Base0Def->getOpcode() != Base1Def->getOpcode())
159c0981da4SDimitry Andric     return false;
160c0981da4SDimitry Andric 
161c0981da4SDimitry Andric   if (Base0Def->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
162c0981da4SDimitry Andric     MachineFrameInfo &MFI = Base0Def->getMF()->getFrameInfo();
163c0981da4SDimitry Andric     // If the bases have the same frame index but we couldn't find a
164c0981da4SDimitry Andric     // constant offset, (indices are different) be conservative.
165c0981da4SDimitry Andric     if (Base0Def != Base1Def &&
166c0981da4SDimitry Andric         (!MFI.isFixedObjectIndex(Base0Def->getOperand(1).getIndex()) ||
167c0981da4SDimitry Andric          !MFI.isFixedObjectIndex(Base1Def->getOperand(1).getIndex()))) {
168c0981da4SDimitry Andric       IsAlias = false;
169c0981da4SDimitry Andric       return true;
170c0981da4SDimitry Andric     }
171c0981da4SDimitry Andric   }
172c0981da4SDimitry Andric 
173c0981da4SDimitry Andric   // This implementation is a lot more primitive than the SDAG one for now.
174c0981da4SDimitry Andric   // FIXME: what about constant pools?
175c0981da4SDimitry Andric   if (Base0Def->getOpcode() == TargetOpcode::G_GLOBAL_VALUE) {
176c0981da4SDimitry Andric     auto GV0 = Base0Def->getOperand(1).getGlobal();
177c0981da4SDimitry Andric     auto GV1 = Base1Def->getOperand(1).getGlobal();
178c0981da4SDimitry Andric     if (GV0 != GV1) {
179c0981da4SDimitry Andric       IsAlias = false;
180c0981da4SDimitry Andric       return true;
181c0981da4SDimitry Andric     }
182c0981da4SDimitry Andric   }
183c0981da4SDimitry Andric 
184c0981da4SDimitry Andric   // Can't tell anything about aliasing.
185c0981da4SDimitry Andric   return false;
186c0981da4SDimitry Andric }
187c0981da4SDimitry Andric 
instMayAlias(const MachineInstr & MI,const MachineInstr & Other,MachineRegisterInfo & MRI,AliasAnalysis * AA)188c0981da4SDimitry Andric bool GISelAddressing::instMayAlias(const MachineInstr &MI,
189c0981da4SDimitry Andric                                    const MachineInstr &Other,
190c0981da4SDimitry Andric                                    MachineRegisterInfo &MRI,
191c0981da4SDimitry Andric                                    AliasAnalysis *AA) {
192c0981da4SDimitry Andric   struct MemUseCharacteristics {
193c0981da4SDimitry Andric     bool IsVolatile;
194c0981da4SDimitry Andric     bool IsAtomic;
195c0981da4SDimitry Andric     Register BasePtr;
196c0981da4SDimitry Andric     int64_t Offset;
197ac9a064cSDimitry Andric     LocationSize NumBytes;
198c0981da4SDimitry Andric     MachineMemOperand *MMO;
199c0981da4SDimitry Andric   };
200c0981da4SDimitry Andric 
201c0981da4SDimitry Andric   auto getCharacteristics =
202c0981da4SDimitry Andric       [&](const MachineInstr *MI) -> MemUseCharacteristics {
203c0981da4SDimitry Andric     if (const auto *LS = dyn_cast<GLoadStore>(MI)) {
204c0981da4SDimitry Andric       Register BaseReg;
205c0981da4SDimitry Andric       int64_t Offset = 0;
206c0981da4SDimitry Andric       // No pre/post-inc addressing modes are considered here, unlike in SDAG.
207c0981da4SDimitry Andric       if (!mi_match(LS->getPointerReg(), MRI,
208c0981da4SDimitry Andric                     m_GPtrAdd(m_Reg(BaseReg), m_ICst(Offset)))) {
209c0981da4SDimitry Andric         BaseReg = LS->getPointerReg();
210c0981da4SDimitry Andric         Offset = 0;
211c0981da4SDimitry Andric       }
212c0981da4SDimitry Andric 
213ac9a064cSDimitry Andric       LocationSize Size = LS->getMMO().getSize();
214c0981da4SDimitry Andric       return {LS->isVolatile(),       LS->isAtomic(), BaseReg,
215c0981da4SDimitry Andric               Offset /*base offset*/, Size,           &LS->getMMO()};
216c0981da4SDimitry Andric     }
217c0981da4SDimitry Andric     // FIXME: support recognizing lifetime instructions.
218c0981da4SDimitry Andric     // Default.
219c0981da4SDimitry Andric     return {false /*isvolatile*/,
220ac9a064cSDimitry Andric             /*isAtomic*/ false,
221ac9a064cSDimitry Andric             Register(),
222ac9a064cSDimitry Andric             (int64_t)0 /*offset*/,
223ac9a064cSDimitry Andric             LocationSize::beforeOrAfterPointer() /*size*/,
224c0981da4SDimitry Andric             (MachineMemOperand *)nullptr};
225c0981da4SDimitry Andric   };
226c0981da4SDimitry Andric   MemUseCharacteristics MUC0 = getCharacteristics(&MI),
227c0981da4SDimitry Andric                         MUC1 = getCharacteristics(&Other);
228c0981da4SDimitry Andric 
229c0981da4SDimitry Andric   // If they are to the same address, then they must be aliases.
230c0981da4SDimitry Andric   if (MUC0.BasePtr.isValid() && MUC0.BasePtr == MUC1.BasePtr &&
231c0981da4SDimitry Andric       MUC0.Offset == MUC1.Offset)
232c0981da4SDimitry Andric     return true;
233c0981da4SDimitry Andric 
234c0981da4SDimitry Andric   // If they are both volatile then they cannot be reordered.
235c0981da4SDimitry Andric   if (MUC0.IsVolatile && MUC1.IsVolatile)
236c0981da4SDimitry Andric     return true;
237c0981da4SDimitry Andric 
238c0981da4SDimitry Andric   // Be conservative about atomics for the moment
239c0981da4SDimitry Andric   // TODO: This is way overconservative for unordered atomics (see D66309)
240c0981da4SDimitry Andric   if (MUC0.IsAtomic && MUC1.IsAtomic)
241c0981da4SDimitry Andric     return true;
242c0981da4SDimitry Andric 
243c0981da4SDimitry Andric   // If one operation reads from invariant memory, and the other may store, they
244c0981da4SDimitry Andric   // cannot alias.
245c0981da4SDimitry Andric   if (MUC0.MMO && MUC1.MMO) {
246c0981da4SDimitry Andric     if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
247c0981da4SDimitry Andric         (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
248c0981da4SDimitry Andric       return false;
249c0981da4SDimitry Andric   }
250c0981da4SDimitry Andric 
251ac9a064cSDimitry Andric   // If NumBytes is scalable and offset is not 0, conservatively return may
252ac9a064cSDimitry Andric   // alias
253ac9a064cSDimitry Andric   if ((MUC0.NumBytes.isScalable() && MUC0.Offset != 0) ||
254ac9a064cSDimitry Andric       (MUC1.NumBytes.isScalable() && MUC1.Offset != 0))
255ac9a064cSDimitry Andric     return true;
256ac9a064cSDimitry Andric 
257ac9a064cSDimitry Andric   const bool BothNotScalable =
258ac9a064cSDimitry Andric       !MUC0.NumBytes.isScalable() && !MUC1.NumBytes.isScalable();
259ac9a064cSDimitry Andric 
260c0981da4SDimitry Andric   // Try to prove that there is aliasing, or that there is no aliasing. Either
261c0981da4SDimitry Andric   // way, we can return now. If nothing can be proved, proceed with more tests.
262c0981da4SDimitry Andric   bool IsAlias;
263ac9a064cSDimitry Andric   if (BothNotScalable &&
264ac9a064cSDimitry Andric       GISelAddressing::aliasIsKnownForLoadStore(MI, Other, IsAlias, MRI))
265c0981da4SDimitry Andric     return IsAlias;
266c0981da4SDimitry Andric 
267c0981da4SDimitry Andric   // The following all rely on MMO0 and MMO1 being valid.
268c0981da4SDimitry Andric   if (!MUC0.MMO || !MUC1.MMO)
269c0981da4SDimitry Andric     return true;
270c0981da4SDimitry Andric 
271c0981da4SDimitry Andric   // FIXME: port the alignment based alias analysis from SDAG's isAlias().
272c0981da4SDimitry Andric   int64_t SrcValOffset0 = MUC0.MMO->getOffset();
273c0981da4SDimitry Andric   int64_t SrcValOffset1 = MUC1.MMO->getOffset();
274ac9a064cSDimitry Andric   LocationSize Size0 = MUC0.NumBytes;
275ac9a064cSDimitry Andric   LocationSize Size1 = MUC1.NumBytes;
276ac9a064cSDimitry Andric   if (AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() && Size0.hasValue() &&
277ac9a064cSDimitry Andric       Size1.hasValue()) {
278c0981da4SDimitry Andric     // Use alias analysis information.
279c0981da4SDimitry Andric     int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
280ac9a064cSDimitry Andric     int64_t Overlap0 =
281ac9a064cSDimitry Andric         Size0.getValue().getKnownMinValue() + SrcValOffset0 - MinOffset;
282ac9a064cSDimitry Andric     int64_t Overlap1 =
283ac9a064cSDimitry Andric         Size1.getValue().getKnownMinValue() + SrcValOffset1 - MinOffset;
284ac9a064cSDimitry Andric     LocationSize Loc0 =
285ac9a064cSDimitry Andric         Size0.isScalable() ? Size0 : LocationSize::precise(Overlap0);
286ac9a064cSDimitry Andric     LocationSize Loc1 =
287ac9a064cSDimitry Andric         Size1.isScalable() ? Size1 : LocationSize::precise(Overlap1);
288ac9a064cSDimitry Andric 
289ac9a064cSDimitry Andric     if (AA->isNoAlias(
290ac9a064cSDimitry Andric             MemoryLocation(MUC0.MMO->getValue(), Loc0, MUC0.MMO->getAAInfo()),
291ac9a064cSDimitry Andric             MemoryLocation(MUC1.MMO->getValue(), Loc1, MUC1.MMO->getAAInfo())))
292c0981da4SDimitry Andric       return false;
293c0981da4SDimitry Andric   }
294c0981da4SDimitry Andric 
295c0981da4SDimitry Andric   // Otherwise we have to assume they alias.
296c0981da4SDimitry Andric   return true;
297c0981da4SDimitry Andric }
298c0981da4SDimitry Andric 
299c0981da4SDimitry Andric /// Returns true if the instruction creates an unavoidable hazard that
300c0981da4SDimitry Andric /// forces a boundary between store merge candidates.
isInstHardMergeHazard(MachineInstr & MI)301c0981da4SDimitry Andric static bool isInstHardMergeHazard(MachineInstr &MI) {
302c0981da4SDimitry Andric   return MI.hasUnmodeledSideEffects() || MI.hasOrderedMemoryRef();
303c0981da4SDimitry Andric }
304c0981da4SDimitry Andric 
mergeStores(SmallVectorImpl<GStore * > & StoresToMerge)305c0981da4SDimitry Andric bool LoadStoreOpt::mergeStores(SmallVectorImpl<GStore *> &StoresToMerge) {
306c0981da4SDimitry Andric   // Try to merge all the stores in the vector, splitting into separate segments
307c0981da4SDimitry Andric   // as necessary.
308c0981da4SDimitry Andric   assert(StoresToMerge.size() > 1 && "Expected multiple stores to merge");
309c0981da4SDimitry Andric   LLT OrigTy = MRI->getType(StoresToMerge[0]->getValueReg());
310c0981da4SDimitry Andric   LLT PtrTy = MRI->getType(StoresToMerge[0]->getPointerReg());
311c0981da4SDimitry Andric   unsigned AS = PtrTy.getAddressSpace();
312c0981da4SDimitry Andric   // Ensure the legal store info is computed for this address space.
313c0981da4SDimitry Andric   initializeStoreMergeTargetInfo(AS);
314c0981da4SDimitry Andric   const auto &LegalSizes = LegalStoreSizes[AS];
315c0981da4SDimitry Andric 
316c0981da4SDimitry Andric #ifndef NDEBUG
3174b4fe385SDimitry Andric   for (auto *StoreMI : StoresToMerge)
318c0981da4SDimitry Andric     assert(MRI->getType(StoreMI->getValueReg()) == OrigTy);
319c0981da4SDimitry Andric #endif
320c0981da4SDimitry Andric 
321ac9a064cSDimitry Andric   const auto &DL = MF->getFunction().getDataLayout();
322c0981da4SDimitry Andric   bool AnyMerged = false;
323c0981da4SDimitry Andric   do {
3247fa27ce4SDimitry Andric     unsigned NumPow2 = llvm::bit_floor(StoresToMerge.size());
325e3b55780SDimitry Andric     unsigned MaxSizeBits = NumPow2 * OrigTy.getSizeInBits().getFixedValue();
326c0981da4SDimitry Andric     // Compute the biggest store we can generate to handle the number of stores.
327c0981da4SDimitry Andric     unsigned MergeSizeBits;
328c0981da4SDimitry Andric     for (MergeSizeBits = MaxSizeBits; MergeSizeBits > 1; MergeSizeBits /= 2) {
329c0981da4SDimitry Andric       LLT StoreTy = LLT::scalar(MergeSizeBits);
330c0981da4SDimitry Andric       EVT StoreEVT =
331c0981da4SDimitry Andric           getApproximateEVTForLLT(StoreTy, DL, MF->getFunction().getContext());
332c0981da4SDimitry Andric       if (LegalSizes.size() > MergeSizeBits && LegalSizes[MergeSizeBits] &&
333c0981da4SDimitry Andric           TLI->canMergeStoresTo(AS, StoreEVT, *MF) &&
334c0981da4SDimitry Andric           (TLI->isTypeLegal(StoreEVT)))
335c0981da4SDimitry Andric         break; // We can generate a MergeSize bits store.
336c0981da4SDimitry Andric     }
337c0981da4SDimitry Andric     if (MergeSizeBits <= OrigTy.getSizeInBits())
338c0981da4SDimitry Andric       return AnyMerged; // No greater merge.
339c0981da4SDimitry Andric 
340c0981da4SDimitry Andric     unsigned NumStoresToMerge = MergeSizeBits / OrigTy.getSizeInBits();
341c0981da4SDimitry Andric     // Perform the actual merging.
342c0981da4SDimitry Andric     SmallVector<GStore *, 8> SingleMergeStores(
343c0981da4SDimitry Andric         StoresToMerge.begin(), StoresToMerge.begin() + NumStoresToMerge);
344c0981da4SDimitry Andric     AnyMerged |= doSingleStoreMerge(SingleMergeStores);
345c0981da4SDimitry Andric     StoresToMerge.erase(StoresToMerge.begin(),
346c0981da4SDimitry Andric                         StoresToMerge.begin() + NumStoresToMerge);
347c0981da4SDimitry Andric   } while (StoresToMerge.size() > 1);
348c0981da4SDimitry Andric   return AnyMerged;
349c0981da4SDimitry Andric }
350c0981da4SDimitry Andric 
isLegalOrBeforeLegalizer(const LegalityQuery & Query,MachineFunction & MF) const351c0981da4SDimitry Andric bool LoadStoreOpt::isLegalOrBeforeLegalizer(const LegalityQuery &Query,
352c0981da4SDimitry Andric                                             MachineFunction &MF) const {
353c0981da4SDimitry Andric   auto Action = LI->getAction(Query).Action;
354c0981da4SDimitry Andric   // If the instruction is unsupported, it can't be legalized at all.
355c0981da4SDimitry Andric   if (Action == LegalizeActions::Unsupported)
356c0981da4SDimitry Andric     return false;
357c0981da4SDimitry Andric   return IsPreLegalizer || Action == LegalizeAction::Legal;
358c0981da4SDimitry Andric }
359c0981da4SDimitry Andric 
doSingleStoreMerge(SmallVectorImpl<GStore * > & Stores)360c0981da4SDimitry Andric bool LoadStoreOpt::doSingleStoreMerge(SmallVectorImpl<GStore *> &Stores) {
361c0981da4SDimitry Andric   assert(Stores.size() > 1);
362c0981da4SDimitry Andric   // We know that all the stores are consecutive and there are no aliasing
363c0981da4SDimitry Andric   // operations in the range. However, the values that are being stored may be
364c0981da4SDimitry Andric   // generated anywhere before each store. To ensure we have the values
365c0981da4SDimitry Andric   // available, we materialize the wide value and new store at the place of the
366c0981da4SDimitry Andric   // final store in the merge sequence.
367c0981da4SDimitry Andric   GStore *FirstStore = Stores[0];
368c0981da4SDimitry Andric   const unsigned NumStores = Stores.size();
369c0981da4SDimitry Andric   LLT SmallTy = MRI->getType(FirstStore->getValueReg());
370c0981da4SDimitry Andric   LLT WideValueTy =
371e3b55780SDimitry Andric       LLT::scalar(NumStores * SmallTy.getSizeInBits().getFixedValue());
372c0981da4SDimitry Andric 
373c0981da4SDimitry Andric   // For each store, compute pairwise merged debug locs.
374e3b55780SDimitry Andric   DebugLoc MergedLoc = Stores.front()->getDebugLoc();
375e3b55780SDimitry Andric   for (auto *Store : drop_begin(Stores))
376e3b55780SDimitry Andric     MergedLoc = DILocation::getMergedLocation(MergedLoc, Store->getDebugLoc());
377e3b55780SDimitry Andric 
378c0981da4SDimitry Andric   Builder.setInstr(*Stores.back());
379c0981da4SDimitry Andric   Builder.setDebugLoc(MergedLoc);
380c0981da4SDimitry Andric 
381c0981da4SDimitry Andric   // If all of the store values are constants, then create a wide constant
382c0981da4SDimitry Andric   // directly. Otherwise, we need to generate some instructions to merge the
383c0981da4SDimitry Andric   // existing values together into a wider type.
384c0981da4SDimitry Andric   SmallVector<APInt, 8> ConstantVals;
3854b4fe385SDimitry Andric   for (auto *Store : Stores) {
386c0981da4SDimitry Andric     auto MaybeCst =
387c0981da4SDimitry Andric         getIConstantVRegValWithLookThrough(Store->getValueReg(), *MRI);
388c0981da4SDimitry Andric     if (!MaybeCst) {
389c0981da4SDimitry Andric       ConstantVals.clear();
390c0981da4SDimitry Andric       break;
391c0981da4SDimitry Andric     }
392c0981da4SDimitry Andric     ConstantVals.emplace_back(MaybeCst->Value);
393c0981da4SDimitry Andric   }
394c0981da4SDimitry Andric 
395c0981da4SDimitry Andric   Register WideReg;
396c0981da4SDimitry Andric   auto *WideMMO =
397c0981da4SDimitry Andric       MF->getMachineMemOperand(&FirstStore->getMMO(), 0, WideValueTy);
398c0981da4SDimitry Andric   if (ConstantVals.empty()) {
399c0981da4SDimitry Andric     // Mimic the SDAG behaviour here and don't try to do anything for unknown
400c0981da4SDimitry Andric     // values. In future, we should also support the cases of loads and
401c0981da4SDimitry Andric     // extracted vector elements.
402c0981da4SDimitry Andric     return false;
403c0981da4SDimitry Andric   }
404c0981da4SDimitry Andric 
405c0981da4SDimitry Andric   assert(ConstantVals.size() == NumStores);
406c0981da4SDimitry Andric   // Check if our wide constant is legal.
407c0981da4SDimitry Andric   if (!isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {WideValueTy}}, *MF))
408c0981da4SDimitry Andric     return false;
409c0981da4SDimitry Andric   APInt WideConst(WideValueTy.getSizeInBits(), 0);
410c0981da4SDimitry Andric   for (unsigned Idx = 0; Idx < ConstantVals.size(); ++Idx) {
411c0981da4SDimitry Andric     // Insert the smaller constant into the corresponding position in the
412c0981da4SDimitry Andric     // wider one.
413c0981da4SDimitry Andric     WideConst.insertBits(ConstantVals[Idx], Idx * SmallTy.getSizeInBits());
414c0981da4SDimitry Andric   }
415c0981da4SDimitry Andric   WideReg = Builder.buildConstant(WideValueTy, WideConst).getReg(0);
416c0981da4SDimitry Andric   auto NewStore =
417c0981da4SDimitry Andric       Builder.buildStore(WideReg, FirstStore->getPointerReg(), *WideMMO);
418c0981da4SDimitry Andric   (void) NewStore;
4197fa27ce4SDimitry Andric   LLVM_DEBUG(dbgs() << "Merged " << Stores.size()
4207fa27ce4SDimitry Andric                     << " stores into merged store: " << *NewStore);
4217fa27ce4SDimitry Andric   LLVM_DEBUG(for (auto *MI : Stores) dbgs() << "  " << *MI;);
422c0981da4SDimitry Andric   NumStoresMerged += Stores.size();
423c0981da4SDimitry Andric 
424c0981da4SDimitry Andric   MachineOptimizationRemarkEmitter MORE(*MF, nullptr);
425c0981da4SDimitry Andric   MORE.emit([&]() {
426c0981da4SDimitry Andric     MachineOptimizationRemark R(DEBUG_TYPE, "MergedStore",
427c0981da4SDimitry Andric                                 FirstStore->getDebugLoc(),
428c0981da4SDimitry Andric                                 FirstStore->getParent());
429c0981da4SDimitry Andric     R << "Merged " << NV("NumMerged", Stores.size()) << " stores of "
430c0981da4SDimitry Andric       << NV("OrigWidth", SmallTy.getSizeInBytes())
431c0981da4SDimitry Andric       << " bytes into a single store of "
432c0981da4SDimitry Andric       << NV("NewWidth", WideValueTy.getSizeInBytes()) << " bytes";
433c0981da4SDimitry Andric     return R;
434c0981da4SDimitry Andric   });
435c0981da4SDimitry Andric 
4364b4fe385SDimitry Andric   for (auto *MI : Stores)
437c0981da4SDimitry Andric     InstsToErase.insert(MI);
438c0981da4SDimitry Andric   return true;
439c0981da4SDimitry Andric }
440c0981da4SDimitry Andric 
processMergeCandidate(StoreMergeCandidate & C)441c0981da4SDimitry Andric bool LoadStoreOpt::processMergeCandidate(StoreMergeCandidate &C) {
442c0981da4SDimitry Andric   if (C.Stores.size() < 2) {
443c0981da4SDimitry Andric     C.reset();
444c0981da4SDimitry Andric     return false;
445c0981da4SDimitry Andric   }
446c0981da4SDimitry Andric 
447c0981da4SDimitry Andric   LLVM_DEBUG(dbgs() << "Checking store merge candidate with " << C.Stores.size()
448c0981da4SDimitry Andric                     << " stores, starting with " << *C.Stores[0]);
449c0981da4SDimitry Andric   // We know that the stores in the candidate are adjacent.
450c0981da4SDimitry Andric   // Now we need to check if any potential aliasing instructions recorded
451c0981da4SDimitry Andric   // during the search alias with load/stores added to the candidate after.
452c0981da4SDimitry Andric   // For example, if we have the candidate:
453c0981da4SDimitry Andric   //   C.Stores = [ST1, ST2, ST3, ST4]
454c0981da4SDimitry Andric   // and after seeing ST2 we saw a load LD1, which did not alias with ST1 or
455c0981da4SDimitry Andric   // ST2, then we would have recorded it into the PotentialAliases structure
456c0981da4SDimitry Andric   // with the associated index value of "1". Then we see ST3 and ST4 and add
457c0981da4SDimitry Andric   // them to the candidate group. We know that LD1 does not alias with ST1 or
458c0981da4SDimitry Andric   // ST2, since we already did that check. However we don't yet know if it
459c0981da4SDimitry Andric   // may alias ST3 and ST4, so we perform those checks now.
460c0981da4SDimitry Andric   SmallVector<GStore *> StoresToMerge;
461c0981da4SDimitry Andric 
462c0981da4SDimitry Andric   auto DoesStoreAliasWithPotential = [&](unsigned Idx, GStore &CheckStore) {
463c0981da4SDimitry Andric     for (auto AliasInfo : reverse(C.PotentialAliases)) {
464c0981da4SDimitry Andric       MachineInstr *PotentialAliasOp = AliasInfo.first;
465c0981da4SDimitry Andric       unsigned PreCheckedIdx = AliasInfo.second;
4667fa27ce4SDimitry Andric       if (static_cast<unsigned>(Idx) < PreCheckedIdx) {
4677fa27ce4SDimitry Andric         // Once our store index is lower than the index associated with the
4687fa27ce4SDimitry Andric         // potential alias, we know that we've already checked for this alias
4697fa27ce4SDimitry Andric         // and all of the earlier potential aliases too.
4707fa27ce4SDimitry Andric         return false;
4717fa27ce4SDimitry Andric       }
472c0981da4SDimitry Andric       // Need to check this alias.
473c0981da4SDimitry Andric       if (GISelAddressing::instMayAlias(CheckStore, *PotentialAliasOp, *MRI,
474c0981da4SDimitry Andric                                         AA)) {
475c0981da4SDimitry Andric         LLVM_DEBUG(dbgs() << "Potential alias " << *PotentialAliasOp
476c0981da4SDimitry Andric                           << " detected\n");
477c0981da4SDimitry Andric         return true;
478c0981da4SDimitry Andric       }
479c0981da4SDimitry Andric     }
480c0981da4SDimitry Andric     return false;
481c0981da4SDimitry Andric   };
482c0981da4SDimitry Andric   // Start from the last store in the group, and check if it aliases with any
483c0981da4SDimitry Andric   // of the potential aliasing operations in the list.
484c0981da4SDimitry Andric   for (int StoreIdx = C.Stores.size() - 1; StoreIdx >= 0; --StoreIdx) {
485c0981da4SDimitry Andric     auto *CheckStore = C.Stores[StoreIdx];
486c0981da4SDimitry Andric     if (DoesStoreAliasWithPotential(StoreIdx, *CheckStore))
487c0981da4SDimitry Andric       continue;
488c0981da4SDimitry Andric     StoresToMerge.emplace_back(CheckStore);
489c0981da4SDimitry Andric   }
490c0981da4SDimitry Andric 
491c0981da4SDimitry Andric   LLVM_DEBUG(dbgs() << StoresToMerge.size()
492c0981da4SDimitry Andric                     << " stores remaining after alias checks. Merging...\n");
493c0981da4SDimitry Andric 
494c0981da4SDimitry Andric   // Now we've checked for aliasing hazards, merge any stores left.
495c0981da4SDimitry Andric   C.reset();
496c0981da4SDimitry Andric   if (StoresToMerge.size() < 2)
497c0981da4SDimitry Andric     return false;
498c0981da4SDimitry Andric   return mergeStores(StoresToMerge);
499c0981da4SDimitry Andric }
500c0981da4SDimitry Andric 
operationAliasesWithCandidate(MachineInstr & MI,StoreMergeCandidate & C)501c0981da4SDimitry Andric bool LoadStoreOpt::operationAliasesWithCandidate(MachineInstr &MI,
502c0981da4SDimitry Andric                                                  StoreMergeCandidate &C) {
503c0981da4SDimitry Andric   if (C.Stores.empty())
504c0981da4SDimitry Andric     return false;
505c0981da4SDimitry Andric   return llvm::any_of(C.Stores, [&](MachineInstr *OtherMI) {
506c0981da4SDimitry Andric     return instMayAlias(MI, *OtherMI, *MRI, AA);
507c0981da4SDimitry Andric   });
508c0981da4SDimitry Andric }
509c0981da4SDimitry Andric 
addPotentialAlias(MachineInstr & MI)510c0981da4SDimitry Andric void LoadStoreOpt::StoreMergeCandidate::addPotentialAlias(MachineInstr &MI) {
511c0981da4SDimitry Andric   PotentialAliases.emplace_back(std::make_pair(&MI, Stores.size() - 1));
512c0981da4SDimitry Andric }
513c0981da4SDimitry Andric 
/// Try to add \p StoreMI to the merge candidate \p C.
///
/// A store is accepted either as the first member of a fresh candidate, or
/// when it has the same base pointer, value size and address space as the
/// existing members and writes to the next lower adjacent address (the walk
/// is bottom-up, so offsets decrease as stores are added).
/// \returns true if the store was added to the candidate.
bool LoadStoreOpt::addStoreToCandidate(GStore &StoreMI,
                                       StoreMergeCandidate &C) {
  // Check if the given store writes to an adjacent address, and other
  // requirements.
  LLT ValueTy = MRI->getType(StoreMI.getValueReg());
  LLT PtrTy = MRI->getType(StoreMI.getPointerReg());

  // Only handle scalars.
  if (!ValueTy.isScalar())
    return false;

  // Don't allow truncating stores for now.
  if (StoreMI.getMemSizeInBits() != ValueTy.getSizeInBits())
    return false;

  // Avoid adding volatile or ordered stores to the candidate. We already have a
  // check for this in instMayAlias() but that only gets called later between
  // potential aliasing hazards.
  if (!StoreMI.isSimple())
    return false;

  Register StoreAddr = StoreMI.getPointerReg();
  auto BIO = getPointerInfo(StoreAddr, *MRI);
  Register StoreBase = BIO.getBase();
  if (C.Stores.empty()) {
    C.BasePtr = StoreBase;
    // With no constant offset available, treat the offset as 0.
    if (!BIO.hasValidOffset()) {
      C.CurrentLowestOffset = 0;
    } else {
      C.CurrentLowestOffset = BIO.getOffset();
    }
    // This is the first store of the candidate.
    // If the offset can't possibly allow for a lower addressed store with the
    // same base, don't bother adding it.
    if (BIO.hasValidOffset() &&
        BIO.getOffset() < static_cast<int64_t>(ValueTy.getSizeInBytes()))
      return false;
    C.Stores.emplace_back(&StoreMI);
    LLVM_DEBUG(dbgs() << "Starting a new merge candidate group with: "
                      << StoreMI);
    return true;
  }

  // Check the store is the same size as the existing ones in the candidate.
  if (MRI->getType(C.Stores[0]->getValueReg()).getSizeInBits() !=
      ValueTy.getSizeInBits())
    return false;

  // Stores in the group must all live in the same address space.
  if (MRI->getType(C.Stores[0]->getPointerReg()).getAddressSpace() !=
      PtrTy.getAddressSpace())
    return false;

  // There are other stores in the candidate. Check that the store address
  // writes to the next lowest adjacent address.
  if (C.BasePtr != StoreBase)
    return false;
  // If we don't have a valid offset, we can't guarantee to be an adjacent
  // offset.
  if (!BIO.hasValidOffset())
    return false;
  // The new store must sit exactly one value-width below the current lowest
  // member, i.e. immediately adjacent with no gap or overlap.
  if ((C.CurrentLowestOffset -
       static_cast<int64_t>(ValueTy.getSizeInBytes())) != BIO.getOffset())
    return false;

  // This writes to an adjacent address. Allow it.
  C.Stores.emplace_back(&StoreMI);
  C.CurrentLowestOffset = C.CurrentLowestOffset - ValueTy.getSizeInBytes();
  LLVM_DEBUG(dbgs() << "Candidate added store: " << StoreMI);
  return true;
}
584c0981da4SDimitry Andric 
/// Scan \p MBB bottom-up, collecting adjacent G_STOREs into a single merge
/// candidate and flushing (processing) the candidate whenever an aliasing
/// hazard is encountered. \returns true if any stores were merged.
bool LoadStoreOpt::mergeBlockStores(MachineBasicBlock &MBB) {
  bool Changed = false;
  // Walk through the block bottom-up, looking for merging candidates.
  StoreMergeCandidate Candidate;
  for (MachineInstr &MI : llvm::reverse(MBB)) {
    // Skip instructions already scheduled for deletion by a previous merge.
    if (InstsToErase.contains(&MI))
      continue;

    if (auto *StoreMI = dyn_cast<GStore>(&MI)) {
      // We have a G_STORE. Add it to the candidate if it writes to an adjacent
      // address.
      if (!addStoreToCandidate(*StoreMI, Candidate)) {
        // Store wasn't eligible to be added. May need to record it as a
        // potential alias.
        if (operationAliasesWithCandidate(*StoreMI, Candidate)) {
          // It aliases the group: flush the candidate now; the current store
          // may seed a new candidate on a later iteration.
          Changed |= processMergeCandidate(Candidate);
          continue;
        }
        Candidate.addPotentialAlias(*StoreMI);
      }
      continue;
    }

    // If we don't have any stores yet, this instruction can't pose a problem.
    if (Candidate.Stores.empty())
      continue;

    // We're dealing with some other kind of instruction.
    if (isInstHardMergeHazard(MI)) {
      // Hard hazards (e.g. instructions we can never reorder across) force a
      // flush and discard any remaining group state.
      Changed |= processMergeCandidate(Candidate);
      Candidate.Stores.clear();
      continue;
    }

    // Instructions that don't touch memory can be stepped over freely.
    if (!MI.mayLoadOrStore())
      continue;

    if (operationAliasesWithCandidate(MI, Candidate)) {
      // We have a potential alias, so process the current candidate if we can
      // and then continue looking for a new candidate.
      Changed |= processMergeCandidate(Candidate);
      continue;
    }

    // Record this instruction as a potential alias for future stores that are
    // added to the candidate.
    Candidate.addPotentialAlias(MI);
  }

  // Process any candidate left after finishing searching the entire block.
  Changed |= processMergeCandidate(Candidate);

  // Erase instructions now that we're no longer iterating over the block.
  for (auto *MI : InstsToErase)
    MI->eraseFromParent();
  InstsToErase.clear();
  return Changed;
}
643c0981da4SDimitry Andric 
/// Check if the store \p Store is a truncstore that can be merged. That is,
/// it's a store of a shifted value of \p SrcVal. If \p SrcVal is an empty
/// Register then it does not need to match and SrcVal is set to the source
/// value found.
/// On match, returns the start byte offset of the \p SrcVal that is being
/// stored.
static std::optional<int64_t>
getTruncStoreByteOffset(GStore &Store, Register &SrcVal,
                        MachineRegisterInfo &MRI) {
  Register TruncVal;
  // The stored value must come through a G_TRUNC at all.
  if (!mi_match(Store.getValueReg(), MRI, m_GTrunc(m_Reg(TruncVal))))
    return std::nullopt;

  // The shift amount must be a constant multiple of the narrow type.
  // It is translated to the offset address in the wide source value "y".
  //
  // x = G_LSHR y, ShiftAmtC
  // s8 z = G_TRUNC x
  // store z, ...
  Register FoundSrcVal;
  int64_t ShiftAmt;
  if (!mi_match(TruncVal, MRI,
                m_any_of(m_GLShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt)),
                         m_GAShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt))))) {
    // No shift: this can only be the piece at byte offset 0. Accept it when
    // no wide source has been chosen yet (and record TruncVal as the source),
    // or when it truncates the same wide value matched previously.
    if (!SrcVal.isValid() || TruncVal == SrcVal) {
      if (!SrcVal.isValid())
        SrcVal = TruncVal;
      return 0; // If it's the lowest index store.
    }
    return std::nullopt;
  }

  // Convert the bit-shift into a byte index within the wide value; reject
  // shifts that don't land on a narrow-element boundary.
  unsigned NarrowBits = Store.getMMO().getMemoryType().getScalarSizeInBits();
  if (ShiftAmt % NarrowBits != 0)
    return std::nullopt;
  const unsigned Offset = ShiftAmt / NarrowBits;

  // If a wide source was already chosen, this store must shift that same
  // value.
  if (SrcVal.isValid() && FoundSrcVal != SrcVal)
    return std::nullopt;

  if (!SrcVal.isValid())
    SrcVal = FoundSrcVal;
  else if (MRI.getType(SrcVal) != MRI.getType(FoundSrcVal))
    return std::nullopt;
  return Offset;
}
6907fa27ce4SDimitry Andric 
/// Match a pattern where a wide type scalar value is stored by several narrow
/// stores. Fold it into a single store or a BSWAP and a store if the targets
/// supports it.
///
/// Assuming little endian target:
///  i8 *p = ...
///  i32 val = ...
///  p[0] = (val >> 0) & 0xFF;
///  p[1] = (val >> 8) & 0xFF;
///  p[2] = (val >> 16) & 0xFF;
///  p[3] = (val >> 24) & 0xFF;
/// =>
///  *((i32)p) = val;
///
///  i8 *p = ...
///  i32 val = ...
///  p[0] = (val >> 24) & 0xFF;
///  p[1] = (val >> 16) & 0xFF;
///  p[2] = (val >> 8) & 0xFF;
///  p[3] = (val >> 0) & 0xFF;
/// =>
///  *((i32)p) = BSWAP(val);
bool LoadStoreOpt::mergeTruncStore(GStore &StoreMI,
                                   SmallPtrSetImpl<GStore *> &DeletedStores) {
  LLT MemTy = StoreMI.getMMO().getMemoryType();

  // We only handle merging simple stores of 1-4 bytes.
  if (!MemTy.isScalar())
    return false;
  switch (MemTy.getSizeInBits()) {
  case 8:
  case 16:
  case 32:
    break;
  default:
    return false;
  }
  if (!StoreMI.isSimple())
    return false;

  // We do a simple search for mergeable stores prior to this one.
  // Any potential alias hazard along the way terminates the search.
  SmallVector<GStore *> FoundStores;

  // We're looking for:
  // 1) a (store(trunc(...)))
  // 2) of an LSHR/ASHR of a single wide value, by the appropriate shift to get
  //    the partial value stored.
  // 3) where the offsets form either a little or big-endian sequence.

  auto &LastStore = StoreMI;

  // The single base pointer that all stores must use.
  Register BaseReg;
  int64_t LastOffset;
  if (!mi_match(LastStore.getPointerReg(), *MRI,
                m_GPtrAdd(m_Reg(BaseReg), m_ICst(LastOffset)))) {
    // Not base+offset form: treat the pointer itself as base with offset 0.
    BaseReg = LastStore.getPointerReg();
    LastOffset = 0;
  }

  GStore *LowestIdxStore = &LastStore;
  int64_t LowestIdxOffset = LastOffset;

  Register WideSrcVal;
  auto LowestShiftAmt = getTruncStoreByteOffset(LastStore, WideSrcVal, *MRI);
  if (!LowestShiftAmt)
    return false; // Didn't match a trunc.
  assert(WideSrcVal.isValid());

  LLT WideStoreTy = MRI->getType(WideSrcVal);
  // The wide type might not be a multiple of the memory type, e.g. s48 and s32.
  if (WideStoreTy.getSizeInBits() % MemTy.getSizeInBits() != 0)
    return false;
  const unsigned NumStoresRequired =
      WideStoreTy.getSizeInBits() / MemTy.getSizeInBits();

  // OffsetMap[i] is the memory offset of the store that provides the i-th
  // narrow piece of the wide value; INT64_MAX marks "not seen yet".
  SmallVector<int64_t, 8> OffsetMap(NumStoresRequired, INT64_MAX);
  OffsetMap[*LowestShiftAmt] = LastOffset;
  FoundStores.emplace_back(&LastStore);

  // Walk up the block from the seed store, bounding the search so we don't
  // scan arbitrarily far past unrelated instructions.
  const int MaxInstsToCheck = 10;
  int NumInstsChecked = 0;
  for (auto II = ++LastStore.getReverseIterator();
       II != LastStore.getParent()->rend() && NumInstsChecked < MaxInstsToCheck;
       ++II) {
    NumInstsChecked++;
    GStore *NewStore;
    if ((NewStore = dyn_cast<GStore>(&*II))) {
      // Candidate stores must have the same memory type and be simple.
      if (NewStore->getMMO().getMemoryType() != MemTy || !NewStore->isSimple())
        break;
    } else if (II->isLoadFoldBarrier() || II->mayLoad()) {
      // Any potential alias hazard ends the search.
      break;
    } else {
      continue; // This is a safe instruction we can look past.
    }

    Register NewBaseReg;
    int64_t MemOffset;
    // Check we're storing to the same base + some offset.
    if (!mi_match(NewStore->getPointerReg(), *MRI,
                  m_GPtrAdd(m_Reg(NewBaseReg), m_ICst(MemOffset)))) {
      NewBaseReg = NewStore->getPointerReg();
      MemOffset = 0;
    }
    if (BaseReg != NewBaseReg)
      break;

    auto ShiftByteOffset = getTruncStoreByteOffset(*NewStore, WideSrcVal, *MRI);
    if (!ShiftByteOffset)
      break;
    // Track the store with the lowest memory offset; the merged wide store
    // will reuse its pointer and MMO.
    if (MemOffset < LowestIdxOffset) {
      LowestIdxOffset = MemOffset;
      LowestIdxStore = NewStore;
    }

    // Map the offset in the store and the offset in the combined value, and
    // early return if it has been set before.
    if (*ShiftByteOffset < 0 || *ShiftByteOffset >= NumStoresRequired ||
        OffsetMap[*ShiftByteOffset] != INT64_MAX)
      break;
    OffsetMap[*ShiftByteOffset] = MemOffset;

    FoundStores.emplace_back(NewStore);
    // Reset counter since we've found a matching inst.
    NumInstsChecked = 0;
    if (FoundStores.size() == NumStoresRequired)
      break;
  }

  if (FoundStores.size() != NumStoresRequired) {
    if (FoundStores.size() == 1)
      return false;
    // We didn't find enough stores to merge into the size of the original
    // source value, but we may be able to generate a smaller store if we
    // truncate the source value.
    WideStoreTy = LLT::scalar(FoundStores.size() * MemTy.getScalarSizeInBits());
  }

  unsigned NumStoresFound = FoundStores.size();

  const auto &DL = LastStore.getMF()->getDataLayout();
  auto &C = LastStore.getMF()->getFunction().getContext();
  // Check that a store of the wide type is both allowed and fast on the target
  unsigned Fast = 0;
  bool Allowed = TLI->allowsMemoryAccess(
      C, DL, WideStoreTy, LowestIdxStore->getMMO(), &Fast);
  if (!Allowed || !Fast)
    return false;

  // Check if the pieces of the value are going to the expected places in memory
  // to merge the stores.
  unsigned NarrowBits = MemTy.getScalarSizeInBits();
  auto checkOffsets = [&](bool MatchLittleEndian) {
    if (MatchLittleEndian) {
      for (unsigned i = 0; i != NumStoresFound; ++i)
        if (OffsetMap[i] != i * (NarrowBits / 8) + LowestIdxOffset)
          return false;
    } else { // MatchBigEndian by reversing loop counter.
      for (unsigned i = 0, j = NumStoresFound - 1; i != NumStoresFound;
           ++i, --j)
        if (OffsetMap[j] != i * (NarrowBits / 8) + LowestIdxOffset)
          return false;
    }
    return true;
  };

  // Check if the offsets line up for the native data layout of this target.
  bool NeedBswap = false;
  bool NeedRotate = false;
  if (!checkOffsets(DL.isLittleEndian())) {
    // Special-case: check if byte offsets line up for the opposite endian.
    if (NarrowBits == 8 && checkOffsets(DL.isBigEndian()))
      NeedBswap = true;
    else if (NumStoresFound == 2 && checkOffsets(DL.isBigEndian()))
      NeedRotate = true;
    else
      return false;
  }

  // Only proceed if the fixup operation we'd need is legal (or we haven't
  // legalized yet).
  if (NeedBswap &&
      !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {WideStoreTy}}, *MF))
    return false;
  if (NeedRotate &&
      !isLegalOrBeforeLegalizer(
          {TargetOpcode::G_ROTR, {WideStoreTy, WideStoreTy}}, *MF))
    return false;

  Builder.setInstrAndDebugLoc(StoreMI);

  // Truncate the source if we only matched enough stores for a narrower type.
  if (WideStoreTy != MRI->getType(WideSrcVal))
    WideSrcVal = Builder.buildTrunc(WideStoreTy, WideSrcVal).getReg(0);

  if (NeedBswap) {
    WideSrcVal = Builder.buildBSwap(WideStoreTy, WideSrcVal).getReg(0);
  } else if (NeedRotate) {
    // A 2-element opposite-endian sequence is a rotate by half the width.
    assert(WideStoreTy.getSizeInBits() % 2 == 0 &&
           "Unexpected type for rotate");
    auto RotAmt =
        Builder.buildConstant(WideStoreTy, WideStoreTy.getSizeInBits() / 2);
    WideSrcVal =
        Builder.buildRotateRight(WideStoreTy, WideSrcVal, RotAmt).getReg(0);
  }

  Builder.buildStore(WideSrcVal, LowestIdxStore->getPointerReg(),
                     LowestIdxStore->getMMO().getPointerInfo(),
                     LowestIdxStore->getMMO().getAlign());

  // Erase the old stores.
  for (auto *ST : FoundStores) {
    ST->eraseFromParent();
    DeletedStores.insert(ST);
  }
  return true;
}
9067fa27ce4SDimitry Andric 
mergeTruncStoresBlock(MachineBasicBlock & BB)9077fa27ce4SDimitry Andric bool LoadStoreOpt::mergeTruncStoresBlock(MachineBasicBlock &BB) {
9087fa27ce4SDimitry Andric   bool Changed = false;
9097fa27ce4SDimitry Andric   SmallVector<GStore *, 16> Stores;
9107fa27ce4SDimitry Andric   SmallPtrSet<GStore *, 8> DeletedStores;
9117fa27ce4SDimitry Andric   // Walk up the block so we can see the most eligible stores.
9127fa27ce4SDimitry Andric   for (MachineInstr &MI : llvm::reverse(BB))
9137fa27ce4SDimitry Andric     if (auto *StoreMI = dyn_cast<GStore>(&MI))
9147fa27ce4SDimitry Andric       Stores.emplace_back(StoreMI);
9157fa27ce4SDimitry Andric 
9167fa27ce4SDimitry Andric   for (auto *StoreMI : Stores) {
9177fa27ce4SDimitry Andric     if (DeletedStores.count(StoreMI))
9187fa27ce4SDimitry Andric       continue;
9197fa27ce4SDimitry Andric     if (mergeTruncStore(*StoreMI, DeletedStores))
9207fa27ce4SDimitry Andric       Changed = true;
9217fa27ce4SDimitry Andric   }
9227fa27ce4SDimitry Andric   return Changed;
9237fa27ce4SDimitry Andric }
9247fa27ce4SDimitry Andric 
mergeFunctionStores(MachineFunction & MF)925c0981da4SDimitry Andric bool LoadStoreOpt::mergeFunctionStores(MachineFunction &MF) {
926c0981da4SDimitry Andric   bool Changed = false;
927c0981da4SDimitry Andric   for (auto &BB : MF){
928c0981da4SDimitry Andric     Changed |= mergeBlockStores(BB);
9297fa27ce4SDimitry Andric     Changed |= mergeTruncStoresBlock(BB);
930c0981da4SDimitry Andric   }
9317fa27ce4SDimitry Andric 
9327fa27ce4SDimitry Andric   // Erase all dead instructions left over by the merging.
9337fa27ce4SDimitry Andric   if (Changed) {
9347fa27ce4SDimitry Andric     for (auto &BB : MF) {
9357fa27ce4SDimitry Andric       for (auto &I : make_early_inc_range(make_range(BB.rbegin(), BB.rend()))) {
9367fa27ce4SDimitry Andric         if (isTriviallyDead(I, *MRI))
9377fa27ce4SDimitry Andric           I.eraseFromParent();
9387fa27ce4SDimitry Andric       }
9397fa27ce4SDimitry Andric     }
9407fa27ce4SDimitry Andric   }
9417fa27ce4SDimitry Andric 
942c0981da4SDimitry Andric   return Changed;
943c0981da4SDimitry Andric }
944c0981da4SDimitry Andric 
initializeStoreMergeTargetInfo(unsigned AddrSpace)945c0981da4SDimitry Andric void LoadStoreOpt::initializeStoreMergeTargetInfo(unsigned AddrSpace) {
946c0981da4SDimitry Andric   // Query the legalizer info to record what store types are legal.
947c0981da4SDimitry Andric   // We record this because we don't want to bother trying to merge stores into
948c0981da4SDimitry Andric   // illegal ones, which would just result in being split again.
949c0981da4SDimitry Andric 
950c0981da4SDimitry Andric   if (LegalStoreSizes.count(AddrSpace)) {
951c0981da4SDimitry Andric     assert(LegalStoreSizes[AddrSpace].any());
952c0981da4SDimitry Andric     return; // Already cached sizes for this address space.
953c0981da4SDimitry Andric   }
954c0981da4SDimitry Andric 
955c0981da4SDimitry Andric   // Need to reserve at least MaxStoreSizeToForm + 1 bits.
956c0981da4SDimitry Andric   BitVector LegalSizes(MaxStoreSizeToForm * 2);
957c0981da4SDimitry Andric   const auto &LI = *MF->getSubtarget().getLegalizerInfo();
958ac9a064cSDimitry Andric   const auto &DL = MF->getFunction().getDataLayout();
959b1c73532SDimitry Andric   Type *IRPtrTy = PointerType::get(MF->getFunction().getContext(), AddrSpace);
960b1c73532SDimitry Andric   LLT PtrTy = getLLTForType(*IRPtrTy, DL);
961c0981da4SDimitry Andric   // We assume that we're not going to be generating any stores wider than
962c0981da4SDimitry Andric   // MaxStoreSizeToForm bits for now.
963c0981da4SDimitry Andric   for (unsigned Size = 2; Size <= MaxStoreSizeToForm; Size *= 2) {
964c0981da4SDimitry Andric     LLT Ty = LLT::scalar(Size);
965c0981da4SDimitry Andric     SmallVector<LegalityQuery::MemDesc, 2> MemDescrs(
966c0981da4SDimitry Andric         {{Ty, Ty.getSizeInBits(), AtomicOrdering::NotAtomic}});
967c0981da4SDimitry Andric     SmallVector<LLT> StoreTys({Ty, PtrTy});
968c0981da4SDimitry Andric     LegalityQuery Q(TargetOpcode::G_STORE, StoreTys, MemDescrs);
969c0981da4SDimitry Andric     LegalizeActionStep ActionStep = LI.getAction(Q);
970c0981da4SDimitry Andric     if (ActionStep.Action == LegalizeActions::Legal)
971c0981da4SDimitry Andric       LegalSizes.set(Size);
972c0981da4SDimitry Andric   }
973c0981da4SDimitry Andric   assert(LegalSizes.any() && "Expected some store sizes to be legal!");
974c0981da4SDimitry Andric   LegalStoreSizes[AddrSpace] = LegalSizes;
975c0981da4SDimitry Andric }
976c0981da4SDimitry Andric 
runOnMachineFunction(MachineFunction & MF)977c0981da4SDimitry Andric bool LoadStoreOpt::runOnMachineFunction(MachineFunction &MF) {
978c0981da4SDimitry Andric   // If the ISel pipeline failed, do not bother running that pass.
979c0981da4SDimitry Andric   if (MF.getProperties().hasProperty(
980c0981da4SDimitry Andric           MachineFunctionProperties::Property::FailedISel))
981c0981da4SDimitry Andric     return false;
982c0981da4SDimitry Andric 
983c0981da4SDimitry Andric   LLVM_DEBUG(dbgs() << "Begin memory optimizations for: " << MF.getName()
984c0981da4SDimitry Andric                     << '\n');
985c0981da4SDimitry Andric 
986c0981da4SDimitry Andric   init(MF);
987c0981da4SDimitry Andric   bool Changed = false;
988c0981da4SDimitry Andric   Changed |= mergeFunctionStores(MF);
989c0981da4SDimitry Andric 
990c0981da4SDimitry Andric   LegalStoreSizes.clear();
991c0981da4SDimitry Andric   return Changed;
992c0981da4SDimitry Andric }
993