xref: /src/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1cfca06d7SDimitry Andric //===-- IPO/OpenMPOpt.cpp - Collection of OpenMP specific optimizations ---===//
2cfca06d7SDimitry Andric //
3cfca06d7SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4cfca06d7SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5cfca06d7SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6cfca06d7SDimitry Andric //
7cfca06d7SDimitry Andric //===----------------------------------------------------------------------===//
8cfca06d7SDimitry Andric //
9cfca06d7SDimitry Andric // OpenMP specific optimizations:
10cfca06d7SDimitry Andric //
11cfca06d7SDimitry Andric // - Deduplication of runtime calls, e.g., omp_get_thread_num.
12344a3780SDimitry Andric // - Replacing globalized device memory with stack memory.
13344a3780SDimitry Andric // - Replacing globalized device memory with shared memory.
14344a3780SDimitry Andric // - Parallel region merging.
15344a3780SDimitry Andric // - Transforming generic-mode device kernels to SPMD mode.
16344a3780SDimitry Andric // - Specializing the state machine for generic-mode device kernels.
17cfca06d7SDimitry Andric //
18cfca06d7SDimitry Andric //===----------------------------------------------------------------------===//
19cfca06d7SDimitry Andric 
20cfca06d7SDimitry Andric #include "llvm/Transforms/IPO/OpenMPOpt.h"
21cfca06d7SDimitry Andric 
22cfca06d7SDimitry Andric #include "llvm/ADT/EnumeratedArray.h"
23344a3780SDimitry Andric #include "llvm/ADT/PostOrderIterator.h"
246f8fc217SDimitry Andric #include "llvm/ADT/SetVector.h"
257fa27ce4SDimitry Andric #include "llvm/ADT/SmallPtrSet.h"
26e3b55780SDimitry Andric #include "llvm/ADT/SmallVector.h"
27cfca06d7SDimitry Andric #include "llvm/ADT/Statistic.h"
287fa27ce4SDimitry Andric #include "llvm/ADT/StringExtras.h"
29c0981da4SDimitry Andric #include "llvm/ADT/StringRef.h"
30cfca06d7SDimitry Andric #include "llvm/Analysis/CallGraph.h"
31cfca06d7SDimitry Andric #include "llvm/Analysis/CallGraphSCCPass.h"
32ecbca9f5SDimitry Andric #include "llvm/Analysis/MemoryLocation.h"
33cfca06d7SDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h"
34b60736ecSDimitry Andric #include "llvm/Analysis/ValueTracking.h"
35cfca06d7SDimitry Andric #include "llvm/Frontend/OpenMP/OMPConstants.h"
36b1c73532SDimitry Andric #include "llvm/Frontend/OpenMP/OMPDeviceConstants.h"
37cfca06d7SDimitry Andric #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
38344a3780SDimitry Andric #include "llvm/IR/Assumptions.h"
39e3b55780SDimitry Andric #include "llvm/IR/BasicBlock.h"
40ecbca9f5SDimitry Andric #include "llvm/IR/Constants.h"
41344a3780SDimitry Andric #include "llvm/IR/DiagnosticInfo.h"
427fa27ce4SDimitry Andric #include "llvm/IR/Dominators.h"
437fa27ce4SDimitry Andric #include "llvm/IR/Function.h"
44344a3780SDimitry Andric #include "llvm/IR/GlobalValue.h"
45ecbca9f5SDimitry Andric #include "llvm/IR/GlobalVariable.h"
46b1c73532SDimitry Andric #include "llvm/IR/InstrTypes.h"
47344a3780SDimitry Andric #include "llvm/IR/Instruction.h"
48ecbca9f5SDimitry Andric #include "llvm/IR/Instructions.h"
49344a3780SDimitry Andric #include "llvm/IR/IntrinsicInst.h"
50c0981da4SDimitry Andric #include "llvm/IR/IntrinsicsAMDGPU.h"
51c0981da4SDimitry Andric #include "llvm/IR/IntrinsicsNVPTX.h"
52ecbca9f5SDimitry Andric #include "llvm/IR/LLVMContext.h"
537fa27ce4SDimitry Andric #include "llvm/Support/Casting.h"
54cfca06d7SDimitry Andric #include "llvm/Support/CommandLine.h"
55ecbca9f5SDimitry Andric #include "llvm/Support/Debug.h"
56cfca06d7SDimitry Andric #include "llvm/Transforms/IPO/Attributor.h"
57b60736ecSDimitry Andric #include "llvm/Transforms/Utils/BasicBlockUtils.h"
58cfca06d7SDimitry Andric #include "llvm/Transforms/Utils/CallGraphUpdater.h"
59cfca06d7SDimitry Andric 
60c0981da4SDimitry Andric #include <algorithm>
61e3b55780SDimitry Andric #include <optional>
62e3b55780SDimitry Andric #include <string>
63c0981da4SDimitry Andric 
64cfca06d7SDimitry Andric using namespace llvm;
65cfca06d7SDimitry Andric using namespace omp;
66cfca06d7SDimitry Andric 
67cfca06d7SDimitry Andric #define DEBUG_TYPE "openmp-opt"
68cfca06d7SDimitry Andric 
69cfca06d7SDimitry Andric static cl::opt<bool> DisableOpenMPOptimizations(
70145449b1SDimitry Andric     "openmp-opt-disable", cl::desc("Disable OpenMP specific optimizations."),
71145449b1SDimitry Andric     cl::Hidden, cl::init(false));
72cfca06d7SDimitry Andric 
73b60736ecSDimitry Andric static cl::opt<bool> EnableParallelRegionMerging(
74145449b1SDimitry Andric     "openmp-opt-enable-merging",
75b60736ecSDimitry Andric     cl::desc("Enable the OpenMP region merging optimization."), cl::Hidden,
76b60736ecSDimitry Andric     cl::init(false));
77b60736ecSDimitry Andric 
78344a3780SDimitry Andric static cl::opt<bool>
79145449b1SDimitry Andric     DisableInternalization("openmp-opt-disable-internalization",
80344a3780SDimitry Andric                            cl::desc("Disable function internalization."),
81344a3780SDimitry Andric                            cl::Hidden, cl::init(false));
82344a3780SDimitry Andric 
83e3b55780SDimitry Andric static cl::opt<bool> DeduceICVValues("openmp-deduce-icv-values",
84e3b55780SDimitry Andric                                      cl::init(false), cl::Hidden);
85cfca06d7SDimitry Andric static cl::opt<bool> PrintICVValues("openmp-print-icv-values", cl::init(false),
86cfca06d7SDimitry Andric                                     cl::Hidden);
87cfca06d7SDimitry Andric static cl::opt<bool> PrintOpenMPKernels("openmp-print-gpu-kernels",
88cfca06d7SDimitry Andric                                         cl::init(false), cl::Hidden);
89cfca06d7SDimitry Andric 
90b60736ecSDimitry Andric static cl::opt<bool> HideMemoryTransferLatency(
91b60736ecSDimitry Andric     "openmp-hide-memory-transfer-latency",
92b60736ecSDimitry Andric     cl::desc("[WIP] Tries to hide the latency of host to device memory"
93b60736ecSDimitry Andric              " transfers"),
94b60736ecSDimitry Andric     cl::Hidden, cl::init(false));
95b60736ecSDimitry Andric 
96c0981da4SDimitry Andric static cl::opt<bool> DisableOpenMPOptDeglobalization(
97145449b1SDimitry Andric     "openmp-opt-disable-deglobalization",
98c0981da4SDimitry Andric     cl::desc("Disable OpenMP optimizations involving deglobalization."),
99c0981da4SDimitry Andric     cl::Hidden, cl::init(false));
100c0981da4SDimitry Andric 
101c0981da4SDimitry Andric static cl::opt<bool> DisableOpenMPOptSPMDization(
102145449b1SDimitry Andric     "openmp-opt-disable-spmdization",
103c0981da4SDimitry Andric     cl::desc("Disable OpenMP optimizations involving SPMD-ization."),
104c0981da4SDimitry Andric     cl::Hidden, cl::init(false));
105c0981da4SDimitry Andric 
106c0981da4SDimitry Andric static cl::opt<bool> DisableOpenMPOptFolding(
107145449b1SDimitry Andric     "openmp-opt-disable-folding",
108c0981da4SDimitry Andric     cl::desc("Disable OpenMP optimizations involving folding."), cl::Hidden,
109c0981da4SDimitry Andric     cl::init(false));
110c0981da4SDimitry Andric 
111c0981da4SDimitry Andric static cl::opt<bool> DisableOpenMPOptStateMachineRewrite(
112145449b1SDimitry Andric     "openmp-opt-disable-state-machine-rewrite",
113c0981da4SDimitry Andric     cl::desc("Disable OpenMP optimizations that replace the state machine."),
114c0981da4SDimitry Andric     cl::Hidden, cl::init(false));
115c0981da4SDimitry Andric 
116ecbca9f5SDimitry Andric static cl::opt<bool> DisableOpenMPOptBarrierElimination(
117145449b1SDimitry Andric     "openmp-opt-disable-barrier-elimination",
118ecbca9f5SDimitry Andric     cl::desc("Disable OpenMP optimizations that eliminate barriers."),
119ecbca9f5SDimitry Andric     cl::Hidden, cl::init(false));
120ecbca9f5SDimitry Andric 
121c0981da4SDimitry Andric static cl::opt<bool> PrintModuleAfterOptimizations(
122145449b1SDimitry Andric     "openmp-opt-print-module-after",
123c0981da4SDimitry Andric     cl::desc("Print the current module after OpenMP optimizations."),
124c0981da4SDimitry Andric     cl::Hidden, cl::init(false));
125c0981da4SDimitry Andric 
126145449b1SDimitry Andric static cl::opt<bool> PrintModuleBeforeOptimizations(
127145449b1SDimitry Andric     "openmp-opt-print-module-before",
128145449b1SDimitry Andric     cl::desc("Print the current module before OpenMP optimizations."),
129145449b1SDimitry Andric     cl::Hidden, cl::init(false));
130145449b1SDimitry Andric 
131c0981da4SDimitry Andric static cl::opt<bool> AlwaysInlineDeviceFunctions(
132145449b1SDimitry Andric     "openmp-opt-inline-device",
133c0981da4SDimitry Andric     cl::desc("Inline all applicible functions on the device."), cl::Hidden,
134c0981da4SDimitry Andric     cl::init(false));
135c0981da4SDimitry Andric 
136c0981da4SDimitry Andric static cl::opt<bool>
137145449b1SDimitry Andric     EnableVerboseRemarks("openmp-opt-verbose-remarks",
138c0981da4SDimitry Andric                          cl::desc("Enables more verbose remarks."), cl::Hidden,
139c0981da4SDimitry Andric                          cl::init(false));
140c0981da4SDimitry Andric 
141c0981da4SDimitry Andric static cl::opt<unsigned>
142c0981da4SDimitry Andric     SetFixpointIterations("openmp-opt-max-iterations", cl::Hidden,
143c0981da4SDimitry Andric                           cl::desc("Maximal number of attributor iterations."),
144c0981da4SDimitry Andric                           cl::init(256));
145c0981da4SDimitry Andric 
146145449b1SDimitry Andric static cl::opt<unsigned>
147145449b1SDimitry Andric     SharedMemoryLimit("openmp-opt-shared-limit", cl::Hidden,
148145449b1SDimitry Andric                       cl::desc("Maximum amount of shared memory to use."),
149145449b1SDimitry Andric                       cl::init(std::numeric_limits<unsigned>::max()));
150145449b1SDimitry Andric 
151cfca06d7SDimitry Andric STATISTIC(NumOpenMPRuntimeCallsDeduplicated,
152cfca06d7SDimitry Andric           "Number of OpenMP runtime calls deduplicated");
153cfca06d7SDimitry Andric STATISTIC(NumOpenMPParallelRegionsDeleted,
154cfca06d7SDimitry Andric           "Number of OpenMP parallel regions deleted");
155cfca06d7SDimitry Andric STATISTIC(NumOpenMPRuntimeFunctionsIdentified,
156cfca06d7SDimitry Andric           "Number of OpenMP runtime functions identified");
157cfca06d7SDimitry Andric STATISTIC(NumOpenMPRuntimeFunctionUsesIdentified,
158cfca06d7SDimitry Andric           "Number of OpenMP runtime function uses identified");
159cfca06d7SDimitry Andric STATISTIC(NumOpenMPTargetRegionKernels,
160cfca06d7SDimitry Andric           "Number of OpenMP target region entry points (=kernels) identified");
161b1c73532SDimitry Andric STATISTIC(NumNonOpenMPTargetRegionKernels,
162b1c73532SDimitry Andric           "Number of non-OpenMP target region kernels identified");
163344a3780SDimitry Andric STATISTIC(NumOpenMPTargetRegionKernelsSPMD,
164344a3780SDimitry Andric           "Number of OpenMP target region entry points (=kernels) executed in "
165344a3780SDimitry Andric           "SPMD-mode instead of generic-mode");
166344a3780SDimitry Andric STATISTIC(NumOpenMPTargetRegionKernelsWithoutStateMachine,
167344a3780SDimitry Andric           "Number of OpenMP target region entry points (=kernels) executed in "
168344a3780SDimitry Andric           "generic-mode without a state machines");
169344a3780SDimitry Andric STATISTIC(NumOpenMPTargetRegionKernelsCustomStateMachineWithFallback,
170344a3780SDimitry Andric           "Number of OpenMP target region entry points (=kernels) executed in "
171344a3780SDimitry Andric           "generic-mode with customized state machines with fallback");
172344a3780SDimitry Andric STATISTIC(NumOpenMPTargetRegionKernelsCustomStateMachineWithoutFallback,
173344a3780SDimitry Andric           "Number of OpenMP target region entry points (=kernels) executed in "
174344a3780SDimitry Andric           "generic-mode with customized state machines without fallback");
175cfca06d7SDimitry Andric STATISTIC(
176cfca06d7SDimitry Andric     NumOpenMPParallelRegionsReplacedInGPUStateMachine,
177cfca06d7SDimitry Andric     "Number of OpenMP parallel regions replaced with ID in GPU state machines");
178b60736ecSDimitry Andric STATISTIC(NumOpenMPParallelRegionsMerged,
179b60736ecSDimitry Andric           "Number of OpenMP parallel regions merged");
180344a3780SDimitry Andric STATISTIC(NumBytesMovedToSharedMemory,
181344a3780SDimitry Andric           "Amount of memory pushed to shared memory");
182ecbca9f5SDimitry Andric STATISTIC(NumBarriersEliminated, "Number of redundant barriers eliminated");
183cfca06d7SDimitry Andric 
184cfca06d7SDimitry Andric #if !defined(NDEBUG)
185cfca06d7SDimitry Andric static constexpr auto TAG = "[" DEBUG_TYPE "]";
186cfca06d7SDimitry Andric #endif
187cfca06d7SDimitry Andric 
188b1c73532SDimitry Andric namespace KernelInfo {
189b1c73532SDimitry Andric 
190b1c73532SDimitry Andric // struct ConfigurationEnvironmentTy {
191b1c73532SDimitry Andric //   uint8_t UseGenericStateMachine;
192b1c73532SDimitry Andric //   uint8_t MayUseNestedParallelism;
193b1c73532SDimitry Andric //   llvm::omp::OMPTgtExecModeFlags ExecMode;
194b1c73532SDimitry Andric //   int32_t MinThreads;
195b1c73532SDimitry Andric //   int32_t MaxThreads;
196b1c73532SDimitry Andric //   int32_t MinTeams;
197b1c73532SDimitry Andric //   int32_t MaxTeams;
198b1c73532SDimitry Andric // };
199b1c73532SDimitry Andric 
200b1c73532SDimitry Andric // struct DynamicEnvironmentTy {
201b1c73532SDimitry Andric //   uint16_t DebugIndentionLevel;
202b1c73532SDimitry Andric // };
203b1c73532SDimitry Andric 
204b1c73532SDimitry Andric // struct KernelEnvironmentTy {
205b1c73532SDimitry Andric //   ConfigurationEnvironmentTy Configuration;
206b1c73532SDimitry Andric //   IdentTy *Ident;
207b1c73532SDimitry Andric //   DynamicEnvironmentTy *DynamicEnv;
208b1c73532SDimitry Andric // };
209b1c73532SDimitry Andric 
210b1c73532SDimitry Andric #define KERNEL_ENVIRONMENT_IDX(MEMBER, IDX)                                    \
211b1c73532SDimitry Andric   constexpr const unsigned MEMBER##Idx = IDX;
212b1c73532SDimitry Andric 
213b1c73532SDimitry Andric KERNEL_ENVIRONMENT_IDX(Configuration, 0)
214b1c73532SDimitry Andric KERNEL_ENVIRONMENT_IDX(Ident, 1)
215b1c73532SDimitry Andric 
216b1c73532SDimitry Andric #undef KERNEL_ENVIRONMENT_IDX
217b1c73532SDimitry Andric 
218b1c73532SDimitry Andric #define KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MEMBER, IDX)                      \
219b1c73532SDimitry Andric   constexpr const unsigned MEMBER##Idx = IDX;
220b1c73532SDimitry Andric 
221b1c73532SDimitry Andric KERNEL_ENVIRONMENT_CONFIGURATION_IDX(UseGenericStateMachine, 0)
222b1c73532SDimitry Andric KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MayUseNestedParallelism, 1)
223b1c73532SDimitry Andric KERNEL_ENVIRONMENT_CONFIGURATION_IDX(ExecMode, 2)
224b1c73532SDimitry Andric KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MinThreads, 3)
225b1c73532SDimitry Andric KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MaxThreads, 4)
226b1c73532SDimitry Andric KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MinTeams, 5)
227b1c73532SDimitry Andric KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MaxTeams, 6)
228b1c73532SDimitry Andric 
229b1c73532SDimitry Andric #undef KERNEL_ENVIRONMENT_CONFIGURATION_IDX
230b1c73532SDimitry Andric 
231b1c73532SDimitry Andric #define KERNEL_ENVIRONMENT_GETTER(MEMBER, RETURNTYPE)                          \
232b1c73532SDimitry Andric   RETURNTYPE *get##MEMBER##FromKernelEnvironment(ConstantStruct *KernelEnvC) { \
233b1c73532SDimitry Andric     return cast<RETURNTYPE>(KernelEnvC->getAggregateElement(MEMBER##Idx));     \
234b1c73532SDimitry Andric   }
235b1c73532SDimitry Andric 
KERNEL_ENVIRONMENT_GETTER(Ident,Constant)236b1c73532SDimitry Andric KERNEL_ENVIRONMENT_GETTER(Ident, Constant)
237b1c73532SDimitry Andric KERNEL_ENVIRONMENT_GETTER(Configuration, ConstantStruct)
238b1c73532SDimitry Andric 
239b1c73532SDimitry Andric #undef KERNEL_ENVIRONMENT_GETTER
240b1c73532SDimitry Andric 
241b1c73532SDimitry Andric #define KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(MEMBER)                        \
242b1c73532SDimitry Andric   ConstantInt *get##MEMBER##FromKernelEnvironment(                             \
243b1c73532SDimitry Andric       ConstantStruct *KernelEnvC) {                                            \
244b1c73532SDimitry Andric     ConstantStruct *ConfigC =                                                  \
245b1c73532SDimitry Andric         getConfigurationFromKernelEnvironment(KernelEnvC);                     \
246b1c73532SDimitry Andric     return dyn_cast<ConstantInt>(ConfigC->getAggregateElement(MEMBER##Idx));   \
247b1c73532SDimitry Andric   }
248b1c73532SDimitry Andric 
249b1c73532SDimitry Andric KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(UseGenericStateMachine)
250b1c73532SDimitry Andric KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(MayUseNestedParallelism)
251b1c73532SDimitry Andric KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(ExecMode)
252b1c73532SDimitry Andric KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(MinThreads)
253b1c73532SDimitry Andric KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(MaxThreads)
254b1c73532SDimitry Andric KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(MinTeams)
255b1c73532SDimitry Andric KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(MaxTeams)
256b1c73532SDimitry Andric 
257b1c73532SDimitry Andric #undef KERNEL_ENVIRONMENT_CONFIGURATION_GETTER
258b1c73532SDimitry Andric 
259b1c73532SDimitry Andric GlobalVariable *
260b1c73532SDimitry Andric getKernelEnvironementGVFromKernelInitCB(CallBase *KernelInitCB) {
261b1c73532SDimitry Andric   constexpr const int InitKernelEnvironmentArgNo = 0;
262b1c73532SDimitry Andric   return cast<GlobalVariable>(
263b1c73532SDimitry Andric       KernelInitCB->getArgOperand(InitKernelEnvironmentArgNo)
264b1c73532SDimitry Andric           ->stripPointerCasts());
265b1c73532SDimitry Andric }
266b1c73532SDimitry Andric 
getKernelEnvironementFromKernelInitCB(CallBase * KernelInitCB)267b1c73532SDimitry Andric ConstantStruct *getKernelEnvironementFromKernelInitCB(CallBase *KernelInitCB) {
268b1c73532SDimitry Andric   GlobalVariable *KernelEnvGV =
269b1c73532SDimitry Andric       getKernelEnvironementGVFromKernelInitCB(KernelInitCB);
270b1c73532SDimitry Andric   return cast<ConstantStruct>(KernelEnvGV->getInitializer());
271b1c73532SDimitry Andric }
272b1c73532SDimitry Andric } // namespace KernelInfo
273b1c73532SDimitry Andric 
274cfca06d7SDimitry Andric namespace {
275cfca06d7SDimitry Andric 
// Forward declarations.
struct AAHeapToShared;

struct AAICVTracker;
279cfca06d7SDimitry Andric 
280cfca06d7SDimitry Andric /// OpenMP specific information. For now, stores RFIs and ICVs also needed for
281cfca06d7SDimitry Andric /// Attributor runs.
282cfca06d7SDimitry Andric struct OMPInformationCache : public InformationCache {
OMPInformationCache__anon7bbaa8dc0111::OMPInformationCache283cfca06d7SDimitry Andric   OMPInformationCache(Module &M, AnalysisGetter &AG,
284e3b55780SDimitry Andric                       BumpPtrAllocator &Allocator, SetVector<Function *> *CGSCC,
2857fa27ce4SDimitry Andric                       bool OpenMPPostLink)
286e3b55780SDimitry Andric       : InformationCache(M, AG, Allocator, CGSCC), OMPBuilder(M),
2877fa27ce4SDimitry Andric         OpenMPPostLink(OpenMPPostLink) {
288cfca06d7SDimitry Andric 
289b1c73532SDimitry Andric     OMPBuilder.Config.IsTargetDevice = isOpenMPDevice(OMPBuilder.M);
290cfca06d7SDimitry Andric     OMPBuilder.initialize();
291e3b55780SDimitry Andric     initializeRuntimeFunctions(M);
292cfca06d7SDimitry Andric     initializeInternalControlVars();
293cfca06d7SDimitry Andric   }
294cfca06d7SDimitry Andric 
295cfca06d7SDimitry Andric   /// Generic information that describes an internal control variable.
296cfca06d7SDimitry Andric   struct InternalControlVarInfo {
297cfca06d7SDimitry Andric     /// The kind, as described by InternalControlVar enum.
298cfca06d7SDimitry Andric     InternalControlVar Kind;
299cfca06d7SDimitry Andric 
300cfca06d7SDimitry Andric     /// The name of the ICV.
301cfca06d7SDimitry Andric     StringRef Name;
302cfca06d7SDimitry Andric 
303cfca06d7SDimitry Andric     /// Environment variable associated with this ICV.
304cfca06d7SDimitry Andric     StringRef EnvVarName;
305cfca06d7SDimitry Andric 
306cfca06d7SDimitry Andric     /// Initial value kind.
307cfca06d7SDimitry Andric     ICVInitValue InitKind;
308cfca06d7SDimitry Andric 
309cfca06d7SDimitry Andric     /// Initial value.
310cfca06d7SDimitry Andric     ConstantInt *InitValue;
311cfca06d7SDimitry Andric 
312cfca06d7SDimitry Andric     /// Setter RTL function associated with this ICV.
313cfca06d7SDimitry Andric     RuntimeFunction Setter;
314cfca06d7SDimitry Andric 
315cfca06d7SDimitry Andric     /// Getter RTL function associated with this ICV.
316cfca06d7SDimitry Andric     RuntimeFunction Getter;
317cfca06d7SDimitry Andric 
318cfca06d7SDimitry Andric     /// RTL Function corresponding to the override clause of this ICV
319cfca06d7SDimitry Andric     RuntimeFunction Clause;
320cfca06d7SDimitry Andric   };
321cfca06d7SDimitry Andric 
322cfca06d7SDimitry Andric   /// Generic information that describes a runtime function
323cfca06d7SDimitry Andric   struct RuntimeFunctionInfo {
324cfca06d7SDimitry Andric 
325cfca06d7SDimitry Andric     /// The kind, as described by the RuntimeFunction enum.
326cfca06d7SDimitry Andric     RuntimeFunction Kind;
327cfca06d7SDimitry Andric 
328cfca06d7SDimitry Andric     /// The name of the function.
329cfca06d7SDimitry Andric     StringRef Name;
330cfca06d7SDimitry Andric 
331cfca06d7SDimitry Andric     /// Flag to indicate a variadic function.
332cfca06d7SDimitry Andric     bool IsVarArg;
333cfca06d7SDimitry Andric 
334cfca06d7SDimitry Andric     /// The return type of the function.
335cfca06d7SDimitry Andric     Type *ReturnType;
336cfca06d7SDimitry Andric 
337cfca06d7SDimitry Andric     /// The argument types of the function.
338cfca06d7SDimitry Andric     SmallVector<Type *, 8> ArgumentTypes;
339cfca06d7SDimitry Andric 
340cfca06d7SDimitry Andric     /// The declaration if available.
341cfca06d7SDimitry Andric     Function *Declaration = nullptr;
342cfca06d7SDimitry Andric 
343cfca06d7SDimitry Andric     /// Uses of this runtime function per function containing the use.
344cfca06d7SDimitry Andric     using UseVector = SmallVector<Use *, 16>;
345cfca06d7SDimitry Andric 
346cfca06d7SDimitry Andric     /// Clear UsesMap for runtime function.
clearUsesMap__anon7bbaa8dc0111::OMPInformationCache::RuntimeFunctionInfo347cfca06d7SDimitry Andric     void clearUsesMap() { UsesMap.clear(); }
348cfca06d7SDimitry Andric 
349cfca06d7SDimitry Andric     /// Boolean conversion that is true if the runtime function was found.
operator bool__anon7bbaa8dc0111::OMPInformationCache::RuntimeFunctionInfo350cfca06d7SDimitry Andric     operator bool() const { return Declaration; }
351cfca06d7SDimitry Andric 
352cfca06d7SDimitry Andric     /// Return the vector of uses in function \p F.
getOrCreateUseVector__anon7bbaa8dc0111::OMPInformationCache::RuntimeFunctionInfo353cfca06d7SDimitry Andric     UseVector &getOrCreateUseVector(Function *F) {
354cfca06d7SDimitry Andric       std::shared_ptr<UseVector> &UV = UsesMap[F];
355cfca06d7SDimitry Andric       if (!UV)
356cfca06d7SDimitry Andric         UV = std::make_shared<UseVector>();
357cfca06d7SDimitry Andric       return *UV;
358cfca06d7SDimitry Andric     }
359cfca06d7SDimitry Andric 
360cfca06d7SDimitry Andric     /// Return the vector of uses in function \p F or `nullptr` if there are
361cfca06d7SDimitry Andric     /// none.
getUseVector__anon7bbaa8dc0111::OMPInformationCache::RuntimeFunctionInfo362cfca06d7SDimitry Andric     const UseVector *getUseVector(Function &F) const {
363cfca06d7SDimitry Andric       auto I = UsesMap.find(&F);
364cfca06d7SDimitry Andric       if (I != UsesMap.end())
365cfca06d7SDimitry Andric         return I->second.get();
366cfca06d7SDimitry Andric       return nullptr;
367cfca06d7SDimitry Andric     }
368cfca06d7SDimitry Andric 
369cfca06d7SDimitry Andric     /// Return how many functions contain uses of this runtime function.
getNumFunctionsWithUses__anon7bbaa8dc0111::OMPInformationCache::RuntimeFunctionInfo370cfca06d7SDimitry Andric     size_t getNumFunctionsWithUses() const { return UsesMap.size(); }
371cfca06d7SDimitry Andric 
372cfca06d7SDimitry Andric     /// Return the number of arguments (or the minimal number for variadic
373cfca06d7SDimitry Andric     /// functions).
getNumArgs__anon7bbaa8dc0111::OMPInformationCache::RuntimeFunctionInfo374cfca06d7SDimitry Andric     size_t getNumArgs() const { return ArgumentTypes.size(); }
375cfca06d7SDimitry Andric 
376cfca06d7SDimitry Andric     /// Run the callback \p CB on each use and forget the use if the result is
377cfca06d7SDimitry Andric     /// true. The callback will be fed the function in which the use was
378cfca06d7SDimitry Andric     /// encountered as second argument.
foreachUse__anon7bbaa8dc0111::OMPInformationCache::RuntimeFunctionInfo379cfca06d7SDimitry Andric     void foreachUse(SmallVectorImpl<Function *> &SCC,
380cfca06d7SDimitry Andric                     function_ref<bool(Use &, Function &)> CB) {
381cfca06d7SDimitry Andric       for (Function *F : SCC)
382cfca06d7SDimitry Andric         foreachUse(CB, F);
383cfca06d7SDimitry Andric     }
384cfca06d7SDimitry Andric 
385cfca06d7SDimitry Andric     /// Run the callback \p CB on each use within the function \p F and forget
386cfca06d7SDimitry Andric     /// the use if the result is true.
foreachUse__anon7bbaa8dc0111::OMPInformationCache::RuntimeFunctionInfo387cfca06d7SDimitry Andric     void foreachUse(function_ref<bool(Use &, Function &)> CB, Function *F) {
388cfca06d7SDimitry Andric       SmallVector<unsigned, 8> ToBeDeleted;
389cfca06d7SDimitry Andric       ToBeDeleted.clear();
390cfca06d7SDimitry Andric 
391cfca06d7SDimitry Andric       unsigned Idx = 0;
392cfca06d7SDimitry Andric       UseVector &UV = getOrCreateUseVector(F);
393cfca06d7SDimitry Andric 
394cfca06d7SDimitry Andric       for (Use *U : UV) {
395cfca06d7SDimitry Andric         if (CB(*U, *F))
396cfca06d7SDimitry Andric           ToBeDeleted.push_back(Idx);
397cfca06d7SDimitry Andric         ++Idx;
398cfca06d7SDimitry Andric       }
399cfca06d7SDimitry Andric 
400cfca06d7SDimitry Andric       // Remove the to-be-deleted indices in reverse order as prior
401cfca06d7SDimitry Andric       // modifications will not modify the smaller indices.
402cfca06d7SDimitry Andric       while (!ToBeDeleted.empty()) {
403cfca06d7SDimitry Andric         unsigned Idx = ToBeDeleted.pop_back_val();
404cfca06d7SDimitry Andric         UV[Idx] = UV.back();
405cfca06d7SDimitry Andric         UV.pop_back();
406cfca06d7SDimitry Andric       }
407cfca06d7SDimitry Andric     }
408cfca06d7SDimitry Andric 
409cfca06d7SDimitry Andric   private:
410cfca06d7SDimitry Andric     /// Map from functions to all uses of this runtime function contained in
411cfca06d7SDimitry Andric     /// them.
412cfca06d7SDimitry Andric     DenseMap<Function *, std::shared_ptr<UseVector>> UsesMap;
413344a3780SDimitry Andric 
414344a3780SDimitry Andric   public:
415344a3780SDimitry Andric     /// Iterators for the uses of this runtime function.
begin__anon7bbaa8dc0111::OMPInformationCache::RuntimeFunctionInfo416344a3780SDimitry Andric     decltype(UsesMap)::iterator begin() { return UsesMap.begin(); }
end__anon7bbaa8dc0111::OMPInformationCache::RuntimeFunctionInfo417344a3780SDimitry Andric     decltype(UsesMap)::iterator end() { return UsesMap.end(); }
418cfca06d7SDimitry Andric   };
419cfca06d7SDimitry Andric 
420cfca06d7SDimitry Andric   /// An OpenMP-IR-Builder instance
421cfca06d7SDimitry Andric   OpenMPIRBuilder OMPBuilder;
422cfca06d7SDimitry Andric 
423cfca06d7SDimitry Andric   /// Map from runtime function kind to the runtime function description.
424cfca06d7SDimitry Andric   EnumeratedArray<RuntimeFunctionInfo, RuntimeFunction,
425cfca06d7SDimitry Andric                   RuntimeFunction::OMPRTL___last>
426cfca06d7SDimitry Andric       RFIs;
427cfca06d7SDimitry Andric 
428344a3780SDimitry Andric   /// Map from function declarations/definitions to their runtime enum type.
429344a3780SDimitry Andric   DenseMap<Function *, RuntimeFunction> RuntimeFunctionIDMap;
430344a3780SDimitry Andric 
431cfca06d7SDimitry Andric   /// Map from ICV kind to the ICV description.
432cfca06d7SDimitry Andric   EnumeratedArray<InternalControlVarInfo, InternalControlVar,
433cfca06d7SDimitry Andric                   InternalControlVar::ICV___last>
434cfca06d7SDimitry Andric       ICVs;
435cfca06d7SDimitry Andric 
436cfca06d7SDimitry Andric   /// Helper to initialize all internal control variable information for those
437cfca06d7SDimitry Andric   /// defined in OMPKinds.def.
initializeInternalControlVars__anon7bbaa8dc0111::OMPInformationCache438cfca06d7SDimitry Andric   void initializeInternalControlVars() {
439cfca06d7SDimitry Andric #define ICV_RT_SET(_Name, RTL)                                                 \
440cfca06d7SDimitry Andric   {                                                                            \
441cfca06d7SDimitry Andric     auto &ICV = ICVs[_Name];                                                   \
442cfca06d7SDimitry Andric     ICV.Setter = RTL;                                                          \
443cfca06d7SDimitry Andric   }
444cfca06d7SDimitry Andric #define ICV_RT_GET(Name, RTL)                                                  \
445cfca06d7SDimitry Andric   {                                                                            \
446cfca06d7SDimitry Andric     auto &ICV = ICVs[Name];                                                    \
447cfca06d7SDimitry Andric     ICV.Getter = RTL;                                                          \
448cfca06d7SDimitry Andric   }
449cfca06d7SDimitry Andric #define ICV_DATA_ENV(Enum, _Name, _EnvVarName, Init)                           \
450cfca06d7SDimitry Andric   {                                                                            \
451cfca06d7SDimitry Andric     auto &ICV = ICVs[Enum];                                                    \
452cfca06d7SDimitry Andric     ICV.Name = _Name;                                                          \
453cfca06d7SDimitry Andric     ICV.Kind = Enum;                                                           \
454cfca06d7SDimitry Andric     ICV.InitKind = Init;                                                       \
455cfca06d7SDimitry Andric     ICV.EnvVarName = _EnvVarName;                                              \
456cfca06d7SDimitry Andric     switch (ICV.InitKind) {                                                    \
457cfca06d7SDimitry Andric     case ICV_IMPLEMENTATION_DEFINED:                                           \
458cfca06d7SDimitry Andric       ICV.InitValue = nullptr;                                                 \
459cfca06d7SDimitry Andric       break;                                                                   \
460cfca06d7SDimitry Andric     case ICV_ZERO:                                                             \
461cfca06d7SDimitry Andric       ICV.InitValue = ConstantInt::get(                                        \
462cfca06d7SDimitry Andric           Type::getInt32Ty(OMPBuilder.Int32->getContext()), 0);                \
463cfca06d7SDimitry Andric       break;                                                                   \
464cfca06d7SDimitry Andric     case ICV_FALSE:                                                            \
465cfca06d7SDimitry Andric       ICV.InitValue = ConstantInt::getFalse(OMPBuilder.Int1->getContext());    \
466cfca06d7SDimitry Andric       break;                                                                   \
467cfca06d7SDimitry Andric     case ICV_LAST:                                                             \
468cfca06d7SDimitry Andric       break;                                                                   \
469cfca06d7SDimitry Andric     }                                                                          \
470cfca06d7SDimitry Andric   }
471cfca06d7SDimitry Andric #include "llvm/Frontend/OpenMP/OMPKinds.def"
472cfca06d7SDimitry Andric   }
473cfca06d7SDimitry Andric 
474cfca06d7SDimitry Andric   /// Returns true if the function declaration \p F matches the runtime
475cfca06d7SDimitry Andric   /// function types, that is, return type \p RTFRetType, and argument types
476cfca06d7SDimitry Andric   /// \p RTFArgTypes.
declMatchesRTFTypes__anon7bbaa8dc0111::OMPInformationCache477cfca06d7SDimitry Andric   static bool declMatchesRTFTypes(Function *F, Type *RTFRetType,
478cfca06d7SDimitry Andric                                   SmallVector<Type *, 8> &RTFArgTypes) {
479cfca06d7SDimitry Andric     // TODO: We should output information to the user (under debug output
480cfca06d7SDimitry Andric     //       and via remarks).
481cfca06d7SDimitry Andric 
482cfca06d7SDimitry Andric     if (!F)
483cfca06d7SDimitry Andric       return false;
484cfca06d7SDimitry Andric     if (F->getReturnType() != RTFRetType)
485cfca06d7SDimitry Andric       return false;
486cfca06d7SDimitry Andric     if (F->arg_size() != RTFArgTypes.size())
487cfca06d7SDimitry Andric       return false;
488cfca06d7SDimitry Andric 
489c0981da4SDimitry Andric     auto *RTFTyIt = RTFArgTypes.begin();
490cfca06d7SDimitry Andric     for (Argument &Arg : F->args()) {
491cfca06d7SDimitry Andric       if (Arg.getType() != *RTFTyIt)
492cfca06d7SDimitry Andric         return false;
493cfca06d7SDimitry Andric 
494cfca06d7SDimitry Andric       ++RTFTyIt;
495cfca06d7SDimitry Andric     }
496cfca06d7SDimitry Andric 
497cfca06d7SDimitry Andric     return true;
498cfca06d7SDimitry Andric   }
499cfca06d7SDimitry Andric 
500cfca06d7SDimitry Andric   // Helper to collect all uses of the declaration in the UsesMap.
collectUses__anon7bbaa8dc0111::OMPInformationCache501cfca06d7SDimitry Andric   unsigned collectUses(RuntimeFunctionInfo &RFI, bool CollectStats = true) {
502cfca06d7SDimitry Andric     unsigned NumUses = 0;
503cfca06d7SDimitry Andric     if (!RFI.Declaration)
504cfca06d7SDimitry Andric       return NumUses;
505cfca06d7SDimitry Andric     OMPBuilder.addAttributes(RFI.Kind, *RFI.Declaration);
506cfca06d7SDimitry Andric 
507cfca06d7SDimitry Andric     if (CollectStats) {
508cfca06d7SDimitry Andric       NumOpenMPRuntimeFunctionsIdentified += 1;
509cfca06d7SDimitry Andric       NumOpenMPRuntimeFunctionUsesIdentified += RFI.Declaration->getNumUses();
510cfca06d7SDimitry Andric     }
511cfca06d7SDimitry Andric 
512cfca06d7SDimitry Andric     // TODO: We directly convert uses into proper calls and unknown uses.
513cfca06d7SDimitry Andric     for (Use &U : RFI.Declaration->uses()) {
514cfca06d7SDimitry Andric       if (Instruction *UserI = dyn_cast<Instruction>(U.getUser())) {
5157fa27ce4SDimitry Andric         if (!CGSCC || CGSCC->empty() || CGSCC->contains(UserI->getFunction())) {
516cfca06d7SDimitry Andric           RFI.getOrCreateUseVector(UserI->getFunction()).push_back(&U);
517cfca06d7SDimitry Andric           ++NumUses;
518cfca06d7SDimitry Andric         }
519cfca06d7SDimitry Andric       } else {
520cfca06d7SDimitry Andric         RFI.getOrCreateUseVector(nullptr).push_back(&U);
521cfca06d7SDimitry Andric         ++NumUses;
522cfca06d7SDimitry Andric       }
523cfca06d7SDimitry Andric     }
524cfca06d7SDimitry Andric     return NumUses;
525cfca06d7SDimitry Andric   }
526cfca06d7SDimitry Andric 
527b60736ecSDimitry Andric   // Helper function to recollect uses of a runtime function.
recollectUsesForFunction__anon7bbaa8dc0111::OMPInformationCache528b60736ecSDimitry Andric   void recollectUsesForFunction(RuntimeFunction RTF) {
529b60736ecSDimitry Andric     auto &RFI = RFIs[RTF];
530cfca06d7SDimitry Andric     RFI.clearUsesMap();
531cfca06d7SDimitry Andric     collectUses(RFI, /*CollectStats*/ false);
532cfca06d7SDimitry Andric   }
533b60736ecSDimitry Andric 
534b60736ecSDimitry Andric   // Helper function to recollect uses of all runtime functions.
recollectUses__anon7bbaa8dc0111::OMPInformationCache535b60736ecSDimitry Andric   void recollectUses() {
536b60736ecSDimitry Andric     for (int Idx = 0; Idx < RFIs.size(); ++Idx)
537b60736ecSDimitry Andric       recollectUsesForFunction(static_cast<RuntimeFunction>(Idx));
538cfca06d7SDimitry Andric   }
539cfca06d7SDimitry Andric 
5406f8fc217SDimitry Andric   // Helper function to inherit the calling convention of the function callee.
setCallingConvention__anon7bbaa8dc0111::OMPInformationCache5416f8fc217SDimitry Andric   void setCallingConvention(FunctionCallee Callee, CallInst *CI) {
5426f8fc217SDimitry Andric     if (Function *Fn = dyn_cast<Function>(Callee.getCallee()))
5436f8fc217SDimitry Andric       CI->setCallingConv(Fn->getCallingConv());
5446f8fc217SDimitry Andric   }
5456f8fc217SDimitry Andric 
5467fa27ce4SDimitry Andric   // Helper function to determine if it's legal to create a call to the runtime
5477fa27ce4SDimitry Andric   // functions.
runtimeFnsAvailable__anon7bbaa8dc0111::OMPInformationCache5487fa27ce4SDimitry Andric   bool runtimeFnsAvailable(ArrayRef<RuntimeFunction> Fns) {
5497fa27ce4SDimitry Andric     // We can always emit calls if we haven't yet linked in the runtime.
5507fa27ce4SDimitry Andric     if (!OpenMPPostLink)
5517fa27ce4SDimitry Andric       return true;
5527fa27ce4SDimitry Andric 
5537fa27ce4SDimitry Andric     // Once the runtime has been already been linked in we cannot emit calls to
5547fa27ce4SDimitry Andric     // any undefined functions.
5557fa27ce4SDimitry Andric     for (RuntimeFunction Fn : Fns) {
5567fa27ce4SDimitry Andric       RuntimeFunctionInfo &RFI = RFIs[Fn];
5577fa27ce4SDimitry Andric 
5587fa27ce4SDimitry Andric       if (RFI.Declaration && RFI.Declaration->isDeclaration())
5597fa27ce4SDimitry Andric         return false;
5607fa27ce4SDimitry Andric     }
5617fa27ce4SDimitry Andric     return true;
5627fa27ce4SDimitry Andric   }
5637fa27ce4SDimitry Andric 
564cfca06d7SDimitry Andric   /// Helper to initialize all runtime function information for those defined
565cfca06d7SDimitry Andric   /// in OpenMPKinds.def.
initializeRuntimeFunctions__anon7bbaa8dc0111::OMPInformationCache566e3b55780SDimitry Andric   void initializeRuntimeFunctions(Module &M) {
567cfca06d7SDimitry Andric 
568cfca06d7SDimitry Andric     // Helper macros for handling __VA_ARGS__ in OMP_RTL
569cfca06d7SDimitry Andric #define OMP_TYPE(VarName, ...)                                                 \
570cfca06d7SDimitry Andric   Type *VarName = OMPBuilder.VarName;                                          \
571cfca06d7SDimitry Andric   (void)VarName;
572cfca06d7SDimitry Andric 
573cfca06d7SDimitry Andric #define OMP_ARRAY_TYPE(VarName, ...)                                           \
574cfca06d7SDimitry Andric   ArrayType *VarName##Ty = OMPBuilder.VarName##Ty;                             \
575cfca06d7SDimitry Andric   (void)VarName##Ty;                                                           \
576cfca06d7SDimitry Andric   PointerType *VarName##PtrTy = OMPBuilder.VarName##PtrTy;                     \
577cfca06d7SDimitry Andric   (void)VarName##PtrTy;
578cfca06d7SDimitry Andric 
579cfca06d7SDimitry Andric #define OMP_FUNCTION_TYPE(VarName, ...)                                        \
580cfca06d7SDimitry Andric   FunctionType *VarName = OMPBuilder.VarName;                                  \
581cfca06d7SDimitry Andric   (void)VarName;                                                               \
582cfca06d7SDimitry Andric   PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr;                         \
583cfca06d7SDimitry Andric   (void)VarName##Ptr;
584cfca06d7SDimitry Andric 
585cfca06d7SDimitry Andric #define OMP_STRUCT_TYPE(VarName, ...)                                          \
586cfca06d7SDimitry Andric   StructType *VarName = OMPBuilder.VarName;                                    \
587cfca06d7SDimitry Andric   (void)VarName;                                                               \
588cfca06d7SDimitry Andric   PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr;                         \
589cfca06d7SDimitry Andric   (void)VarName##Ptr;
590cfca06d7SDimitry Andric 
591cfca06d7SDimitry Andric #define OMP_RTL(_Enum, _Name, _IsVarArg, _ReturnType, ...)                     \
592cfca06d7SDimitry Andric   {                                                                            \
593cfca06d7SDimitry Andric     SmallVector<Type *, 8> ArgsTypes({__VA_ARGS__});                           \
594cfca06d7SDimitry Andric     Function *F = M.getFunction(_Name);                                        \
595344a3780SDimitry Andric     RTLFunctions.insert(F);                                                    \
596cfca06d7SDimitry Andric     if (declMatchesRTFTypes(F, OMPBuilder._ReturnType, ArgsTypes)) {           \
597344a3780SDimitry Andric       RuntimeFunctionIDMap[F] = _Enum;                                         \
598cfca06d7SDimitry Andric       auto &RFI = RFIs[_Enum];                                                 \
599cfca06d7SDimitry Andric       RFI.Kind = _Enum;                                                        \
600cfca06d7SDimitry Andric       RFI.Name = _Name;                                                        \
601cfca06d7SDimitry Andric       RFI.IsVarArg = _IsVarArg;                                                \
602cfca06d7SDimitry Andric       RFI.ReturnType = OMPBuilder._ReturnType;                                 \
603cfca06d7SDimitry Andric       RFI.ArgumentTypes = std::move(ArgsTypes);                                \
604cfca06d7SDimitry Andric       RFI.Declaration = F;                                                     \
605cfca06d7SDimitry Andric       unsigned NumUses = collectUses(RFI);                                     \
606cfca06d7SDimitry Andric       (void)NumUses;                                                           \
607cfca06d7SDimitry Andric       LLVM_DEBUG({                                                             \
608cfca06d7SDimitry Andric         dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not")           \
609cfca06d7SDimitry Andric                << " found\n";                                                  \
610cfca06d7SDimitry Andric         if (RFI.Declaration)                                                   \
611cfca06d7SDimitry Andric           dbgs() << TAG << "-> got " << NumUses << " uses in "                 \
612cfca06d7SDimitry Andric                  << RFI.getNumFunctionsWithUses()                              \
613cfca06d7SDimitry Andric                  << " different functions.\n";                                 \
614cfca06d7SDimitry Andric       });                                                                      \
615cfca06d7SDimitry Andric     }                                                                          \
616cfca06d7SDimitry Andric   }
617cfca06d7SDimitry Andric #include "llvm/Frontend/OpenMP/OMPKinds.def"
618cfca06d7SDimitry Andric 
619e3b55780SDimitry Andric     // Remove the `noinline` attribute from `__kmpc`, `ompx::` and `omp_`
620e3b55780SDimitry Andric     // functions, except if `optnone` is present.
621e3b55780SDimitry Andric     if (isOpenMPDevice(M)) {
622e3b55780SDimitry Andric       for (Function &F : M) {
623e3b55780SDimitry Andric         for (StringRef Prefix : {"__kmpc", "_ZN4ompx", "omp_"})
624e3b55780SDimitry Andric           if (F.hasFnAttribute(Attribute::NoInline) &&
625b1c73532SDimitry Andric               F.getName().starts_with(Prefix) &&
626e3b55780SDimitry Andric               !F.hasFnAttribute(Attribute::OptimizeNone))
627e3b55780SDimitry Andric             F.removeFnAttr(Attribute::NoInline);
628e3b55780SDimitry Andric       }
629e3b55780SDimitry Andric     }
630e3b55780SDimitry Andric 
631cfca06d7SDimitry Andric     // TODO: We should attach the attributes defined in OMPKinds.def.
632cfca06d7SDimitry Andric   }
633cfca06d7SDimitry Andric 
634344a3780SDimitry Andric   /// Collection of known OpenMP runtime functions..
635344a3780SDimitry Andric   DenseSet<const Function *> RTLFunctions;
6367fa27ce4SDimitry Andric 
6377fa27ce4SDimitry Andric   /// Indicates if we have already linked in the OpenMP device library.
6387fa27ce4SDimitry Andric   bool OpenMPPostLink = false;
639344a3780SDimitry Andric };
640344a3780SDimitry Andric 
641344a3780SDimitry Andric template <typename Ty, bool InsertInvalidates = true>
642344a3780SDimitry Andric struct BooleanStateWithSetVector : public BooleanState {
contains__anon7bbaa8dc0111::BooleanStateWithSetVector643344a3780SDimitry Andric   bool contains(const Ty &Elem) const { return Set.contains(Elem); }
insert__anon7bbaa8dc0111::BooleanStateWithSetVector644344a3780SDimitry Andric   bool insert(const Ty &Elem) {
645344a3780SDimitry Andric     if (InsertInvalidates)
646344a3780SDimitry Andric       BooleanState::indicatePessimisticFixpoint();
647344a3780SDimitry Andric     return Set.insert(Elem);
648344a3780SDimitry Andric   }
649344a3780SDimitry Andric 
operator []__anon7bbaa8dc0111::BooleanStateWithSetVector650344a3780SDimitry Andric   const Ty &operator[](int Idx) const { return Set[Idx]; }
operator ==__anon7bbaa8dc0111::BooleanStateWithSetVector651344a3780SDimitry Andric   bool operator==(const BooleanStateWithSetVector &RHS) const {
652344a3780SDimitry Andric     return BooleanState::operator==(RHS) && Set == RHS.Set;
653344a3780SDimitry Andric   }
operator !=__anon7bbaa8dc0111::BooleanStateWithSetVector654344a3780SDimitry Andric   bool operator!=(const BooleanStateWithSetVector &RHS) const {
655344a3780SDimitry Andric     return !(*this == RHS);
656344a3780SDimitry Andric   }
657344a3780SDimitry Andric 
empty__anon7bbaa8dc0111::BooleanStateWithSetVector658344a3780SDimitry Andric   bool empty() const { return Set.empty(); }
size__anon7bbaa8dc0111::BooleanStateWithSetVector659344a3780SDimitry Andric   size_t size() const { return Set.size(); }
660344a3780SDimitry Andric 
661344a3780SDimitry Andric   /// "Clamp" this state with \p RHS.
operator ^=__anon7bbaa8dc0111::BooleanStateWithSetVector662344a3780SDimitry Andric   BooleanStateWithSetVector &operator^=(const BooleanStateWithSetVector &RHS) {
663344a3780SDimitry Andric     BooleanState::operator^=(RHS);
664344a3780SDimitry Andric     Set.insert(RHS.Set.begin(), RHS.Set.end());
665344a3780SDimitry Andric     return *this;
666344a3780SDimitry Andric   }
667344a3780SDimitry Andric 
668344a3780SDimitry Andric private:
669344a3780SDimitry Andric   /// A set to keep track of elements.
670344a3780SDimitry Andric   SetVector<Ty> Set;
671344a3780SDimitry Andric 
672344a3780SDimitry Andric public:
begin__anon7bbaa8dc0111::BooleanStateWithSetVector673344a3780SDimitry Andric   typename decltype(Set)::iterator begin() { return Set.begin(); }
end__anon7bbaa8dc0111::BooleanStateWithSetVector674344a3780SDimitry Andric   typename decltype(Set)::iterator end() { return Set.end(); }
begin__anon7bbaa8dc0111::BooleanStateWithSetVector675344a3780SDimitry Andric   typename decltype(Set)::const_iterator begin() const { return Set.begin(); }
end__anon7bbaa8dc0111::BooleanStateWithSetVector676344a3780SDimitry Andric   typename decltype(Set)::const_iterator end() const { return Set.end(); }
677344a3780SDimitry Andric };
678344a3780SDimitry Andric 
679344a3780SDimitry Andric template <typename Ty, bool InsertInvalidates = true>
680344a3780SDimitry Andric using BooleanStateWithPtrSetVector =
681344a3780SDimitry Andric     BooleanStateWithSetVector<Ty *, InsertInvalidates>;
682344a3780SDimitry Andric 
683344a3780SDimitry Andric struct KernelInfoState : AbstractState {
684344a3780SDimitry Andric   /// Flag to track if we reached a fixpoint.
685344a3780SDimitry Andric   bool IsAtFixpoint = false;
686344a3780SDimitry Andric 
687344a3780SDimitry Andric   /// The parallel regions (identified by the outlined parallel functions) that
688344a3780SDimitry Andric   /// can be reached from the associated function.
689b1c73532SDimitry Andric   BooleanStateWithPtrSetVector<CallBase, /* InsertInvalidates */ false>
690344a3780SDimitry Andric       ReachedKnownParallelRegions;
691344a3780SDimitry Andric 
692344a3780SDimitry Andric   /// State to track what parallel region we might reach.
693344a3780SDimitry Andric   BooleanStateWithPtrSetVector<CallBase> ReachedUnknownParallelRegions;
694344a3780SDimitry Andric 
695344a3780SDimitry Andric   /// State to track if we are in SPMD-mode, assumed or know, and why we decided
696344a3780SDimitry Andric   /// we cannot be. If it is assumed, then RequiresFullRuntime should also be
697344a3780SDimitry Andric   /// false.
698c0981da4SDimitry Andric   BooleanStateWithPtrSetVector<Instruction, false> SPMDCompatibilityTracker;
699344a3780SDimitry Andric 
700344a3780SDimitry Andric   /// The __kmpc_target_init call in this kernel, if any. If we find more than
701344a3780SDimitry Andric   /// one we abort as the kernel is malformed.
702344a3780SDimitry Andric   CallBase *KernelInitCB = nullptr;
703344a3780SDimitry Andric 
704b1c73532SDimitry Andric   /// The constant kernel environement as taken from and passed to
705b1c73532SDimitry Andric   /// __kmpc_target_init.
706b1c73532SDimitry Andric   ConstantStruct *KernelEnvC = nullptr;
707b1c73532SDimitry Andric 
708344a3780SDimitry Andric   /// The __kmpc_target_deinit call in this kernel, if any. If we find more than
709344a3780SDimitry Andric   /// one we abort as the kernel is malformed.
710344a3780SDimitry Andric   CallBase *KernelDeinitCB = nullptr;
711344a3780SDimitry Andric 
712344a3780SDimitry Andric   /// Flag to indicate if the associated function is a kernel entry.
713344a3780SDimitry Andric   bool IsKernelEntry = false;
714344a3780SDimitry Andric 
715344a3780SDimitry Andric   /// State to track what kernel entries can reach the associated function.
716344a3780SDimitry Andric   BooleanStateWithPtrSetVector<Function, false> ReachingKernelEntries;
717344a3780SDimitry Andric 
718344a3780SDimitry Andric   /// State to indicate if we can track parallel level of the associated
719344a3780SDimitry Andric   /// function. We will give up tracking if we encounter unknown caller or the
720344a3780SDimitry Andric   /// caller is __kmpc_parallel_51.
721344a3780SDimitry Andric   BooleanStateWithSetVector<uint8_t> ParallelLevels;
722344a3780SDimitry Andric 
723e3b55780SDimitry Andric   /// Flag that indicates if the kernel has nested Parallelism
724e3b55780SDimitry Andric   bool NestedParallelism = false;
725e3b55780SDimitry Andric 
726344a3780SDimitry Andric   /// Abstract State interface
727344a3780SDimitry Andric   ///{
728344a3780SDimitry Andric 
729145449b1SDimitry Andric   KernelInfoState() = default;
KernelInfoState__anon7bbaa8dc0111::KernelInfoState730344a3780SDimitry Andric   KernelInfoState(bool BestState) {
731344a3780SDimitry Andric     if (!BestState)
732344a3780SDimitry Andric       indicatePessimisticFixpoint();
733344a3780SDimitry Andric   }
734344a3780SDimitry Andric 
735344a3780SDimitry Andric   /// See AbstractState::isValidState(...)
isValidState__anon7bbaa8dc0111::KernelInfoState736344a3780SDimitry Andric   bool isValidState() const override { return true; }
737344a3780SDimitry Andric 
738344a3780SDimitry Andric   /// See AbstractState::isAtFixpoint(...)
isAtFixpoint__anon7bbaa8dc0111::KernelInfoState739344a3780SDimitry Andric   bool isAtFixpoint() const override { return IsAtFixpoint; }
740344a3780SDimitry Andric 
741344a3780SDimitry Andric   /// See AbstractState::indicatePessimisticFixpoint(...)
indicatePessimisticFixpoint__anon7bbaa8dc0111::KernelInfoState742344a3780SDimitry Andric   ChangeStatus indicatePessimisticFixpoint() override {
743344a3780SDimitry Andric     IsAtFixpoint = true;
744e3b55780SDimitry Andric     ParallelLevels.indicatePessimisticFixpoint();
745c0981da4SDimitry Andric     ReachingKernelEntries.indicatePessimisticFixpoint();
746344a3780SDimitry Andric     SPMDCompatibilityTracker.indicatePessimisticFixpoint();
747c0981da4SDimitry Andric     ReachedKnownParallelRegions.indicatePessimisticFixpoint();
748344a3780SDimitry Andric     ReachedUnknownParallelRegions.indicatePessimisticFixpoint();
749b1c73532SDimitry Andric     NestedParallelism = true;
750344a3780SDimitry Andric     return ChangeStatus::CHANGED;
751344a3780SDimitry Andric   }
752344a3780SDimitry Andric 
753344a3780SDimitry Andric   /// See AbstractState::indicateOptimisticFixpoint(...)
indicateOptimisticFixpoint__anon7bbaa8dc0111::KernelInfoState754344a3780SDimitry Andric   ChangeStatus indicateOptimisticFixpoint() override {
755344a3780SDimitry Andric     IsAtFixpoint = true;
756e3b55780SDimitry Andric     ParallelLevels.indicateOptimisticFixpoint();
757c0981da4SDimitry Andric     ReachingKernelEntries.indicateOptimisticFixpoint();
758c0981da4SDimitry Andric     SPMDCompatibilityTracker.indicateOptimisticFixpoint();
759c0981da4SDimitry Andric     ReachedKnownParallelRegions.indicateOptimisticFixpoint();
760c0981da4SDimitry Andric     ReachedUnknownParallelRegions.indicateOptimisticFixpoint();
761344a3780SDimitry Andric     return ChangeStatus::UNCHANGED;
762344a3780SDimitry Andric   }
763344a3780SDimitry Andric 
764344a3780SDimitry Andric   /// Return the assumed state
getAssumed__anon7bbaa8dc0111::KernelInfoState765344a3780SDimitry Andric   KernelInfoState &getAssumed() { return *this; }
getAssumed__anon7bbaa8dc0111::KernelInfoState766344a3780SDimitry Andric   const KernelInfoState &getAssumed() const { return *this; }
767344a3780SDimitry Andric 
operator ==__anon7bbaa8dc0111::KernelInfoState768344a3780SDimitry Andric   bool operator==(const KernelInfoState &RHS) const {
769344a3780SDimitry Andric     if (SPMDCompatibilityTracker != RHS.SPMDCompatibilityTracker)
770344a3780SDimitry Andric       return false;
771344a3780SDimitry Andric     if (ReachedKnownParallelRegions != RHS.ReachedKnownParallelRegions)
772344a3780SDimitry Andric       return false;
773344a3780SDimitry Andric     if (ReachedUnknownParallelRegions != RHS.ReachedUnknownParallelRegions)
774344a3780SDimitry Andric       return false;
775344a3780SDimitry Andric     if (ReachingKernelEntries != RHS.ReachingKernelEntries)
776344a3780SDimitry Andric       return false;
777e3b55780SDimitry Andric     if (ParallelLevels != RHS.ParallelLevels)
778e3b55780SDimitry Andric       return false;
779b1c73532SDimitry Andric     if (NestedParallelism != RHS.NestedParallelism)
780b1c73532SDimitry Andric       return false;
781344a3780SDimitry Andric     return true;
782344a3780SDimitry Andric   }
783344a3780SDimitry Andric 
784c0981da4SDimitry Andric   /// Returns true if this kernel contains any OpenMP parallel regions.
mayContainParallelRegion__anon7bbaa8dc0111::KernelInfoState785c0981da4SDimitry Andric   bool mayContainParallelRegion() {
786c0981da4SDimitry Andric     return !ReachedKnownParallelRegions.empty() ||
787c0981da4SDimitry Andric            !ReachedUnknownParallelRegions.empty();
788c0981da4SDimitry Andric   }
789c0981da4SDimitry Andric 
790344a3780SDimitry Andric   /// Return empty set as the best state of potential values.
getBestState__anon7bbaa8dc0111::KernelInfoState791344a3780SDimitry Andric   static KernelInfoState getBestState() { return KernelInfoState(true); }
792344a3780SDimitry Andric 
getBestState__anon7bbaa8dc0111::KernelInfoState793344a3780SDimitry Andric   static KernelInfoState getBestState(KernelInfoState &KIS) {
794344a3780SDimitry Andric     return getBestState();
795344a3780SDimitry Andric   }
796344a3780SDimitry Andric 
797344a3780SDimitry Andric   /// Return full set as the worst state of potential values.
getWorstState__anon7bbaa8dc0111::KernelInfoState798344a3780SDimitry Andric   static KernelInfoState getWorstState() { return KernelInfoState(false); }
799344a3780SDimitry Andric 
800344a3780SDimitry Andric   /// "Clamp" this state with \p KIS.
operator ^=__anon7bbaa8dc0111::KernelInfoState801344a3780SDimitry Andric   KernelInfoState operator^=(const KernelInfoState &KIS) {
802344a3780SDimitry Andric     // Do not merge two different _init and _deinit call sites.
803344a3780SDimitry Andric     if (KIS.KernelInitCB) {
804344a3780SDimitry Andric       if (KernelInitCB && KernelInitCB != KIS.KernelInitCB)
805c0981da4SDimitry Andric         llvm_unreachable("Kernel that calls another kernel violates OpenMP-Opt "
806c0981da4SDimitry Andric                          "assumptions.");
807344a3780SDimitry Andric       KernelInitCB = KIS.KernelInitCB;
808344a3780SDimitry Andric     }
809344a3780SDimitry Andric     if (KIS.KernelDeinitCB) {
810344a3780SDimitry Andric       if (KernelDeinitCB && KernelDeinitCB != KIS.KernelDeinitCB)
811c0981da4SDimitry Andric         llvm_unreachable("Kernel that calls another kernel violates OpenMP-Opt "
812c0981da4SDimitry Andric                          "assumptions.");
813344a3780SDimitry Andric       KernelDeinitCB = KIS.KernelDeinitCB;
814344a3780SDimitry Andric     }
815b1c73532SDimitry Andric     if (KIS.KernelEnvC) {
816b1c73532SDimitry Andric       if (KernelEnvC && KernelEnvC != KIS.KernelEnvC)
817b1c73532SDimitry Andric         llvm_unreachable("Kernel that calls another kernel violates OpenMP-Opt "
818b1c73532SDimitry Andric                          "assumptions.");
819b1c73532SDimitry Andric       KernelEnvC = KIS.KernelEnvC;
820b1c73532SDimitry Andric     }
821344a3780SDimitry Andric     SPMDCompatibilityTracker ^= KIS.SPMDCompatibilityTracker;
822344a3780SDimitry Andric     ReachedKnownParallelRegions ^= KIS.ReachedKnownParallelRegions;
823344a3780SDimitry Andric     ReachedUnknownParallelRegions ^= KIS.ReachedUnknownParallelRegions;
824e3b55780SDimitry Andric     NestedParallelism |= KIS.NestedParallelism;
825344a3780SDimitry Andric     return *this;
826344a3780SDimitry Andric   }
827344a3780SDimitry Andric 
operator &=__anon7bbaa8dc0111::KernelInfoState828344a3780SDimitry Andric   KernelInfoState operator&=(const KernelInfoState &KIS) {
829344a3780SDimitry Andric     return (*this ^= KIS);
830344a3780SDimitry Andric   }
831344a3780SDimitry Andric 
832344a3780SDimitry Andric   ///}
833cfca06d7SDimitry Andric };
834cfca06d7SDimitry Andric 
835b60736ecSDimitry Andric /// Used to map the values physically (in the IR) stored in an offload
836b60736ecSDimitry Andric /// array, to a vector in memory.
837b60736ecSDimitry Andric struct OffloadArray {
838b60736ecSDimitry Andric   /// Physical array (in the IR).
839b60736ecSDimitry Andric   AllocaInst *Array = nullptr;
840b60736ecSDimitry Andric   /// Mapped values.
841b60736ecSDimitry Andric   SmallVector<Value *, 8> StoredValues;
842b60736ecSDimitry Andric   /// Last stores made in the offload array.
843b60736ecSDimitry Andric   SmallVector<StoreInst *, 8> LastAccesses;
844b60736ecSDimitry Andric 
845b60736ecSDimitry Andric   OffloadArray() = default;
846b60736ecSDimitry Andric 
847b60736ecSDimitry Andric   /// Initializes the OffloadArray with the values stored in \p Array before
848b60736ecSDimitry Andric   /// instruction \p Before is reached. Returns false if the initialization
849b60736ecSDimitry Andric   /// fails.
850b60736ecSDimitry Andric   /// This MUST be used immediately after the construction of the object.
initialize__anon7bbaa8dc0111::OffloadArray851b60736ecSDimitry Andric   bool initialize(AllocaInst &Array, Instruction &Before) {
852b60736ecSDimitry Andric     if (!Array.getAllocatedType()->isArrayTy())
853b60736ecSDimitry Andric       return false;
854b60736ecSDimitry Andric 
855b60736ecSDimitry Andric     if (!getValues(Array, Before))
856b60736ecSDimitry Andric       return false;
857b60736ecSDimitry Andric 
858b60736ecSDimitry Andric     this->Array = &Array;
859b60736ecSDimitry Andric     return true;
860b60736ecSDimitry Andric   }
861b60736ecSDimitry Andric 
862b60736ecSDimitry Andric   static const unsigned DeviceIDArgNum = 1;
863b60736ecSDimitry Andric   static const unsigned BasePtrsArgNum = 3;
864b60736ecSDimitry Andric   static const unsigned PtrsArgNum = 4;
865b60736ecSDimitry Andric   static const unsigned SizesArgNum = 5;
866b60736ecSDimitry Andric 
867b60736ecSDimitry Andric private:
868b60736ecSDimitry Andric   /// Traverses the BasicBlock where \p Array is, collecting the stores made to
869b60736ecSDimitry Andric   /// \p Array, leaving StoredValues with the values stored before the
870b60736ecSDimitry Andric   /// instruction \p Before is reached.
getValues__anon7bbaa8dc0111::OffloadArray871b60736ecSDimitry Andric   bool getValues(AllocaInst &Array, Instruction &Before) {
872b60736ecSDimitry Andric     // Initialize container.
873b60736ecSDimitry Andric     const uint64_t NumValues = Array.getAllocatedType()->getArrayNumElements();
874b60736ecSDimitry Andric     StoredValues.assign(NumValues, nullptr);
875b60736ecSDimitry Andric     LastAccesses.assign(NumValues, nullptr);
876b60736ecSDimitry Andric 
877b60736ecSDimitry Andric     // TODO: This assumes the instruction \p Before is in the same
878b60736ecSDimitry Andric     //  BasicBlock as Array. Make it general, for any control flow graph.
879b60736ecSDimitry Andric     BasicBlock *BB = Array.getParent();
880b60736ecSDimitry Andric     if (BB != Before.getParent())
881b60736ecSDimitry Andric       return false;
882b60736ecSDimitry Andric 
883ac9a064cSDimitry Andric     const DataLayout &DL = Array.getDataLayout();
884b60736ecSDimitry Andric     const unsigned int PointerSize = DL.getPointerSize();
885b60736ecSDimitry Andric 
886b60736ecSDimitry Andric     for (Instruction &I : *BB) {
887b60736ecSDimitry Andric       if (&I == &Before)
888b60736ecSDimitry Andric         break;
889b60736ecSDimitry Andric 
890b60736ecSDimitry Andric       if (!isa<StoreInst>(&I))
891b60736ecSDimitry Andric         continue;
892b60736ecSDimitry Andric 
893b60736ecSDimitry Andric       auto *S = cast<StoreInst>(&I);
894b60736ecSDimitry Andric       int64_t Offset = -1;
895b60736ecSDimitry Andric       auto *Dst =
896b60736ecSDimitry Andric           GetPointerBaseWithConstantOffset(S->getPointerOperand(), Offset, DL);
897b60736ecSDimitry Andric       if (Dst == &Array) {
898b60736ecSDimitry Andric         int64_t Idx = Offset / PointerSize;
899b60736ecSDimitry Andric         StoredValues[Idx] = getUnderlyingObject(S->getValueOperand());
900b60736ecSDimitry Andric         LastAccesses[Idx] = S;
901b60736ecSDimitry Andric       }
902b60736ecSDimitry Andric     }
903b60736ecSDimitry Andric 
904b60736ecSDimitry Andric     return isFilled();
905b60736ecSDimitry Andric   }
906b60736ecSDimitry Andric 
907b60736ecSDimitry Andric   /// Returns true if all values in StoredValues and
908b60736ecSDimitry Andric   /// LastAccesses are not nullptrs.
isFilled__anon7bbaa8dc0111::OffloadArray909b60736ecSDimitry Andric   bool isFilled() {
910b60736ecSDimitry Andric     const unsigned NumValues = StoredValues.size();
911b60736ecSDimitry Andric     for (unsigned I = 0; I < NumValues; ++I) {
912b60736ecSDimitry Andric       if (!StoredValues[I] || !LastAccesses[I])
913b60736ecSDimitry Andric         return false;
914b60736ecSDimitry Andric     }
915b60736ecSDimitry Andric 
916b60736ecSDimitry Andric     return true;
917b60736ecSDimitry Andric   }
918b60736ecSDimitry Andric };
919b60736ecSDimitry Andric 
920cfca06d7SDimitry Andric struct OpenMPOpt {
921cfca06d7SDimitry Andric 
922cfca06d7SDimitry Andric   using OptimizationRemarkGetter =
923cfca06d7SDimitry Andric       function_ref<OptimizationRemarkEmitter &(Function *)>;
924cfca06d7SDimitry Andric 
OpenMPOpt__anon7bbaa8dc0111::OpenMPOpt925cfca06d7SDimitry Andric   OpenMPOpt(SmallVectorImpl<Function *> &SCC, CallGraphUpdater &CGUpdater,
926cfca06d7SDimitry Andric             OptimizationRemarkGetter OREGetter,
927cfca06d7SDimitry Andric             OMPInformationCache &OMPInfoCache, Attributor &A)
928cfca06d7SDimitry Andric       : M(*(*SCC.begin())->getParent()), SCC(SCC), CGUpdater(CGUpdater),
929cfca06d7SDimitry Andric         OREGetter(OREGetter), OMPInfoCache(OMPInfoCache), A(A) {}
930cfca06d7SDimitry Andric 
931b60736ecSDimitry Andric   /// Check if any remarks are enabled for openmp-opt
remarksEnabled__anon7bbaa8dc0111::OpenMPOpt932b60736ecSDimitry Andric   bool remarksEnabled() {
933b60736ecSDimitry Andric     auto &Ctx = M.getContext();
934b60736ecSDimitry Andric     return Ctx.getDiagHandlerPtr()->isAnyRemarkEnabled(DEBUG_TYPE);
935b60736ecSDimitry Andric   }
936b60736ecSDimitry Andric 
9377fa27ce4SDimitry Andric   /// Run all OpenMP optimizations on the underlying SCC.
run__anon7bbaa8dc0111::OpenMPOpt938344a3780SDimitry Andric   bool run(bool IsModulePass) {
939cfca06d7SDimitry Andric     if (SCC.empty())
940cfca06d7SDimitry Andric       return false;
941cfca06d7SDimitry Andric 
942cfca06d7SDimitry Andric     bool Changed = false;
943cfca06d7SDimitry Andric 
944cfca06d7SDimitry Andric     LLVM_DEBUG(dbgs() << TAG << "Run on SCC with " << SCC.size()
9457fa27ce4SDimitry Andric                       << " functions\n");
946cfca06d7SDimitry Andric 
947344a3780SDimitry Andric     if (IsModulePass) {
948344a3780SDimitry Andric       Changed |= runAttributor(IsModulePass);
949344a3780SDimitry Andric 
950344a3780SDimitry Andric       // Recollect uses, in case Attributor deleted any.
951344a3780SDimitry Andric       OMPInfoCache.recollectUses();
952344a3780SDimitry Andric 
953344a3780SDimitry Andric       // TODO: This should be folded into buildCustomStateMachine.
954344a3780SDimitry Andric       Changed |= rewriteDeviceCodeStateMachine();
955344a3780SDimitry Andric 
956344a3780SDimitry Andric       if (remarksEnabled())
957344a3780SDimitry Andric         analysisGlobalization();
958344a3780SDimitry Andric     } else {
959cfca06d7SDimitry Andric       if (PrintICVValues)
960cfca06d7SDimitry Andric         printICVs();
961cfca06d7SDimitry Andric       if (PrintOpenMPKernels)
962cfca06d7SDimitry Andric         printKernels();
963cfca06d7SDimitry Andric 
964344a3780SDimitry Andric       Changed |= runAttributor(IsModulePass);
965cfca06d7SDimitry Andric 
966cfca06d7SDimitry Andric       // Recollect uses, in case Attributor deleted any.
967cfca06d7SDimitry Andric       OMPInfoCache.recollectUses();
968cfca06d7SDimitry Andric 
969cfca06d7SDimitry Andric       Changed |= deleteParallelRegions();
970344a3780SDimitry Andric 
971b60736ecSDimitry Andric       if (HideMemoryTransferLatency)
972b60736ecSDimitry Andric         Changed |= hideMemTransfersLatency();
973b60736ecSDimitry Andric       Changed |= deduplicateRuntimeCalls();
974b60736ecSDimitry Andric       if (EnableParallelRegionMerging) {
975b60736ecSDimitry Andric         if (mergeParallelRegions()) {
976b60736ecSDimitry Andric           deduplicateRuntimeCalls();
977b60736ecSDimitry Andric           Changed = true;
978b60736ecSDimitry Andric         }
979b60736ecSDimitry Andric       }
980344a3780SDimitry Andric     }
981cfca06d7SDimitry Andric 
982b1c73532SDimitry Andric     if (OMPInfoCache.OpenMPPostLink)
983b1c73532SDimitry Andric       Changed |= removeRuntimeSymbols();
984b1c73532SDimitry Andric 
985cfca06d7SDimitry Andric     return Changed;
986cfca06d7SDimitry Andric   }
987cfca06d7SDimitry Andric 
988cfca06d7SDimitry Andric   /// Print initial ICV values for testing.
989cfca06d7SDimitry Andric   /// FIXME: This should be done from the Attributor once it is added.
printICVs__anon7bbaa8dc0111::OpenMPOpt990cfca06d7SDimitry Andric   void printICVs() const {
991b60736ecSDimitry Andric     InternalControlVar ICVs[] = {ICV_nthreads, ICV_active_levels, ICV_cancel,
992b60736ecSDimitry Andric                                  ICV_proc_bind};
993cfca06d7SDimitry Andric 
994e3b55780SDimitry Andric     for (Function *F : SCC) {
995cfca06d7SDimitry Andric       for (auto ICV : ICVs) {
996cfca06d7SDimitry Andric         auto ICVInfo = OMPInfoCache.ICVs[ICV];
997344a3780SDimitry Andric         auto Remark = [&](OptimizationRemarkAnalysis ORA) {
998344a3780SDimitry Andric           return ORA << "OpenMP ICV " << ore::NV("OpenMPICV", ICVInfo.Name)
999cfca06d7SDimitry Andric                      << " Value: "
1000cfca06d7SDimitry Andric                      << (ICVInfo.InitValue
1001344a3780SDimitry Andric                              ? toString(ICVInfo.InitValue->getValue(), 10, true)
1002cfca06d7SDimitry Andric                              : "IMPLEMENTATION_DEFINED");
1003cfca06d7SDimitry Andric         };
1004cfca06d7SDimitry Andric 
1005344a3780SDimitry Andric         emitRemark<OptimizationRemarkAnalysis>(F, "OpenMPICVTracker", Remark);
1006cfca06d7SDimitry Andric       }
1007cfca06d7SDimitry Andric     }
1008cfca06d7SDimitry Andric   }
1009cfca06d7SDimitry Andric 
1010cfca06d7SDimitry Andric   /// Print OpenMP GPU kernels for testing.
printKernels__anon7bbaa8dc0111::OpenMPOpt1011cfca06d7SDimitry Andric   void printKernels() const {
1012cfca06d7SDimitry Andric     for (Function *F : SCC) {
1013b1c73532SDimitry Andric       if (!omp::isOpenMPKernel(*F))
1014cfca06d7SDimitry Andric         continue;
1015cfca06d7SDimitry Andric 
1016344a3780SDimitry Andric       auto Remark = [&](OptimizationRemarkAnalysis ORA) {
1017344a3780SDimitry Andric         return ORA << "OpenMP GPU kernel "
1018cfca06d7SDimitry Andric                    << ore::NV("OpenMPGPUKernel", F->getName()) << "\n";
1019cfca06d7SDimitry Andric       };
1020cfca06d7SDimitry Andric 
1021344a3780SDimitry Andric       emitRemark<OptimizationRemarkAnalysis>(F, "OpenMPGPU", Remark);
1022cfca06d7SDimitry Andric     }
1023cfca06d7SDimitry Andric   }
1024cfca06d7SDimitry Andric 
1025cfca06d7SDimitry Andric   /// Return the call if \p U is a callee use in a regular call. If \p RFI is
1026cfca06d7SDimitry Andric   /// given it has to be the callee or a nullptr is returned.
getCallIfRegularCall__anon7bbaa8dc0111::OpenMPOpt1027cfca06d7SDimitry Andric   static CallInst *getCallIfRegularCall(
1028cfca06d7SDimitry Andric       Use &U, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) {
1029cfca06d7SDimitry Andric     CallInst *CI = dyn_cast<CallInst>(U.getUser());
1030cfca06d7SDimitry Andric     if (CI && CI->isCallee(&U) && !CI->hasOperandBundles() &&
1031344a3780SDimitry Andric         (!RFI ||
1032344a3780SDimitry Andric          (RFI->Declaration && CI->getCalledFunction() == RFI->Declaration)))
1033cfca06d7SDimitry Andric       return CI;
1034cfca06d7SDimitry Andric     return nullptr;
1035cfca06d7SDimitry Andric   }
1036cfca06d7SDimitry Andric 
1037cfca06d7SDimitry Andric   /// Return the call if \p V is a regular call. If \p RFI is given it has to be
1038cfca06d7SDimitry Andric   /// the callee or a nullptr is returned.
getCallIfRegularCall__anon7bbaa8dc0111::OpenMPOpt1039cfca06d7SDimitry Andric   static CallInst *getCallIfRegularCall(
1040cfca06d7SDimitry Andric       Value &V, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) {
1041cfca06d7SDimitry Andric     CallInst *CI = dyn_cast<CallInst>(&V);
1042cfca06d7SDimitry Andric     if (CI && !CI->hasOperandBundles() &&
1043344a3780SDimitry Andric         (!RFI ||
1044344a3780SDimitry Andric          (RFI->Declaration && CI->getCalledFunction() == RFI->Declaration)))
1045cfca06d7SDimitry Andric       return CI;
1046cfca06d7SDimitry Andric     return nullptr;
1047cfca06d7SDimitry Andric   }
1048cfca06d7SDimitry Andric 
1049cfca06d7SDimitry Andric private:
1050b60736ecSDimitry Andric   /// Merge parallel regions when it is safe.
mergeParallelRegions__anon7bbaa8dc0111::OpenMPOpt1051b60736ecSDimitry Andric   bool mergeParallelRegions() {
1052b60736ecSDimitry Andric     const unsigned CallbackCalleeOperand = 2;
1053b60736ecSDimitry Andric     const unsigned CallbackFirstArgOperand = 3;
1054b60736ecSDimitry Andric     using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1055b60736ecSDimitry Andric 
1056b60736ecSDimitry Andric     // Check if there are any __kmpc_fork_call calls to merge.
1057b60736ecSDimitry Andric     OMPInformationCache::RuntimeFunctionInfo &RFI =
1058b60736ecSDimitry Andric         OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call];
1059b60736ecSDimitry Andric 
1060b60736ecSDimitry Andric     if (!RFI.Declaration)
1061b60736ecSDimitry Andric       return false;
1062b60736ecSDimitry Andric 
1063b60736ecSDimitry Andric     // Unmergable calls that prevent merging a parallel region.
1064b60736ecSDimitry Andric     OMPInformationCache::RuntimeFunctionInfo UnmergableCallsInfo[] = {
1065b60736ecSDimitry Andric         OMPInfoCache.RFIs[OMPRTL___kmpc_push_proc_bind],
1066b60736ecSDimitry Andric         OMPInfoCache.RFIs[OMPRTL___kmpc_push_num_threads],
1067b60736ecSDimitry Andric     };
1068b60736ecSDimitry Andric 
1069b60736ecSDimitry Andric     bool Changed = false;
1070b60736ecSDimitry Andric     LoopInfo *LI = nullptr;
1071b60736ecSDimitry Andric     DominatorTree *DT = nullptr;
1072b60736ecSDimitry Andric 
1073b60736ecSDimitry Andric     SmallDenseMap<BasicBlock *, SmallPtrSet<Instruction *, 4>> BB2PRMap;
1074b60736ecSDimitry Andric 
1075b60736ecSDimitry Andric     BasicBlock *StartBB = nullptr, *EndBB = nullptr;
1076145449b1SDimitry Andric     auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
1077b60736ecSDimitry Andric       BasicBlock *CGStartBB = CodeGenIP.getBlock();
1078b60736ecSDimitry Andric       BasicBlock *CGEndBB =
1079b60736ecSDimitry Andric           SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI);
1080b60736ecSDimitry Andric       assert(StartBB != nullptr && "StartBB should not be null");
1081b60736ecSDimitry Andric       CGStartBB->getTerminator()->setSuccessor(0, StartBB);
1082b60736ecSDimitry Andric       assert(EndBB != nullptr && "EndBB should not be null");
1083b60736ecSDimitry Andric       EndBB->getTerminator()->setSuccessor(0, CGEndBB);
1084b60736ecSDimitry Andric     };
1085b60736ecSDimitry Andric 
1086b60736ecSDimitry Andric     auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &,
1087b60736ecSDimitry Andric                       Value &Inner, Value *&ReplacementValue) -> InsertPointTy {
1088b60736ecSDimitry Andric       ReplacementValue = &Inner;
1089b60736ecSDimitry Andric       return CodeGenIP;
1090b60736ecSDimitry Andric     };
1091b60736ecSDimitry Andric 
1092b60736ecSDimitry Andric     auto FiniCB = [&](InsertPointTy CodeGenIP) {};
1093b60736ecSDimitry Andric 
1094b60736ecSDimitry Andric     /// Create a sequential execution region within a merged parallel region,
1095b60736ecSDimitry Andric     /// encapsulated in a master construct with a barrier for synchronization.
1096b60736ecSDimitry Andric     auto CreateSequentialRegion = [&](Function *OuterFn,
1097b60736ecSDimitry Andric                                       BasicBlock *OuterPredBB,
1098b60736ecSDimitry Andric                                       Instruction *SeqStartI,
1099b60736ecSDimitry Andric                                       Instruction *SeqEndI) {
1100b60736ecSDimitry Andric       // Isolate the instructions of the sequential region to a separate
1101b60736ecSDimitry Andric       // block.
1102b60736ecSDimitry Andric       BasicBlock *ParentBB = SeqStartI->getParent();
1103b60736ecSDimitry Andric       BasicBlock *SeqEndBB =
1104b60736ecSDimitry Andric           SplitBlock(ParentBB, SeqEndI->getNextNode(), DT, LI);
1105b60736ecSDimitry Andric       BasicBlock *SeqAfterBB =
1106b60736ecSDimitry Andric           SplitBlock(SeqEndBB, &*SeqEndBB->getFirstInsertionPt(), DT, LI);
1107b60736ecSDimitry Andric       BasicBlock *SeqStartBB =
1108b60736ecSDimitry Andric           SplitBlock(ParentBB, SeqStartI, DT, LI, nullptr, "seq.par.merged");
1109b60736ecSDimitry Andric 
1110b60736ecSDimitry Andric       assert(ParentBB->getUniqueSuccessor() == SeqStartBB &&
1111b60736ecSDimitry Andric              "Expected a different CFG");
1112b60736ecSDimitry Andric       const DebugLoc DL = ParentBB->getTerminator()->getDebugLoc();
1113b60736ecSDimitry Andric       ParentBB->getTerminator()->eraseFromParent();
1114b60736ecSDimitry Andric 
1115145449b1SDimitry Andric       auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
1116b60736ecSDimitry Andric         BasicBlock *CGStartBB = CodeGenIP.getBlock();
1117b60736ecSDimitry Andric         BasicBlock *CGEndBB =
1118b60736ecSDimitry Andric             SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI);
1119b60736ecSDimitry Andric         assert(SeqStartBB != nullptr && "SeqStartBB should not be null");
1120b60736ecSDimitry Andric         CGStartBB->getTerminator()->setSuccessor(0, SeqStartBB);
1121b60736ecSDimitry Andric         assert(SeqEndBB != nullptr && "SeqEndBB should not be null");
1122b60736ecSDimitry Andric         SeqEndBB->getTerminator()->setSuccessor(0, CGEndBB);
1123b60736ecSDimitry Andric       };
1124b60736ecSDimitry Andric       auto FiniCB = [&](InsertPointTy CodeGenIP) {};
1125b60736ecSDimitry Andric 
1126b60736ecSDimitry Andric       // Find outputs from the sequential region to outside users and
1127b60736ecSDimitry Andric       // broadcast their values to them.
1128b60736ecSDimitry Andric       for (Instruction &I : *SeqStartBB) {
1129b60736ecSDimitry Andric         SmallPtrSet<Instruction *, 4> OutsideUsers;
1130b60736ecSDimitry Andric         for (User *Usr : I.users()) {
1131b60736ecSDimitry Andric           Instruction &UsrI = *cast<Instruction>(Usr);
1132b60736ecSDimitry Andric           // Ignore outputs to LT intrinsics, code extraction for the merged
1133b60736ecSDimitry Andric           // parallel region will fix them.
1134b60736ecSDimitry Andric           if (UsrI.isLifetimeStartOrEnd())
1135b60736ecSDimitry Andric             continue;
1136b60736ecSDimitry Andric 
1137b60736ecSDimitry Andric           if (UsrI.getParent() != SeqStartBB)
1138b60736ecSDimitry Andric             OutsideUsers.insert(&UsrI);
1139b60736ecSDimitry Andric         }
1140b60736ecSDimitry Andric 
1141b60736ecSDimitry Andric         if (OutsideUsers.empty())
1142b60736ecSDimitry Andric           continue;
1143b60736ecSDimitry Andric 
1144b60736ecSDimitry Andric         // Emit an alloca in the outer region to store the broadcasted
1145b60736ecSDimitry Andric         // value.
1146b60736ecSDimitry Andric         const DataLayout &DL = M.getDataLayout();
1147b60736ecSDimitry Andric         AllocaInst *AllocaI = new AllocaInst(
1148b60736ecSDimitry Andric             I.getType(), DL.getAllocaAddrSpace(), nullptr,
1149ac9a064cSDimitry Andric             I.getName() + ".seq.output.alloc", OuterFn->front().begin());
1150b60736ecSDimitry Andric 
1151b60736ecSDimitry Andric         // Emit a store instruction in the sequential BB to update the
1152b60736ecSDimitry Andric         // value.
1153ac9a064cSDimitry Andric         new StoreInst(&I, AllocaI, SeqStartBB->getTerminator()->getIterator());
1154b60736ecSDimitry Andric 
1155b60736ecSDimitry Andric         // Emit a load instruction and replace the use of the output value
1156b60736ecSDimitry Andric         // with it.
1157b60736ecSDimitry Andric         for (Instruction *UsrI : OutsideUsers) {
1158ac9a064cSDimitry Andric           LoadInst *LoadI = new LoadInst(I.getType(), AllocaI,
1159ac9a064cSDimitry Andric                                          I.getName() + ".seq.output.load",
1160ac9a064cSDimitry Andric                                          UsrI->getIterator());
1161b60736ecSDimitry Andric           UsrI->replaceUsesOfWith(&I, LoadI);
1162b60736ecSDimitry Andric         }
1163b60736ecSDimitry Andric       }
1164b60736ecSDimitry Andric 
1165b60736ecSDimitry Andric       OpenMPIRBuilder::LocationDescription Loc(
1166b60736ecSDimitry Andric           InsertPointTy(ParentBB, ParentBB->end()), DL);
1167b60736ecSDimitry Andric       InsertPointTy SeqAfterIP =
1168b60736ecSDimitry Andric           OMPInfoCache.OMPBuilder.createMaster(Loc, BodyGenCB, FiniCB);
1169b60736ecSDimitry Andric 
1170b60736ecSDimitry Andric       OMPInfoCache.OMPBuilder.createBarrier(SeqAfterIP, OMPD_parallel);
1171b60736ecSDimitry Andric 
1172b60736ecSDimitry Andric       BranchInst::Create(SeqAfterBB, SeqAfterIP.getBlock());
1173b60736ecSDimitry Andric 
1174b60736ecSDimitry Andric       LLVM_DEBUG(dbgs() << TAG << "After sequential inlining " << *OuterFn
1175b60736ecSDimitry Andric                         << "\n");
1176b60736ecSDimitry Andric     };
1177b60736ecSDimitry Andric 
1178b60736ecSDimitry Andric     // Helper to merge the __kmpc_fork_call calls in MergableCIs. They are all
1179b60736ecSDimitry Andric     // contained in BB and only separated by instructions that can be
1180b60736ecSDimitry Andric     // redundantly executed in parallel. The block BB is split before the first
1181b60736ecSDimitry Andric     // call (in MergableCIs) and after the last so the entire region we merge
1182b60736ecSDimitry Andric     // into a single parallel region is contained in a single basic block
1183b60736ecSDimitry Andric     // without any other instructions. We use the OpenMPIRBuilder to outline
1184b60736ecSDimitry Andric     // that block and call the resulting function via __kmpc_fork_call.
11856f8fc217SDimitry Andric     auto Merge = [&](const SmallVectorImpl<CallInst *> &MergableCIs,
11866f8fc217SDimitry Andric                      BasicBlock *BB) {
1187b60736ecSDimitry Andric       // TODO: Change the interface to allow single CIs expanded, e.g, to
1188b60736ecSDimitry Andric       // include an outer loop.
1189b60736ecSDimitry Andric       assert(MergableCIs.size() > 1 && "Assumed multiple mergable CIs");
1190b60736ecSDimitry Andric 
1191b60736ecSDimitry Andric       auto Remark = [&](OptimizationRemark OR) {
1192344a3780SDimitry Andric         OR << "Parallel region merged with parallel region"
1193344a3780SDimitry Andric            << (MergableCIs.size() > 2 ? "s" : "") << " at ";
1194b60736ecSDimitry Andric         for (auto *CI : llvm::drop_begin(MergableCIs)) {
1195b60736ecSDimitry Andric           OR << ore::NV("OpenMPParallelMerge", CI->getDebugLoc());
1196b60736ecSDimitry Andric           if (CI != MergableCIs.back())
1197b60736ecSDimitry Andric             OR << ", ";
1198b60736ecSDimitry Andric         }
1199344a3780SDimitry Andric         return OR << ".";
1200b60736ecSDimitry Andric       };
1201b60736ecSDimitry Andric 
1202344a3780SDimitry Andric       emitRemark<OptimizationRemark>(MergableCIs.front(), "OMP150", Remark);
1203b60736ecSDimitry Andric 
1204b60736ecSDimitry Andric       Function *OriginalFn = BB->getParent();
1205b60736ecSDimitry Andric       LLVM_DEBUG(dbgs() << TAG << "Merge " << MergableCIs.size()
1206b60736ecSDimitry Andric                         << " parallel regions in " << OriginalFn->getName()
1207b60736ecSDimitry Andric                         << "\n");
1208b60736ecSDimitry Andric 
1209b60736ecSDimitry Andric       // Isolate the calls to merge in a separate block.
1210b60736ecSDimitry Andric       EndBB = SplitBlock(BB, MergableCIs.back()->getNextNode(), DT, LI);
1211b60736ecSDimitry Andric       BasicBlock *AfterBB =
1212b60736ecSDimitry Andric           SplitBlock(EndBB, &*EndBB->getFirstInsertionPt(), DT, LI);
1213b60736ecSDimitry Andric       StartBB = SplitBlock(BB, MergableCIs.front(), DT, LI, nullptr,
1214b60736ecSDimitry Andric                            "omp.par.merged");
1215b60736ecSDimitry Andric 
1216b60736ecSDimitry Andric       assert(BB->getUniqueSuccessor() == StartBB && "Expected a different CFG");
1217b60736ecSDimitry Andric       const DebugLoc DL = BB->getTerminator()->getDebugLoc();
1218b60736ecSDimitry Andric       BB->getTerminator()->eraseFromParent();
1219b60736ecSDimitry Andric 
1220b60736ecSDimitry Andric       // Create sequential regions for sequential instructions that are
1221b60736ecSDimitry Andric       // in-between mergable parallel regions.
1222b60736ecSDimitry Andric       for (auto *It = MergableCIs.begin(), *End = MergableCIs.end() - 1;
1223b60736ecSDimitry Andric            It != End; ++It) {
1224b60736ecSDimitry Andric         Instruction *ForkCI = *It;
1225b60736ecSDimitry Andric         Instruction *NextForkCI = *(It + 1);
1226b60736ecSDimitry Andric 
1227b60736ecSDimitry Andric         // Continue if there are not in-between instructions.
1228b60736ecSDimitry Andric         if (ForkCI->getNextNode() == NextForkCI)
1229b60736ecSDimitry Andric           continue;
1230b60736ecSDimitry Andric 
1231b60736ecSDimitry Andric         CreateSequentialRegion(OriginalFn, BB, ForkCI->getNextNode(),
1232b60736ecSDimitry Andric                                NextForkCI->getPrevNode());
1233b60736ecSDimitry Andric       }
1234b60736ecSDimitry Andric 
1235b60736ecSDimitry Andric       OpenMPIRBuilder::LocationDescription Loc(InsertPointTy(BB, BB->end()),
1236b60736ecSDimitry Andric                                                DL);
1237b60736ecSDimitry Andric       IRBuilder<>::InsertPoint AllocaIP(
1238b60736ecSDimitry Andric           &OriginalFn->getEntryBlock(),
1239b60736ecSDimitry Andric           OriginalFn->getEntryBlock().getFirstInsertionPt());
1240b60736ecSDimitry Andric       // Create the merged parallel region with default proc binding, to
1241b60736ecSDimitry Andric       // avoid overriding binding settings, and without explicit cancellation.
1242b60736ecSDimitry Andric       InsertPointTy AfterIP = OMPInfoCache.OMPBuilder.createParallel(
1243b60736ecSDimitry Andric           Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr, nullptr,
1244b60736ecSDimitry Andric           OMP_PROC_BIND_default, /* IsCancellable */ false);
1245b60736ecSDimitry Andric       BranchInst::Create(AfterBB, AfterIP.getBlock());
1246b60736ecSDimitry Andric 
1247b60736ecSDimitry Andric       // Perform the actual outlining.
12486f8fc217SDimitry Andric       OMPInfoCache.OMPBuilder.finalize(OriginalFn);
1249b60736ecSDimitry Andric 
1250b60736ecSDimitry Andric       Function *OutlinedFn = MergableCIs.front()->getCaller();
1251b60736ecSDimitry Andric 
1252b60736ecSDimitry Andric       // Replace the __kmpc_fork_call calls with direct calls to the outlined
1253b60736ecSDimitry Andric       // callbacks.
1254b60736ecSDimitry Andric       SmallVector<Value *, 8> Args;
1255b60736ecSDimitry Andric       for (auto *CI : MergableCIs) {
1256145449b1SDimitry Andric         Value *Callee = CI->getArgOperand(CallbackCalleeOperand);
1257145449b1SDimitry Andric         FunctionType *FT = OMPInfoCache.OMPBuilder.ParallelTask;
1258b60736ecSDimitry Andric         Args.clear();
1259b60736ecSDimitry Andric         Args.push_back(OutlinedFn->getArg(0));
1260b60736ecSDimitry Andric         Args.push_back(OutlinedFn->getArg(1));
1261c0981da4SDimitry Andric         for (unsigned U = CallbackFirstArgOperand, E = CI->arg_size(); U < E;
1262c0981da4SDimitry Andric              ++U)
1263b60736ecSDimitry Andric           Args.push_back(CI->getArgOperand(U));
1264b60736ecSDimitry Andric 
1265ac9a064cSDimitry Andric         CallInst *NewCI =
1266ac9a064cSDimitry Andric             CallInst::Create(FT, Callee, Args, "", CI->getIterator());
1267b60736ecSDimitry Andric         if (CI->getDebugLoc())
1268b60736ecSDimitry Andric           NewCI->setDebugLoc(CI->getDebugLoc());
1269b60736ecSDimitry Andric 
1270b60736ecSDimitry Andric         // Forward parameter attributes from the callback to the callee.
1271c0981da4SDimitry Andric         for (unsigned U = CallbackFirstArgOperand, E = CI->arg_size(); U < E;
1272c0981da4SDimitry Andric              ++U)
1273c0981da4SDimitry Andric           for (const Attribute &A : CI->getAttributes().getParamAttrs(U))
1274b60736ecSDimitry Andric             NewCI->addParamAttr(
1275b60736ecSDimitry Andric                 U - (CallbackFirstArgOperand - CallbackCalleeOperand), A);
1276b60736ecSDimitry Andric 
1277b60736ecSDimitry Andric         // Emit an explicit barrier to replace the implicit fork-join barrier.
1278b60736ecSDimitry Andric         if (CI != MergableCIs.back()) {
1279b60736ecSDimitry Andric           // TODO: Remove barrier if the merged parallel region includes the
1280b60736ecSDimitry Andric           // 'nowait' clause.
1281b60736ecSDimitry Andric           OMPInfoCache.OMPBuilder.createBarrier(
1282b60736ecSDimitry Andric               InsertPointTy(NewCI->getParent(),
1283b60736ecSDimitry Andric                             NewCI->getNextNode()->getIterator()),
1284b60736ecSDimitry Andric               OMPD_parallel);
1285b60736ecSDimitry Andric         }
1286b60736ecSDimitry Andric 
1287b60736ecSDimitry Andric         CI->eraseFromParent();
1288b60736ecSDimitry Andric       }
1289b60736ecSDimitry Andric 
1290b60736ecSDimitry Andric       assert(OutlinedFn != OriginalFn && "Outlining failed");
1291b60736ecSDimitry Andric       CGUpdater.registerOutlinedFunction(*OriginalFn, *OutlinedFn);
1292b60736ecSDimitry Andric       CGUpdater.reanalyzeFunction(*OriginalFn);
1293b60736ecSDimitry Andric 
1294b60736ecSDimitry Andric       NumOpenMPParallelRegionsMerged += MergableCIs.size();
1295b60736ecSDimitry Andric 
1296b60736ecSDimitry Andric       return true;
1297b60736ecSDimitry Andric     };
1298b60736ecSDimitry Andric 
1299b60736ecSDimitry Andric     // Helper function that identifes sequences of
1300b60736ecSDimitry Andric     // __kmpc_fork_call uses in a basic block.
1301b60736ecSDimitry Andric     auto DetectPRsCB = [&](Use &U, Function &F) {
1302b60736ecSDimitry Andric       CallInst *CI = getCallIfRegularCall(U, &RFI);
1303b60736ecSDimitry Andric       BB2PRMap[CI->getParent()].insert(CI);
1304b60736ecSDimitry Andric 
1305b60736ecSDimitry Andric       return false;
1306b60736ecSDimitry Andric     };
1307b60736ecSDimitry Andric 
1308b60736ecSDimitry Andric     BB2PRMap.clear();
1309b60736ecSDimitry Andric     RFI.foreachUse(SCC, DetectPRsCB);
1310b60736ecSDimitry Andric     SmallVector<SmallVector<CallInst *, 4>, 4> MergableCIsVector;
1311b60736ecSDimitry Andric     // Find mergable parallel regions within a basic block that are
1312b60736ecSDimitry Andric     // safe to merge, that is any in-between instructions can safely
1313b60736ecSDimitry Andric     // execute in parallel after merging.
1314b60736ecSDimitry Andric     // TODO: support merging across basic-blocks.
1315b60736ecSDimitry Andric     for (auto &It : BB2PRMap) {
1316b60736ecSDimitry Andric       auto &CIs = It.getSecond();
1317b60736ecSDimitry Andric       if (CIs.size() < 2)
1318b60736ecSDimitry Andric         continue;
1319b60736ecSDimitry Andric 
1320b60736ecSDimitry Andric       BasicBlock *BB = It.getFirst();
1321b60736ecSDimitry Andric       SmallVector<CallInst *, 4> MergableCIs;
1322b60736ecSDimitry Andric 
1323b60736ecSDimitry Andric       /// Returns true if the instruction is mergable, false otherwise.
1324b60736ecSDimitry Andric       /// A terminator instruction is unmergable by definition since merging
1325b60736ecSDimitry Andric       /// works within a BB. Instructions before the mergable region are
1326b60736ecSDimitry Andric       /// mergable if they are not calls to OpenMP runtime functions that may
1327b60736ecSDimitry Andric       /// set different execution parameters for subsequent parallel regions.
1328b60736ecSDimitry Andric       /// Instructions in-between parallel regions are mergable if they are not
1329b60736ecSDimitry Andric       /// calls to any non-intrinsic function since that may call a non-mergable
1330b60736ecSDimitry Andric       /// OpenMP runtime function.
1331b60736ecSDimitry Andric       auto IsMergable = [&](Instruction &I, bool IsBeforeMergableRegion) {
1332b60736ecSDimitry Andric         // We do not merge across BBs, hence return false (unmergable) if the
1333b60736ecSDimitry Andric         // instruction is a terminator.
1334b60736ecSDimitry Andric         if (I.isTerminator())
1335b60736ecSDimitry Andric           return false;
1336b60736ecSDimitry Andric 
1337b60736ecSDimitry Andric         if (!isa<CallInst>(&I))
1338b60736ecSDimitry Andric           return true;
1339b60736ecSDimitry Andric 
1340b60736ecSDimitry Andric         CallInst *CI = cast<CallInst>(&I);
1341b60736ecSDimitry Andric         if (IsBeforeMergableRegion) {
1342b60736ecSDimitry Andric           Function *CalledFunction = CI->getCalledFunction();
1343b60736ecSDimitry Andric           if (!CalledFunction)
1344b60736ecSDimitry Andric             return false;
1345b60736ecSDimitry Andric           // Return false (unmergable) if the call before the parallel
1346b60736ecSDimitry Andric           // region calls an explicit affinity (proc_bind) or number of
1347b60736ecSDimitry Andric           // threads (num_threads) compiler-generated function. Those settings
1348b60736ecSDimitry Andric           // may be incompatible with following parallel regions.
1349b60736ecSDimitry Andric           // TODO: ICV tracking to detect compatibility.
1350b60736ecSDimitry Andric           for (const auto &RFI : UnmergableCallsInfo) {
1351b60736ecSDimitry Andric             if (CalledFunction == RFI.Declaration)
1352b60736ecSDimitry Andric               return false;
1353b60736ecSDimitry Andric           }
1354b60736ecSDimitry Andric         } else {
1355b60736ecSDimitry Andric           // Return false (unmergable) if there is a call instruction
1356b60736ecSDimitry Andric           // in-between parallel regions when it is not an intrinsic. It
1357b60736ecSDimitry Andric           // may call an unmergable OpenMP runtime function in its callpath.
1358b60736ecSDimitry Andric           // TODO: Keep track of possible OpenMP calls in the callpath.
1359b60736ecSDimitry Andric           if (!isa<IntrinsicInst>(CI))
1360b60736ecSDimitry Andric             return false;
1361b60736ecSDimitry Andric         }
1362b60736ecSDimitry Andric 
1363b60736ecSDimitry Andric         return true;
1364b60736ecSDimitry Andric       };
1365b60736ecSDimitry Andric       // Find maximal number of parallel region CIs that are safe to merge.
1366b60736ecSDimitry Andric       for (auto It = BB->begin(), End = BB->end(); It != End;) {
1367b60736ecSDimitry Andric         Instruction &I = *It;
1368b60736ecSDimitry Andric         ++It;
1369b60736ecSDimitry Andric 
1370b60736ecSDimitry Andric         if (CIs.count(&I)) {
1371b60736ecSDimitry Andric           MergableCIs.push_back(cast<CallInst>(&I));
1372b60736ecSDimitry Andric           continue;
1373b60736ecSDimitry Andric         }
1374b60736ecSDimitry Andric 
1375b60736ecSDimitry Andric         // Continue expanding if the instruction is mergable.
1376b60736ecSDimitry Andric         if (IsMergable(I, MergableCIs.empty()))
1377b60736ecSDimitry Andric           continue;
1378b60736ecSDimitry Andric 
1379b60736ecSDimitry Andric         // Forward the instruction iterator to skip the next parallel region
1380b60736ecSDimitry Andric         // since there is an unmergable instruction which can affect it.
1381b60736ecSDimitry Andric         for (; It != End; ++It) {
1382b60736ecSDimitry Andric           Instruction &SkipI = *It;
1383b60736ecSDimitry Andric           if (CIs.count(&SkipI)) {
1384b60736ecSDimitry Andric             LLVM_DEBUG(dbgs() << TAG << "Skip parallel region " << SkipI
1385b60736ecSDimitry Andric                               << " due to " << I << "\n");
1386b60736ecSDimitry Andric             ++It;
1387b60736ecSDimitry Andric             break;
1388b60736ecSDimitry Andric           }
1389b60736ecSDimitry Andric         }
1390b60736ecSDimitry Andric 
1391b60736ecSDimitry Andric         // Store mergable regions found.
1392b60736ecSDimitry Andric         if (MergableCIs.size() > 1) {
1393b60736ecSDimitry Andric           MergableCIsVector.push_back(MergableCIs);
1394b60736ecSDimitry Andric           LLVM_DEBUG(dbgs() << TAG << "Found " << MergableCIs.size()
1395b60736ecSDimitry Andric                             << " parallel regions in block " << BB->getName()
1396b60736ecSDimitry Andric                             << " of function " << BB->getParent()->getName()
1397b60736ecSDimitry Andric                             << "\n";);
1398b60736ecSDimitry Andric         }
1399b60736ecSDimitry Andric 
1400b60736ecSDimitry Andric         MergableCIs.clear();
1401b60736ecSDimitry Andric       }
1402b60736ecSDimitry Andric 
1403b60736ecSDimitry Andric       if (!MergableCIsVector.empty()) {
1404b60736ecSDimitry Andric         Changed = true;
1405b60736ecSDimitry Andric 
1406b60736ecSDimitry Andric         for (auto &MergableCIs : MergableCIsVector)
1407b60736ecSDimitry Andric           Merge(MergableCIs, BB);
1408344a3780SDimitry Andric         MergableCIsVector.clear();
1409b60736ecSDimitry Andric       }
1410b60736ecSDimitry Andric     }
1411b60736ecSDimitry Andric 
1412b60736ecSDimitry Andric     if (Changed) {
1413b60736ecSDimitry Andric       /// Re-collect use for fork calls, emitted barrier calls, and
1414b60736ecSDimitry Andric       /// any emitted master/end_master calls.
1415b60736ecSDimitry Andric       OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_fork_call);
1416b60736ecSDimitry Andric       OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_barrier);
1417b60736ecSDimitry Andric       OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_master);
1418b60736ecSDimitry Andric       OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_end_master);
1419b60736ecSDimitry Andric     }
1420b60736ecSDimitry Andric 
1421b60736ecSDimitry Andric     return Changed;
1422b60736ecSDimitry Andric   }
1423b60736ecSDimitry Andric 
1424cfca06d7SDimitry Andric   /// Try to delete parallel regions if possible.
deleteParallelRegions__anon7bbaa8dc0111::OpenMPOpt1425cfca06d7SDimitry Andric   bool deleteParallelRegions() {
1426cfca06d7SDimitry Andric     const unsigned CallbackCalleeOperand = 2;
1427cfca06d7SDimitry Andric 
1428cfca06d7SDimitry Andric     OMPInformationCache::RuntimeFunctionInfo &RFI =
1429cfca06d7SDimitry Andric         OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call];
1430cfca06d7SDimitry Andric 
1431cfca06d7SDimitry Andric     if (!RFI.Declaration)
1432cfca06d7SDimitry Andric       return false;
1433cfca06d7SDimitry Andric 
1434cfca06d7SDimitry Andric     bool Changed = false;
1435cfca06d7SDimitry Andric     auto DeleteCallCB = [&](Use &U, Function &) {
1436cfca06d7SDimitry Andric       CallInst *CI = getCallIfRegularCall(U);
1437cfca06d7SDimitry Andric       if (!CI)
1438cfca06d7SDimitry Andric         return false;
1439cfca06d7SDimitry Andric       auto *Fn = dyn_cast<Function>(
1440cfca06d7SDimitry Andric           CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts());
1441cfca06d7SDimitry Andric       if (!Fn)
1442cfca06d7SDimitry Andric         return false;
1443cfca06d7SDimitry Andric       if (!Fn->onlyReadsMemory())
1444cfca06d7SDimitry Andric         return false;
1445cfca06d7SDimitry Andric       if (!Fn->hasFnAttribute(Attribute::WillReturn))
1446cfca06d7SDimitry Andric         return false;
1447cfca06d7SDimitry Andric 
1448cfca06d7SDimitry Andric       LLVM_DEBUG(dbgs() << TAG << "Delete read-only parallel region in "
1449cfca06d7SDimitry Andric                         << CI->getCaller()->getName() << "\n");
1450cfca06d7SDimitry Andric 
1451cfca06d7SDimitry Andric       auto Remark = [&](OptimizationRemark OR) {
1452344a3780SDimitry Andric         return OR << "Removing parallel region with no side-effects.";
1453cfca06d7SDimitry Andric       };
1454344a3780SDimitry Andric       emitRemark<OptimizationRemark>(CI, "OMP160", Remark);
1455cfca06d7SDimitry Andric 
1456cfca06d7SDimitry Andric       CI->eraseFromParent();
1457cfca06d7SDimitry Andric       Changed = true;
1458cfca06d7SDimitry Andric       ++NumOpenMPParallelRegionsDeleted;
1459cfca06d7SDimitry Andric       return true;
1460cfca06d7SDimitry Andric     };
1461cfca06d7SDimitry Andric 
1462cfca06d7SDimitry Andric     RFI.foreachUse(SCC, DeleteCallCB);
1463cfca06d7SDimitry Andric 
1464cfca06d7SDimitry Andric     return Changed;
1465cfca06d7SDimitry Andric   }
1466cfca06d7SDimitry Andric 
1467cfca06d7SDimitry Andric   /// Try to eliminate runtime calls by reusing existing ones.
deduplicateRuntimeCalls__anon7bbaa8dc0111::OpenMPOpt1468cfca06d7SDimitry Andric   bool deduplicateRuntimeCalls() {
1469cfca06d7SDimitry Andric     bool Changed = false;
1470cfca06d7SDimitry Andric 
1471cfca06d7SDimitry Andric     RuntimeFunction DeduplicableRuntimeCallIDs[] = {
1472cfca06d7SDimitry Andric         OMPRTL_omp_get_num_threads,
1473cfca06d7SDimitry Andric         OMPRTL_omp_in_parallel,
1474cfca06d7SDimitry Andric         OMPRTL_omp_get_cancellation,
1475cfca06d7SDimitry Andric         OMPRTL_omp_get_supported_active_levels,
1476cfca06d7SDimitry Andric         OMPRTL_omp_get_level,
1477cfca06d7SDimitry Andric         OMPRTL_omp_get_ancestor_thread_num,
1478cfca06d7SDimitry Andric         OMPRTL_omp_get_team_size,
1479cfca06d7SDimitry Andric         OMPRTL_omp_get_active_level,
1480cfca06d7SDimitry Andric         OMPRTL_omp_in_final,
1481cfca06d7SDimitry Andric         OMPRTL_omp_get_proc_bind,
1482cfca06d7SDimitry Andric         OMPRTL_omp_get_num_places,
1483cfca06d7SDimitry Andric         OMPRTL_omp_get_num_procs,
1484cfca06d7SDimitry Andric         OMPRTL_omp_get_place_num,
1485cfca06d7SDimitry Andric         OMPRTL_omp_get_partition_num_places,
1486cfca06d7SDimitry Andric         OMPRTL_omp_get_partition_place_nums};
1487cfca06d7SDimitry Andric 
1488cfca06d7SDimitry Andric     // Global-tid is handled separately.
1489cfca06d7SDimitry Andric     SmallSetVector<Value *, 16> GTIdArgs;
1490cfca06d7SDimitry Andric     collectGlobalThreadIdArguments(GTIdArgs);
1491cfca06d7SDimitry Andric     LLVM_DEBUG(dbgs() << TAG << "Found " << GTIdArgs.size()
1492cfca06d7SDimitry Andric                       << " global thread ID arguments\n");
1493cfca06d7SDimitry Andric 
1494cfca06d7SDimitry Andric     for (Function *F : SCC) {
1495cfca06d7SDimitry Andric       for (auto DeduplicableRuntimeCallID : DeduplicableRuntimeCallIDs)
1496b60736ecSDimitry Andric         Changed |= deduplicateRuntimeCalls(
1497b60736ecSDimitry Andric             *F, OMPInfoCache.RFIs[DeduplicableRuntimeCallID]);
1498cfca06d7SDimitry Andric 
1499cfca06d7SDimitry Andric       // __kmpc_global_thread_num is special as we can replace it with an
1500cfca06d7SDimitry Andric       // argument in enough cases to make it worth trying.
1501cfca06d7SDimitry Andric       Value *GTIdArg = nullptr;
1502cfca06d7SDimitry Andric       for (Argument &Arg : F->args())
1503cfca06d7SDimitry Andric         if (GTIdArgs.count(&Arg)) {
1504cfca06d7SDimitry Andric           GTIdArg = &Arg;
1505cfca06d7SDimitry Andric           break;
1506cfca06d7SDimitry Andric         }
1507cfca06d7SDimitry Andric       Changed |= deduplicateRuntimeCalls(
1508cfca06d7SDimitry Andric           *F, OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num], GTIdArg);
1509cfca06d7SDimitry Andric     }
1510cfca06d7SDimitry Andric 
1511cfca06d7SDimitry Andric     return Changed;
1512cfca06d7SDimitry Andric   }
1513cfca06d7SDimitry Andric 
1514b1c73532SDimitry Andric   /// Tries to remove known runtime symbols that are optional from the module.
removeRuntimeSymbols__anon7bbaa8dc0111::OpenMPOpt1515b1c73532SDimitry Andric   bool removeRuntimeSymbols() {
1516b1c73532SDimitry Andric     // The RPC client symbol is defined in `libc` and indicates that something
1517b1c73532SDimitry Andric     // required an RPC server. If its users were all optimized out then we can
1518b1c73532SDimitry Andric     // safely remove it.
1519b1c73532SDimitry Andric     // TODO: This should be somewhere more common in the future.
1520b1c73532SDimitry Andric     if (GlobalVariable *GV = M.getNamedGlobal("__llvm_libc_rpc_client")) {
1521b1c73532SDimitry Andric       if (!GV->getType()->isPointerTy())
1522b1c73532SDimitry Andric         return false;
1523b1c73532SDimitry Andric 
1524b1c73532SDimitry Andric       Constant *C = GV->getInitializer();
1525b1c73532SDimitry Andric       if (!C)
1526b1c73532SDimitry Andric         return false;
1527b1c73532SDimitry Andric 
1528b1c73532SDimitry Andric       // Check to see if the only user of the RPC client is the external handle.
1529b1c73532SDimitry Andric       GlobalVariable *Client = dyn_cast<GlobalVariable>(C->stripPointerCasts());
1530b1c73532SDimitry Andric       if (!Client || Client->getNumUses() > 1 ||
1531b1c73532SDimitry Andric           Client->user_back() != GV->getInitializer())
1532b1c73532SDimitry Andric         return false;
1533b1c73532SDimitry Andric 
1534b1c73532SDimitry Andric       Client->replaceAllUsesWith(PoisonValue::get(Client->getType()));
1535b1c73532SDimitry Andric       Client->eraseFromParent();
1536b1c73532SDimitry Andric 
1537b1c73532SDimitry Andric       GV->replaceAllUsesWith(PoisonValue::get(GV->getType()));
1538b1c73532SDimitry Andric       GV->eraseFromParent();
1539b1c73532SDimitry Andric 
1540b1c73532SDimitry Andric       return true;
1541b1c73532SDimitry Andric     }
1542b1c73532SDimitry Andric     return false;
1543b1c73532SDimitry Andric   }
1544b1c73532SDimitry Andric 
1545b60736ecSDimitry Andric   /// Tries to hide the latency of runtime calls that involve host to
1546b60736ecSDimitry Andric   /// device memory transfers by splitting them into their "issue" and "wait"
1547b60736ecSDimitry Andric   /// versions. The "issue" is moved upwards as much as possible. The "wait" is
1548b60736ecSDimitry Andric   /// moved downards as much as possible. The "issue" issues the memory transfer
1549b60736ecSDimitry Andric   /// asynchronously, returning a handle. The "wait" waits in the returned
1550b60736ecSDimitry Andric   /// handle for the memory transfer to finish.
hideMemTransfersLatency__anon7bbaa8dc0111::OpenMPOpt1551b60736ecSDimitry Andric   bool hideMemTransfersLatency() {
1552b60736ecSDimitry Andric     auto &RFI = OMPInfoCache.RFIs[OMPRTL___tgt_target_data_begin_mapper];
1553b60736ecSDimitry Andric     bool Changed = false;
1554b60736ecSDimitry Andric     auto SplitMemTransfers = [&](Use &U, Function &Decl) {
1555b60736ecSDimitry Andric       auto *RTCall = getCallIfRegularCall(U, &RFI);
1556b60736ecSDimitry Andric       if (!RTCall)
1557b60736ecSDimitry Andric         return false;
1558b60736ecSDimitry Andric 
1559b60736ecSDimitry Andric       OffloadArray OffloadArrays[3];
1560b60736ecSDimitry Andric       if (!getValuesInOffloadArrays(*RTCall, OffloadArrays))
1561b60736ecSDimitry Andric         return false;
1562b60736ecSDimitry Andric 
1563b60736ecSDimitry Andric       LLVM_DEBUG(dumpValuesInOffloadArrays(OffloadArrays));
1564b60736ecSDimitry Andric 
1565b60736ecSDimitry Andric       // TODO: Check if can be moved upwards.
1566b60736ecSDimitry Andric       bool WasSplit = false;
1567b60736ecSDimitry Andric       Instruction *WaitMovementPoint = canBeMovedDownwards(*RTCall);
1568b60736ecSDimitry Andric       if (WaitMovementPoint)
1569b60736ecSDimitry Andric         WasSplit = splitTargetDataBeginRTC(*RTCall, *WaitMovementPoint);
1570b60736ecSDimitry Andric 
1571b60736ecSDimitry Andric       Changed |= WasSplit;
1572b60736ecSDimitry Andric       return WasSplit;
1573b60736ecSDimitry Andric     };
15747fa27ce4SDimitry Andric     if (OMPInfoCache.runtimeFnsAvailable(
15757fa27ce4SDimitry Andric             {OMPRTL___tgt_target_data_begin_mapper_issue,
15767fa27ce4SDimitry Andric              OMPRTL___tgt_target_data_begin_mapper_wait}))
1577b60736ecSDimitry Andric       RFI.foreachUse(SCC, SplitMemTransfers);
1578b60736ecSDimitry Andric 
1579b60736ecSDimitry Andric     return Changed;
1580b60736ecSDimitry Andric   }
1581b60736ecSDimitry Andric 
analysisGlobalization__anon7bbaa8dc0111::OpenMPOpt1582b60736ecSDimitry Andric   void analysisGlobalization() {
1583344a3780SDimitry Andric     auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
1584b60736ecSDimitry Andric 
1585b60736ecSDimitry Andric     auto CheckGlobalization = [&](Use &U, Function &Decl) {
1586b60736ecSDimitry Andric       if (CallInst *CI = getCallIfRegularCall(U, &RFI)) {
1587344a3780SDimitry Andric         auto Remark = [&](OptimizationRemarkMissed ORM) {
1588344a3780SDimitry Andric           return ORM
1589b60736ecSDimitry Andric                  << "Found thread data sharing on the GPU. "
1590b60736ecSDimitry Andric                  << "Expect degraded performance due to data globalization.";
1591b60736ecSDimitry Andric         };
1592344a3780SDimitry Andric         emitRemark<OptimizationRemarkMissed>(CI, "OMP112", Remark);
1593b60736ecSDimitry Andric       }
1594b60736ecSDimitry Andric 
1595b60736ecSDimitry Andric       return false;
1596b60736ecSDimitry Andric     };
1597b60736ecSDimitry Andric 
1598b60736ecSDimitry Andric     RFI.foreachUse(SCC, CheckGlobalization);
1599b60736ecSDimitry Andric   }
1600b60736ecSDimitry Andric 
1601b60736ecSDimitry Andric   /// Maps the values stored in the offload arrays passed as arguments to
1602b60736ecSDimitry Andric   /// \p RuntimeCall into the offload arrays in \p OAs.
getValuesInOffloadArrays__anon7bbaa8dc0111::OpenMPOpt1603b60736ecSDimitry Andric   bool getValuesInOffloadArrays(CallInst &RuntimeCall,
1604b60736ecSDimitry Andric                                 MutableArrayRef<OffloadArray> OAs) {
1605b60736ecSDimitry Andric     assert(OAs.size() == 3 && "Need space for three offload arrays!");
1606b60736ecSDimitry Andric 
1607b60736ecSDimitry Andric     // A runtime call that involves memory offloading looks something like:
1608b60736ecSDimitry Andric     // call void @__tgt_target_data_begin_mapper(arg0, arg1,
1609b60736ecSDimitry Andric     //   i8** %offload_baseptrs, i8** %offload_ptrs, i64* %offload_sizes,
1610b60736ecSDimitry Andric     // ...)
1611b60736ecSDimitry Andric     // So, the idea is to access the allocas that allocate space for these
1612b60736ecSDimitry Andric     // offload arrays, offload_baseptrs, offload_ptrs, offload_sizes.
1613b60736ecSDimitry Andric     // Therefore:
1614b60736ecSDimitry Andric     // i8** %offload_baseptrs.
1615b60736ecSDimitry Andric     Value *BasePtrsArg =
1616b60736ecSDimitry Andric         RuntimeCall.getArgOperand(OffloadArray::BasePtrsArgNum);
1617b60736ecSDimitry Andric     // i8** %offload_ptrs.
1618b60736ecSDimitry Andric     Value *PtrsArg = RuntimeCall.getArgOperand(OffloadArray::PtrsArgNum);
1619b60736ecSDimitry Andric     // i8** %offload_sizes.
1620b60736ecSDimitry Andric     Value *SizesArg = RuntimeCall.getArgOperand(OffloadArray::SizesArgNum);
1621b60736ecSDimitry Andric 
1622b60736ecSDimitry Andric     // Get values stored in **offload_baseptrs.
1623b60736ecSDimitry Andric     auto *V = getUnderlyingObject(BasePtrsArg);
1624b60736ecSDimitry Andric     if (!isa<AllocaInst>(V))
1625b60736ecSDimitry Andric       return false;
1626b60736ecSDimitry Andric     auto *BasePtrsArray = cast<AllocaInst>(V);
1627b60736ecSDimitry Andric     if (!OAs[0].initialize(*BasePtrsArray, RuntimeCall))
1628b60736ecSDimitry Andric       return false;
1629b60736ecSDimitry Andric 
1630b60736ecSDimitry Andric     // Get values stored in **offload_baseptrs.
1631b60736ecSDimitry Andric     V = getUnderlyingObject(PtrsArg);
1632b60736ecSDimitry Andric     if (!isa<AllocaInst>(V))
1633b60736ecSDimitry Andric       return false;
1634b60736ecSDimitry Andric     auto *PtrsArray = cast<AllocaInst>(V);
1635b60736ecSDimitry Andric     if (!OAs[1].initialize(*PtrsArray, RuntimeCall))
1636b60736ecSDimitry Andric       return false;
1637b60736ecSDimitry Andric 
1638b60736ecSDimitry Andric     // Get values stored in **offload_sizes.
1639b60736ecSDimitry Andric     V = getUnderlyingObject(SizesArg);
1640b60736ecSDimitry Andric     // If it's a [constant] global array don't analyze it.
1641b60736ecSDimitry Andric     if (isa<GlobalValue>(V))
1642b60736ecSDimitry Andric       return isa<Constant>(V);
1643b60736ecSDimitry Andric     if (!isa<AllocaInst>(V))
1644b60736ecSDimitry Andric       return false;
1645b60736ecSDimitry Andric 
1646b60736ecSDimitry Andric     auto *SizesArray = cast<AllocaInst>(V);
1647b60736ecSDimitry Andric     if (!OAs[2].initialize(*SizesArray, RuntimeCall))
1648b60736ecSDimitry Andric       return false;
1649b60736ecSDimitry Andric 
1650b60736ecSDimitry Andric     return true;
1651b60736ecSDimitry Andric   }
1652b60736ecSDimitry Andric 
1653b60736ecSDimitry Andric   /// Prints the values in the OffloadArrays \p OAs using LLVM_DEBUG.
1654b60736ecSDimitry Andric   /// For now this is a way to test that the function getValuesInOffloadArrays
1655b60736ecSDimitry Andric   /// is working properly.
1656b60736ecSDimitry Andric   /// TODO: Move this to a unittest when unittests are available for OpenMPOpt.
dumpValuesInOffloadArrays__anon7bbaa8dc0111::OpenMPOpt1657b60736ecSDimitry Andric   void dumpValuesInOffloadArrays(ArrayRef<OffloadArray> OAs) {
1658b60736ecSDimitry Andric     assert(OAs.size() == 3 && "There are three offload arrays to debug!");
1659b60736ecSDimitry Andric 
1660b60736ecSDimitry Andric     LLVM_DEBUG(dbgs() << TAG << " Successfully got offload values:\n");
1661b60736ecSDimitry Andric     std::string ValuesStr;
1662b60736ecSDimitry Andric     raw_string_ostream Printer(ValuesStr);
1663b60736ecSDimitry Andric     std::string Separator = " --- ";
1664b60736ecSDimitry Andric 
1665b60736ecSDimitry Andric     for (auto *BP : OAs[0].StoredValues) {
1666b60736ecSDimitry Andric       BP->print(Printer);
1667b60736ecSDimitry Andric       Printer << Separator;
1668b60736ecSDimitry Andric     }
1669ac9a064cSDimitry Andric     LLVM_DEBUG(dbgs() << "\t\toffload_baseptrs: " << ValuesStr << "\n");
1670b60736ecSDimitry Andric     ValuesStr.clear();
1671b60736ecSDimitry Andric 
1672b60736ecSDimitry Andric     for (auto *P : OAs[1].StoredValues) {
1673b60736ecSDimitry Andric       P->print(Printer);
1674b60736ecSDimitry Andric       Printer << Separator;
1675b60736ecSDimitry Andric     }
1676ac9a064cSDimitry Andric     LLVM_DEBUG(dbgs() << "\t\toffload_ptrs: " << ValuesStr << "\n");
1677b60736ecSDimitry Andric     ValuesStr.clear();
1678b60736ecSDimitry Andric 
1679b60736ecSDimitry Andric     for (auto *S : OAs[2].StoredValues) {
1680b60736ecSDimitry Andric       S->print(Printer);
1681b60736ecSDimitry Andric       Printer << Separator;
1682b60736ecSDimitry Andric     }
1683ac9a064cSDimitry Andric     LLVM_DEBUG(dbgs() << "\t\toffload_sizes: " << ValuesStr << "\n");
1684b60736ecSDimitry Andric   }
1685b60736ecSDimitry Andric 
1686b60736ecSDimitry Andric   /// Returns the instruction where the "wait" counterpart \p RuntimeCall can be
1687b60736ecSDimitry Andric   /// moved. Returns nullptr if the movement is not possible, or not worth it.
canBeMovedDownwards__anon7bbaa8dc0111::OpenMPOpt1688b60736ecSDimitry Andric   Instruction *canBeMovedDownwards(CallInst &RuntimeCall) {
1689b60736ecSDimitry Andric     // FIXME: This traverses only the BasicBlock where RuntimeCall is.
1690b60736ecSDimitry Andric     //  Make it traverse the CFG.
1691b60736ecSDimitry Andric 
1692b60736ecSDimitry Andric     Instruction *CurrentI = &RuntimeCall;
1693b60736ecSDimitry Andric     bool IsWorthIt = false;
1694b60736ecSDimitry Andric     while ((CurrentI = CurrentI->getNextNode())) {
1695b60736ecSDimitry Andric 
1696b60736ecSDimitry Andric       // TODO: Once we detect the regions to be offloaded we should use the
1697b60736ecSDimitry Andric       //  alias analysis manager to check if CurrentI may modify one of
1698b60736ecSDimitry Andric       //  the offloaded regions.
1699b60736ecSDimitry Andric       if (CurrentI->mayHaveSideEffects() || CurrentI->mayReadFromMemory()) {
1700b60736ecSDimitry Andric         if (IsWorthIt)
1701b60736ecSDimitry Andric           return CurrentI;
1702b60736ecSDimitry Andric 
1703b60736ecSDimitry Andric         return nullptr;
1704b60736ecSDimitry Andric       }
1705b60736ecSDimitry Andric 
1706b60736ecSDimitry Andric       // FIXME: For now if we move it over anything without side effect
1707b60736ecSDimitry Andric       //  is worth it.
1708b60736ecSDimitry Andric       IsWorthIt = true;
1709b60736ecSDimitry Andric     }
1710b60736ecSDimitry Andric 
1711b60736ecSDimitry Andric     // Return end of BasicBlock.
1712b60736ecSDimitry Andric     return RuntimeCall.getParent()->getTerminator();
1713b60736ecSDimitry Andric   }
1714b60736ecSDimitry Andric 
1715b60736ecSDimitry Andric   /// Splits \p RuntimeCall into its "issue" and "wait" counterparts.
splitTargetDataBeginRTC__anon7bbaa8dc0111::OpenMPOpt1716b60736ecSDimitry Andric   bool splitTargetDataBeginRTC(CallInst &RuntimeCall,
1717b60736ecSDimitry Andric                                Instruction &WaitMovementPoint) {
1718b60736ecSDimitry Andric     // Create stack allocated handle (__tgt_async_info) at the beginning of the
1719b60736ecSDimitry Andric     // function. Used for storing information of the async transfer, allowing to
1720b60736ecSDimitry Andric     // wait on it later.
1721b60736ecSDimitry Andric     auto &IRBuilder = OMPInfoCache.OMPBuilder;
1722e3b55780SDimitry Andric     Function *F = RuntimeCall.getCaller();
1723e3b55780SDimitry Andric     BasicBlock &Entry = F->getEntryBlock();
1724e3b55780SDimitry Andric     IRBuilder.Builder.SetInsertPoint(&Entry,
1725e3b55780SDimitry Andric                                      Entry.getFirstNonPHIOrDbgOrAlloca());
1726e3b55780SDimitry Andric     Value *Handle = IRBuilder.Builder.CreateAlloca(
1727e3b55780SDimitry Andric         IRBuilder.AsyncInfo, /*ArraySize=*/nullptr, "handle");
1728e3b55780SDimitry Andric     Handle =
1729e3b55780SDimitry Andric         IRBuilder.Builder.CreateAddrSpaceCast(Handle, IRBuilder.AsyncInfoPtr);
1730b60736ecSDimitry Andric 
1731b60736ecSDimitry Andric     // Add "issue" runtime call declaration:
1732b60736ecSDimitry Andric     // declare %struct.tgt_async_info @__tgt_target_data_begin_issue(i64, i32,
1733b60736ecSDimitry Andric     //   i8**, i8**, i64*, i64*)
1734b60736ecSDimitry Andric     FunctionCallee IssueDecl = IRBuilder.getOrCreateRuntimeFunction(
1735b60736ecSDimitry Andric         M, OMPRTL___tgt_target_data_begin_mapper_issue);
1736b60736ecSDimitry Andric 
1737b60736ecSDimitry Andric     // Change RuntimeCall call site for its asynchronous version.
1738b60736ecSDimitry Andric     SmallVector<Value *, 16> Args;
1739b60736ecSDimitry Andric     for (auto &Arg : RuntimeCall.args())
1740b60736ecSDimitry Andric       Args.push_back(Arg.get());
1741b60736ecSDimitry Andric     Args.push_back(Handle);
1742b60736ecSDimitry Andric 
1743ac9a064cSDimitry Andric     CallInst *IssueCallsite = CallInst::Create(IssueDecl, Args, /*NameStr=*/"",
1744ac9a064cSDimitry Andric                                                RuntimeCall.getIterator());
17456f8fc217SDimitry Andric     OMPInfoCache.setCallingConvention(IssueDecl, IssueCallsite);
1746b60736ecSDimitry Andric     RuntimeCall.eraseFromParent();
1747b60736ecSDimitry Andric 
1748b60736ecSDimitry Andric     // Add "wait" runtime call declaration:
1749b60736ecSDimitry Andric     // declare void @__tgt_target_data_begin_wait(i64, %struct.__tgt_async_info)
1750b60736ecSDimitry Andric     FunctionCallee WaitDecl = IRBuilder.getOrCreateRuntimeFunction(
1751b60736ecSDimitry Andric         M, OMPRTL___tgt_target_data_begin_mapper_wait);
1752b60736ecSDimitry Andric 
1753b60736ecSDimitry Andric     Value *WaitParams[2] = {
1754b60736ecSDimitry Andric         IssueCallsite->getArgOperand(
1755b60736ecSDimitry Andric             OffloadArray::DeviceIDArgNum), // device_id.
1756b60736ecSDimitry Andric         Handle                             // handle to wait on.
1757b60736ecSDimitry Andric     };
17586f8fc217SDimitry Andric     CallInst *WaitCallsite = CallInst::Create(
1759ac9a064cSDimitry Andric         WaitDecl, WaitParams, /*NameStr=*/"", WaitMovementPoint.getIterator());
17606f8fc217SDimitry Andric     OMPInfoCache.setCallingConvention(WaitDecl, WaitCallsite);
1761b60736ecSDimitry Andric 
1762b60736ecSDimitry Andric     return true;
1763b60736ecSDimitry Andric   }
1764b60736ecSDimitry Andric 
combinedIdentStruct__anon7bbaa8dc0111::OpenMPOpt1765cfca06d7SDimitry Andric   static Value *combinedIdentStruct(Value *CurrentIdent, Value *NextIdent,
1766cfca06d7SDimitry Andric                                     bool GlobalOnly, bool &SingleChoice) {
1767cfca06d7SDimitry Andric     if (CurrentIdent == NextIdent)
1768cfca06d7SDimitry Andric       return CurrentIdent;
1769cfca06d7SDimitry Andric 
1770cfca06d7SDimitry Andric     // TODO: Figure out how to actually combine multiple debug locations. For
1771cfca06d7SDimitry Andric     //       now we just keep an existing one if there is a single choice.
1772cfca06d7SDimitry Andric     if (!GlobalOnly || isa<GlobalValue>(NextIdent)) {
1773cfca06d7SDimitry Andric       SingleChoice = !CurrentIdent;
1774cfca06d7SDimitry Andric       return NextIdent;
1775cfca06d7SDimitry Andric     }
1776cfca06d7SDimitry Andric     return nullptr;
1777cfca06d7SDimitry Andric   }
1778cfca06d7SDimitry Andric 
1779cfca06d7SDimitry Andric   /// Return an `struct ident_t*` value that represents the ones used in the
1780cfca06d7SDimitry Andric   /// calls of \p RFI inside of \p F. If \p GlobalOnly is true, we will not
1781cfca06d7SDimitry Andric   /// return a local `struct ident_t*`. For now, if we cannot find a suitable
1782cfca06d7SDimitry Andric   /// return value we create one from scratch. We also do not yet combine
1783cfca06d7SDimitry Andric   /// information, e.g., the source locations, see combinedIdentStruct.
1784cfca06d7SDimitry Andric   Value *
getCombinedIdentFromCallUsesIn__anon7bbaa8dc0111::OpenMPOpt1785cfca06d7SDimitry Andric   getCombinedIdentFromCallUsesIn(OMPInformationCache::RuntimeFunctionInfo &RFI,
1786cfca06d7SDimitry Andric                                  Function &F, bool GlobalOnly) {
1787cfca06d7SDimitry Andric     bool SingleChoice = true;
1788cfca06d7SDimitry Andric     Value *Ident = nullptr;
1789cfca06d7SDimitry Andric     auto CombineIdentStruct = [&](Use &U, Function &Caller) {
1790cfca06d7SDimitry Andric       CallInst *CI = getCallIfRegularCall(U, &RFI);
1791cfca06d7SDimitry Andric       if (!CI || &F != &Caller)
1792cfca06d7SDimitry Andric         return false;
1793cfca06d7SDimitry Andric       Ident = combinedIdentStruct(Ident, CI->getArgOperand(0),
1794cfca06d7SDimitry Andric                                   /* GlobalOnly */ true, SingleChoice);
1795cfca06d7SDimitry Andric       return false;
1796cfca06d7SDimitry Andric     };
1797cfca06d7SDimitry Andric     RFI.foreachUse(SCC, CombineIdentStruct);
1798cfca06d7SDimitry Andric 
1799cfca06d7SDimitry Andric     if (!Ident || !SingleChoice) {
1800cfca06d7SDimitry Andric       // The IRBuilder uses the insertion block to get to the module, this is
1801cfca06d7SDimitry Andric       // unfortunate but we work around it for now.
1802cfca06d7SDimitry Andric       if (!OMPInfoCache.OMPBuilder.getInsertionPoint().getBlock())
1803cfca06d7SDimitry Andric         OMPInfoCache.OMPBuilder.updateToLocation(OpenMPIRBuilder::InsertPointTy(
1804cfca06d7SDimitry Andric             &F.getEntryBlock(), F.getEntryBlock().begin()));
1805cfca06d7SDimitry Andric       // Create a fallback location if non was found.
1806cfca06d7SDimitry Andric       // TODO: Use the debug locations of the calls instead.
18076f8fc217SDimitry Andric       uint32_t SrcLocStrSize;
18086f8fc217SDimitry Andric       Constant *Loc =
18096f8fc217SDimitry Andric           OMPInfoCache.OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
18106f8fc217SDimitry Andric       Ident = OMPInfoCache.OMPBuilder.getOrCreateIdent(Loc, SrcLocStrSize);
1811cfca06d7SDimitry Andric     }
1812cfca06d7SDimitry Andric     return Ident;
1813cfca06d7SDimitry Andric   }
1814cfca06d7SDimitry Andric 
1815cfca06d7SDimitry Andric   /// Try to eliminate calls of \p RFI in \p F by reusing an existing one or
1816cfca06d7SDimitry Andric   /// \p ReplVal if given.
deduplicateRuntimeCalls__anon7bbaa8dc0111::OpenMPOpt1817cfca06d7SDimitry Andric   bool deduplicateRuntimeCalls(Function &F,
1818cfca06d7SDimitry Andric                                OMPInformationCache::RuntimeFunctionInfo &RFI,
1819cfca06d7SDimitry Andric                                Value *ReplVal = nullptr) {
1820cfca06d7SDimitry Andric     auto *UV = RFI.getUseVector(F);
1821cfca06d7SDimitry Andric     if (!UV || UV->size() + (ReplVal != nullptr) < 2)
1822cfca06d7SDimitry Andric       return false;
1823cfca06d7SDimitry Andric 
1824cfca06d7SDimitry Andric     LLVM_DEBUG(
1825cfca06d7SDimitry Andric         dbgs() << TAG << "Deduplicate " << UV->size() << " uses of " << RFI.Name
1826cfca06d7SDimitry Andric                << (ReplVal ? " with an existing value\n" : "\n") << "\n");
1827cfca06d7SDimitry Andric 
1828cfca06d7SDimitry Andric     assert((!ReplVal || (isa<Argument>(ReplVal) &&
1829cfca06d7SDimitry Andric                          cast<Argument>(ReplVal)->getParent() == &F)) &&
1830cfca06d7SDimitry Andric            "Unexpected replacement value!");
1831cfca06d7SDimitry Andric 
1832cfca06d7SDimitry Andric     // TODO: Use dominance to find a good position instead.
1833cfca06d7SDimitry Andric     auto CanBeMoved = [this](CallBase &CB) {
1834c0981da4SDimitry Andric       unsigned NumArgs = CB.arg_size();
1835cfca06d7SDimitry Andric       if (NumArgs == 0)
1836cfca06d7SDimitry Andric         return true;
1837cfca06d7SDimitry Andric       if (CB.getArgOperand(0)->getType() != OMPInfoCache.OMPBuilder.IdentPtr)
1838cfca06d7SDimitry Andric         return false;
1839c0981da4SDimitry Andric       for (unsigned U = 1; U < NumArgs; ++U)
1840c0981da4SDimitry Andric         if (isa<Instruction>(CB.getArgOperand(U)))
1841cfca06d7SDimitry Andric           return false;
1842cfca06d7SDimitry Andric       return true;
1843cfca06d7SDimitry Andric     };
1844cfca06d7SDimitry Andric 
1845cfca06d7SDimitry Andric     if (!ReplVal) {
18467fa27ce4SDimitry Andric       auto *DT =
18477fa27ce4SDimitry Andric           OMPInfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(F);
18487fa27ce4SDimitry Andric       if (!DT)
18497fa27ce4SDimitry Andric         return false;
18507fa27ce4SDimitry Andric       Instruction *IP = nullptr;
18517fa27ce4SDimitry Andric       for (Use *U : *UV) {
1852cfca06d7SDimitry Andric         if (CallInst *CI = getCallIfRegularCall(*U, &RFI)) {
18537fa27ce4SDimitry Andric           if (IP)
18547fa27ce4SDimitry Andric             IP = DT->findNearestCommonDominator(IP, CI);
18557fa27ce4SDimitry Andric           else
18567fa27ce4SDimitry Andric             IP = CI;
1857cfca06d7SDimitry Andric           if (!CanBeMoved(*CI))
1858cfca06d7SDimitry Andric             continue;
18597fa27ce4SDimitry Andric           if (!ReplVal)
1860cfca06d7SDimitry Andric             ReplVal = CI;
18617fa27ce4SDimitry Andric         }
1862cfca06d7SDimitry Andric       }
1863cfca06d7SDimitry Andric       if (!ReplVal)
1864cfca06d7SDimitry Andric         return false;
18657fa27ce4SDimitry Andric       assert(IP && "Expected insertion point!");
18667fa27ce4SDimitry Andric       cast<Instruction>(ReplVal)->moveBefore(IP);
1867cfca06d7SDimitry Andric     }
1868cfca06d7SDimitry Andric 
1869cfca06d7SDimitry Andric     // If we use a call as a replacement value we need to make sure the ident is
1870cfca06d7SDimitry Andric     // valid at the new location. For now we just pick a global one, either
1871cfca06d7SDimitry Andric     // existing and used by one of the calls, or created from scratch.
1872cfca06d7SDimitry Andric     if (CallBase *CI = dyn_cast<CallBase>(ReplVal)) {
1873c0981da4SDimitry Andric       if (!CI->arg_empty() &&
1874cfca06d7SDimitry Andric           CI->getArgOperand(0)->getType() == OMPInfoCache.OMPBuilder.IdentPtr) {
1875cfca06d7SDimitry Andric         Value *Ident = getCombinedIdentFromCallUsesIn(RFI, F,
1876cfca06d7SDimitry Andric                                                       /* GlobalOnly */ true);
1877cfca06d7SDimitry Andric         CI->setArgOperand(0, Ident);
1878cfca06d7SDimitry Andric       }
1879cfca06d7SDimitry Andric     }
1880cfca06d7SDimitry Andric 
1881cfca06d7SDimitry Andric     bool Changed = false;
1882cfca06d7SDimitry Andric     auto ReplaceAndDeleteCB = [&](Use &U, Function &Caller) {
1883cfca06d7SDimitry Andric       CallInst *CI = getCallIfRegularCall(U, &RFI);
1884cfca06d7SDimitry Andric       if (!CI || CI == ReplVal || &F != &Caller)
1885cfca06d7SDimitry Andric         return false;
1886cfca06d7SDimitry Andric       assert(CI->getCaller() == &F && "Unexpected call!");
1887cfca06d7SDimitry Andric 
1888cfca06d7SDimitry Andric       auto Remark = [&](OptimizationRemark OR) {
1889cfca06d7SDimitry Andric         return OR << "OpenMP runtime call "
1890344a3780SDimitry Andric                   << ore::NV("OpenMPOptRuntime", RFI.Name) << " deduplicated.";
1891cfca06d7SDimitry Andric       };
1892344a3780SDimitry Andric       if (CI->getDebugLoc())
1893344a3780SDimitry Andric         emitRemark<OptimizationRemark>(CI, "OMP170", Remark);
1894344a3780SDimitry Andric       else
1895344a3780SDimitry Andric         emitRemark<OptimizationRemark>(&F, "OMP170", Remark);
1896cfca06d7SDimitry Andric 
1897cfca06d7SDimitry Andric       CI->replaceAllUsesWith(ReplVal);
1898cfca06d7SDimitry Andric       CI->eraseFromParent();
1899cfca06d7SDimitry Andric       ++NumOpenMPRuntimeCallsDeduplicated;
1900cfca06d7SDimitry Andric       Changed = true;
1901cfca06d7SDimitry Andric       return true;
1902cfca06d7SDimitry Andric     };
1903cfca06d7SDimitry Andric     RFI.foreachUse(SCC, ReplaceAndDeleteCB);
1904cfca06d7SDimitry Andric 
1905cfca06d7SDimitry Andric     return Changed;
1906cfca06d7SDimitry Andric   }
1907cfca06d7SDimitry Andric 
1908cfca06d7SDimitry Andric   /// Collect arguments that represent the global thread id in \p GTIdArgs.
collectGlobalThreadIdArguments__anon7bbaa8dc0111::OpenMPOpt1909cfca06d7SDimitry Andric   void collectGlobalThreadIdArguments(SmallSetVector<Value *, 16> &GTIdArgs) {
1910cfca06d7SDimitry Andric     // TODO: Below we basically perform a fixpoint iteration with a pessimistic
1911cfca06d7SDimitry Andric     //       initialization. We could define an AbstractAttribute instead and
1912cfca06d7SDimitry Andric     //       run the Attributor here once it can be run as an SCC pass.
1913cfca06d7SDimitry Andric 
1914cfca06d7SDimitry Andric     // Helper to check the argument \p ArgNo at all call sites of \p F for
1915cfca06d7SDimitry Andric     // a GTId.
1916cfca06d7SDimitry Andric     auto CallArgOpIsGTId = [&](Function &F, unsigned ArgNo, CallInst &RefCI) {
1917cfca06d7SDimitry Andric       if (!F.hasLocalLinkage())
1918cfca06d7SDimitry Andric         return false;
1919cfca06d7SDimitry Andric       for (Use &U : F.uses()) {
1920cfca06d7SDimitry Andric         if (CallInst *CI = getCallIfRegularCall(U)) {
1921cfca06d7SDimitry Andric           Value *ArgOp = CI->getArgOperand(ArgNo);
1922cfca06d7SDimitry Andric           if (CI == &RefCI || GTIdArgs.count(ArgOp) ||
1923cfca06d7SDimitry Andric               getCallIfRegularCall(
1924cfca06d7SDimitry Andric                   *ArgOp, &OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num]))
1925cfca06d7SDimitry Andric             continue;
1926cfca06d7SDimitry Andric         }
1927cfca06d7SDimitry Andric         return false;
1928cfca06d7SDimitry Andric       }
1929cfca06d7SDimitry Andric       return true;
1930cfca06d7SDimitry Andric     };
1931cfca06d7SDimitry Andric 
1932cfca06d7SDimitry Andric     // Helper to identify uses of a GTId as GTId arguments.
1933cfca06d7SDimitry Andric     auto AddUserArgs = [&](Value &GTId) {
1934cfca06d7SDimitry Andric       for (Use &U : GTId.uses())
1935cfca06d7SDimitry Andric         if (CallInst *CI = dyn_cast<CallInst>(U.getUser()))
1936cfca06d7SDimitry Andric           if (CI->isArgOperand(&U))
1937cfca06d7SDimitry Andric             if (Function *Callee = CI->getCalledFunction())
1938cfca06d7SDimitry Andric               if (CallArgOpIsGTId(*Callee, U.getOperandNo(), *CI))
1939cfca06d7SDimitry Andric                 GTIdArgs.insert(Callee->getArg(U.getOperandNo()));
1940cfca06d7SDimitry Andric     };
1941cfca06d7SDimitry Andric 
1942cfca06d7SDimitry Andric     // The argument users of __kmpc_global_thread_num calls are GTIds.
1943cfca06d7SDimitry Andric     OMPInformationCache::RuntimeFunctionInfo &GlobThreadNumRFI =
1944cfca06d7SDimitry Andric         OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num];
1945cfca06d7SDimitry Andric 
1946cfca06d7SDimitry Andric     GlobThreadNumRFI.foreachUse(SCC, [&](Use &U, Function &F) {
1947cfca06d7SDimitry Andric       if (CallInst *CI = getCallIfRegularCall(U, &GlobThreadNumRFI))
1948cfca06d7SDimitry Andric         AddUserArgs(*CI);
1949cfca06d7SDimitry Andric       return false;
1950cfca06d7SDimitry Andric     });
1951cfca06d7SDimitry Andric 
1952cfca06d7SDimitry Andric     // Transitively search for more arguments by looking at the users of the
1953cfca06d7SDimitry Andric     // ones we know already. During the search the GTIdArgs vector is extended
1954cfca06d7SDimitry Andric     // so we cannot cache the size nor can we use a range based for.
1955c0981da4SDimitry Andric     for (unsigned U = 0; U < GTIdArgs.size(); ++U)
1956c0981da4SDimitry Andric       AddUserArgs(*GTIdArgs[U]);
1957cfca06d7SDimitry Andric   }
1958cfca06d7SDimitry Andric 
1959cfca06d7SDimitry Andric   /// Kernel (=GPU) optimizations and utility functions
1960cfca06d7SDimitry Andric   ///
1961cfca06d7SDimitry Andric   ///{{
1962cfca06d7SDimitry Andric 
1963cfca06d7SDimitry Andric   /// Cache to remember the unique kernel for a function.
  /// An entry without a value has not been computed yet. A stored nullptr
  /// means no unique kernel is known for the function; getUniqueKernelFor also
  /// stores nullptr before it inspects the uses of the function.
1964e3b55780SDimitry Andric   DenseMap<Function *, std::optional<Kernel>> UniqueKernelMap;
1965cfca06d7SDimitry Andric 
1966cfca06d7SDimitry Andric   /// Find the unique kernel that will execute \p F, if any.
1967cfca06d7SDimitry Andric   Kernel getUniqueKernelFor(Function &F);
1968cfca06d7SDimitry Andric 
1969cfca06d7SDimitry Andric   /// Find the unique kernel that will execute \p I, if any.
getUniqueKernelFor__anon7bbaa8dc0111::OpenMPOpt1970cfca06d7SDimitry Andric   Kernel getUniqueKernelFor(Instruction &I) {
1971cfca06d7SDimitry Andric     return getUniqueKernelFor(*I.getFunction());
1972cfca06d7SDimitry Andric   }
1973cfca06d7SDimitry Andric 
1974cfca06d7SDimitry Andric   /// Rewrite the device (=GPU) code state machine created in non-SPMD mode in
1975cfca06d7SDimitry Andric   /// the cases we can avoid taking the address of a function.
1976cfca06d7SDimitry Andric   bool rewriteDeviceCodeStateMachine();
1977cfca06d7SDimitry Andric 
1978cfca06d7SDimitry Andric   ///
1979cfca06d7SDimitry Andric   ///}}
1980cfca06d7SDimitry Andric 
1981cfca06d7SDimitry Andric   /// Emit a remark generically
1982cfca06d7SDimitry Andric   ///
1983cfca06d7SDimitry Andric   /// This template function can be used to generically emit a remark. The
1984cfca06d7SDimitry Andric   /// RemarkKind should be one of the following:
1985cfca06d7SDimitry Andric   ///   - OptimizationRemark to indicate a successful optimization attempt
1986cfca06d7SDimitry Andric   ///   - OptimizationRemarkMissed to report a failed optimization attempt
1987cfca06d7SDimitry Andric   ///   - OptimizationRemarkAnalysis to provide additional information about an
1988cfca06d7SDimitry Andric   ///     optimization attempt
1989cfca06d7SDimitry Andric   ///
1990cfca06d7SDimitry Andric   /// The remark is built using a callback function provided by the caller that
1991cfca06d7SDimitry Andric   /// takes a RemarkKind as input and returns a RemarkKind.
1992344a3780SDimitry Andric   template <typename RemarkKind, typename RemarkCallBack>
emitRemark__anon7bbaa8dc0111::OpenMPOpt1993344a3780SDimitry Andric   void emitRemark(Instruction *I, StringRef RemarkName,
1994cfca06d7SDimitry Andric                   RemarkCallBack &&RemarkCB) const {
1995344a3780SDimitry Andric     Function *F = I->getParent()->getParent();
1996cfca06d7SDimitry Andric     auto &ORE = OREGetter(F);
1997cfca06d7SDimitry Andric 
1998b1c73532SDimitry Andric     if (RemarkName.starts_with("OMP"))
1999cfca06d7SDimitry Andric       ORE.emit([&]() {
2000344a3780SDimitry Andric         return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, I))
2001344a3780SDimitry Andric                << " [" << RemarkName << "]";
2002cfca06d7SDimitry Andric       });
2003344a3780SDimitry Andric     else
2004344a3780SDimitry Andric       ORE.emit(
2005344a3780SDimitry Andric           [&]() { return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, I)); });
2006cfca06d7SDimitry Andric   }
2007cfca06d7SDimitry Andric 
2008344a3780SDimitry Andric   /// Emit a remark on a function.
2009344a3780SDimitry Andric   template <typename RemarkKind, typename RemarkCallBack>
emitRemark__anon7bbaa8dc0111::OpenMPOpt2010344a3780SDimitry Andric   void emitRemark(Function *F, StringRef RemarkName,
2011344a3780SDimitry Andric                   RemarkCallBack &&RemarkCB) const {
2012344a3780SDimitry Andric     auto &ORE = OREGetter(F);
2013344a3780SDimitry Andric 
2014b1c73532SDimitry Andric     if (RemarkName.starts_with("OMP"))
2015344a3780SDimitry Andric       ORE.emit([&]() {
2016344a3780SDimitry Andric         return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, F))
2017344a3780SDimitry Andric                << " [" << RemarkName << "]";
2018344a3780SDimitry Andric       });
2019344a3780SDimitry Andric     else
2020344a3780SDimitry Andric       ORE.emit(
2021344a3780SDimitry Andric           [&]() { return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, F)); });
2022344a3780SDimitry Andric   }
2023344a3780SDimitry Andric 
2024cfca06d7SDimitry Andric   /// The underlying module.
2025cfca06d7SDimitry Andric   Module &M;
2026cfca06d7SDimitry Andric 
2027cfca06d7SDimitry Andric   /// The SCC we are operating on. Held by reference; runAttributor does
  /// nothing when it is empty and rewriteDeviceCodeStateMachine visits each of
  /// its functions.
2028cfca06d7SDimitry Andric   SmallVectorImpl<Function *> &SCC;
2029cfca06d7SDimitry Andric 
2030cfca06d7SDimitry Andric   /// Callback to update the call graph, the first argument is a removed call,
2031cfca06d7SDimitry Andric   /// the second an optional replacement call.
  /// NOTE(review): the member is a CallGraphUpdater object rather than a
  /// two-argument callback; confirm this description is still accurate.
2032cfca06d7SDimitry Andric   CallGraphUpdater &CGUpdater;
2033cfca06d7SDimitry Andric 
2034cfca06d7SDimitry Andric   /// Callback to get an OptimizationRemarkEmitter from a Function *. Both
  /// emitRemark overloads obtain their emitter through it.
2035cfca06d7SDimitry Andric   OptimizationRemarkGetter OREGetter;
2036cfca06d7SDimitry Andric 
2037cfca06d7SDimitry Andric   /// OpenMP-specific information cache. Also used for Attributor runs.
2038cfca06d7SDimitry Andric   OMPInformationCache &OMPInfoCache;
2039cfca06d7SDimitry Andric 
2040cfca06d7SDimitry Andric   /// Attributor instance, run by runAttributor.
2041cfca06d7SDimitry Andric   Attributor &A;
2042cfca06d7SDimitry Andric 
2043cfca06d7SDimitry Andric   /// Helper function to run Attributor on SCC.
runAttributor__anon7bbaa8dc0111::OpenMPOpt2044344a3780SDimitry Andric   bool runAttributor(bool IsModulePass) {
2045cfca06d7SDimitry Andric     if (SCC.empty())
2046cfca06d7SDimitry Andric       return false;
2047cfca06d7SDimitry Andric 
2048344a3780SDimitry Andric     registerAAs(IsModulePass);
2049cfca06d7SDimitry Andric 
2050cfca06d7SDimitry Andric     ChangeStatus Changed = A.run();
2051cfca06d7SDimitry Andric 
2052cfca06d7SDimitry Andric     LLVM_DEBUG(dbgs() << "[Attributor] Done with " << SCC.size()
2053cfca06d7SDimitry Andric                       << " functions, result: " << Changed << ".\n");
2054cfca06d7SDimitry Andric 
205599aabd70SDimitry Andric     if (Changed == ChangeStatus::CHANGED)
205699aabd70SDimitry Andric       OMPInfoCache.invalidateAnalyses();
205799aabd70SDimitry Andric 
2058cfca06d7SDimitry Andric     return Changed == ChangeStatus::CHANGED;
2059cfca06d7SDimitry Andric   }
2060cfca06d7SDimitry Andric 
2061344a3780SDimitry Andric   void registerFoldRuntimeCall(RuntimeFunction RF);
2062344a3780SDimitry Andric 
2063cfca06d7SDimitry Andric   /// Populate the Attributor with abstract attribute opportunities in the
2064e3b55780SDimitry Andric   /// functions.
2065344a3780SDimitry Andric   void registerAAs(bool IsModulePass);
2066e3b55780SDimitry Andric 
2067e3b55780SDimitry Andric public:
2068e3b55780SDimitry Andric   /// Callback to register AAs for live functions, including internal functions
2069e3b55780SDimitry Andric   /// marked live during the traversal.
2070e3b55780SDimitry Andric   static void registerAAsForFunction(Attributor &A, const Function &F);
2071cfca06d7SDimitry Andric };
2072cfca06d7SDimitry Andric 
getUniqueKernelFor(Function & F)2073cfca06d7SDimitry Andric Kernel OpenMPOpt::getUniqueKernelFor(Function &F) {
20747fa27ce4SDimitry Andric   if (OMPInfoCache.CGSCC && !OMPInfoCache.CGSCC->empty() &&
20757fa27ce4SDimitry Andric       !OMPInfoCache.CGSCC->contains(&F))
2076cfca06d7SDimitry Andric     return nullptr;
2077cfca06d7SDimitry Andric 
2078cfca06d7SDimitry Andric   // Use a scope to keep the lifetime of the CachedKernel short.
2079cfca06d7SDimitry Andric   {
2080e3b55780SDimitry Andric     std::optional<Kernel> &CachedKernel = UniqueKernelMap[&F];
2081cfca06d7SDimitry Andric     if (CachedKernel)
2082cfca06d7SDimitry Andric       return *CachedKernel;
2083cfca06d7SDimitry Andric 
2084cfca06d7SDimitry Andric     // TODO: We should use an AA to create an (optimistic and callback
2085cfca06d7SDimitry Andric     //       call-aware) call graph. For now we stick to simple patterns that
2086cfca06d7SDimitry Andric     //       are less powerful, basically the worst fixpoint.
2087b1c73532SDimitry Andric     if (isOpenMPKernel(F)) {
2088cfca06d7SDimitry Andric       CachedKernel = Kernel(&F);
2089cfca06d7SDimitry Andric       return *CachedKernel;
2090cfca06d7SDimitry Andric     }
2091cfca06d7SDimitry Andric 
2092cfca06d7SDimitry Andric     CachedKernel = nullptr;
2093b60736ecSDimitry Andric     if (!F.hasLocalLinkage()) {
2094b60736ecSDimitry Andric 
2095b60736ecSDimitry Andric       // See https://openmp.llvm.org/remarks/OptimizationRemarks.html
2096344a3780SDimitry Andric       auto Remark = [&](OptimizationRemarkAnalysis ORA) {
2097344a3780SDimitry Andric         return ORA << "Potentially unknown OpenMP target region caller.";
2098b60736ecSDimitry Andric       };
2099344a3780SDimitry Andric       emitRemark<OptimizationRemarkAnalysis>(&F, "OMP100", Remark);
2100b60736ecSDimitry Andric 
2101cfca06d7SDimitry Andric       return nullptr;
2102cfca06d7SDimitry Andric     }
2103b60736ecSDimitry Andric   }
2104cfca06d7SDimitry Andric 
2105cfca06d7SDimitry Andric   auto GetUniqueKernelForUse = [&](const Use &U) -> Kernel {
2106cfca06d7SDimitry Andric     if (auto *Cmp = dyn_cast<ICmpInst>(U.getUser())) {
2107cfca06d7SDimitry Andric       // Allow use in equality comparisons.
2108cfca06d7SDimitry Andric       if (Cmp->isEquality())
2109cfca06d7SDimitry Andric         return getUniqueKernelFor(*Cmp);
2110cfca06d7SDimitry Andric       return nullptr;
2111cfca06d7SDimitry Andric     }
2112cfca06d7SDimitry Andric     if (auto *CB = dyn_cast<CallBase>(U.getUser())) {
2113cfca06d7SDimitry Andric       // Allow direct calls.
2114cfca06d7SDimitry Andric       if (CB->isCallee(&U))
2115cfca06d7SDimitry Andric         return getUniqueKernelFor(*CB);
2116344a3780SDimitry Andric 
2117344a3780SDimitry Andric       OMPInformationCache::RuntimeFunctionInfo &KernelParallelRFI =
2118344a3780SDimitry Andric           OMPInfoCache.RFIs[OMPRTL___kmpc_parallel_51];
2119344a3780SDimitry Andric       // Allow the use in __kmpc_parallel_51 calls.
2120344a3780SDimitry Andric       if (OpenMPOpt::getCallIfRegularCall(*U.getUser(), &KernelParallelRFI))
2121cfca06d7SDimitry Andric         return getUniqueKernelFor(*CB);
2122cfca06d7SDimitry Andric       return nullptr;
2123cfca06d7SDimitry Andric     }
2124cfca06d7SDimitry Andric     // Disallow every other use.
2125cfca06d7SDimitry Andric     return nullptr;
2126cfca06d7SDimitry Andric   };
2127cfca06d7SDimitry Andric 
2128cfca06d7SDimitry Andric   // TODO: In the future we want to track more than just a unique kernel.
2129cfca06d7SDimitry Andric   SmallPtrSet<Kernel, 2> PotentialKernels;
2130b60736ecSDimitry Andric   OMPInformationCache::foreachUse(F, [&](const Use &U) {
2131cfca06d7SDimitry Andric     PotentialKernels.insert(GetUniqueKernelForUse(U));
2132cfca06d7SDimitry Andric   });
2133cfca06d7SDimitry Andric 
2134cfca06d7SDimitry Andric   Kernel K = nullptr;
2135cfca06d7SDimitry Andric   if (PotentialKernels.size() == 1)
2136cfca06d7SDimitry Andric     K = *PotentialKernels.begin();
2137cfca06d7SDimitry Andric 
2138cfca06d7SDimitry Andric   // Cache the result.
2139cfca06d7SDimitry Andric   UniqueKernelMap[&F] = K;
2140cfca06d7SDimitry Andric 
2141cfca06d7SDimitry Andric   return K;
2142cfca06d7SDimitry Andric }
2143cfca06d7SDimitry Andric 
rewriteDeviceCodeStateMachine()2144cfca06d7SDimitry Andric bool OpenMPOpt::rewriteDeviceCodeStateMachine() {
2145344a3780SDimitry Andric   OMPInformationCache::RuntimeFunctionInfo &KernelParallelRFI =
2146344a3780SDimitry Andric       OMPInfoCache.RFIs[OMPRTL___kmpc_parallel_51];
2147cfca06d7SDimitry Andric 
2148cfca06d7SDimitry Andric   bool Changed = false;
2149344a3780SDimitry Andric   if (!KernelParallelRFI)
2150cfca06d7SDimitry Andric     return Changed;
2151cfca06d7SDimitry Andric 
2152c0981da4SDimitry Andric   // If we have disabled state machine changes, exit
2153c0981da4SDimitry Andric   if (DisableOpenMPOptStateMachineRewrite)
2154c0981da4SDimitry Andric     return Changed;
2155c0981da4SDimitry Andric 
2156cfca06d7SDimitry Andric   for (Function *F : SCC) {
2157cfca06d7SDimitry Andric 
2158344a3780SDimitry Andric     // Check if the function is a use in a __kmpc_parallel_51 call at
2159cfca06d7SDimitry Andric     // all.
2160cfca06d7SDimitry Andric     bool UnknownUse = false;
2161344a3780SDimitry Andric     bool KernelParallelUse = false;
2162cfca06d7SDimitry Andric     unsigned NumDirectCalls = 0;
2163cfca06d7SDimitry Andric 
2164cfca06d7SDimitry Andric     SmallVector<Use *, 2> ToBeReplacedStateMachineUses;
2165b60736ecSDimitry Andric     OMPInformationCache::foreachUse(*F, [&](Use &U) {
2166cfca06d7SDimitry Andric       if (auto *CB = dyn_cast<CallBase>(U.getUser()))
2167cfca06d7SDimitry Andric         if (CB->isCallee(&U)) {
2168cfca06d7SDimitry Andric           ++NumDirectCalls;
2169cfca06d7SDimitry Andric           return;
2170cfca06d7SDimitry Andric         }
2171cfca06d7SDimitry Andric 
2172cfca06d7SDimitry Andric       if (isa<ICmpInst>(U.getUser())) {
2173cfca06d7SDimitry Andric         ToBeReplacedStateMachineUses.push_back(&U);
2174cfca06d7SDimitry Andric         return;
2175cfca06d7SDimitry Andric       }
2176344a3780SDimitry Andric 
2177344a3780SDimitry Andric       // Find wrapper functions that represent parallel kernels.
2178344a3780SDimitry Andric       CallInst *CI =
2179344a3780SDimitry Andric           OpenMPOpt::getCallIfRegularCall(*U.getUser(), &KernelParallelRFI);
2180344a3780SDimitry Andric       const unsigned int WrapperFunctionArgNo = 6;
2181344a3780SDimitry Andric       if (!KernelParallelUse && CI &&
2182344a3780SDimitry Andric           CI->getArgOperandNo(&U) == WrapperFunctionArgNo) {
2183344a3780SDimitry Andric         KernelParallelUse = true;
2184cfca06d7SDimitry Andric         ToBeReplacedStateMachineUses.push_back(&U);
2185cfca06d7SDimitry Andric         return;
2186cfca06d7SDimitry Andric       }
2187cfca06d7SDimitry Andric       UnknownUse = true;
2188cfca06d7SDimitry Andric     });
2189cfca06d7SDimitry Andric 
2190344a3780SDimitry Andric     // Do not emit a remark if we haven't seen a __kmpc_parallel_51
2191cfca06d7SDimitry Andric     // use.
2192344a3780SDimitry Andric     if (!KernelParallelUse)
2193cfca06d7SDimitry Andric       continue;
2194cfca06d7SDimitry Andric 
2195cfca06d7SDimitry Andric     // If this ever hits, we should investigate.
2196cfca06d7SDimitry Andric     // TODO: Checking the number of uses is not a necessary restriction and
2197cfca06d7SDimitry Andric     // should be lifted.
2198cfca06d7SDimitry Andric     if (UnknownUse || NumDirectCalls != 1 ||
2199344a3780SDimitry Andric         ToBeReplacedStateMachineUses.size() > 2) {
2200344a3780SDimitry Andric       auto Remark = [&](OptimizationRemarkAnalysis ORA) {
2201344a3780SDimitry Andric         return ORA << "Parallel region is used in "
2202cfca06d7SDimitry Andric                    << (UnknownUse ? "unknown" : "unexpected")
2203344a3780SDimitry Andric                    << " ways. Will not attempt to rewrite the state machine.";
2204cfca06d7SDimitry Andric       };
2205344a3780SDimitry Andric       emitRemark<OptimizationRemarkAnalysis>(F, "OMP101", Remark);
2206cfca06d7SDimitry Andric       continue;
2207cfca06d7SDimitry Andric     }
2208cfca06d7SDimitry Andric 
2209344a3780SDimitry Andric     // Even if we have __kmpc_parallel_51 calls, we (for now) give
2210cfca06d7SDimitry Andric     // up if the function is not called from a unique kernel.
2211cfca06d7SDimitry Andric     Kernel K = getUniqueKernelFor(*F);
2212cfca06d7SDimitry Andric     if (!K) {
2213344a3780SDimitry Andric       auto Remark = [&](OptimizationRemarkAnalysis ORA) {
2214344a3780SDimitry Andric         return ORA << "Parallel region is not called from a unique kernel. "
2215344a3780SDimitry Andric                       "Will not attempt to rewrite the state machine.";
2216cfca06d7SDimitry Andric       };
2217344a3780SDimitry Andric       emitRemark<OptimizationRemarkAnalysis>(F, "OMP102", Remark);
2218cfca06d7SDimitry Andric       continue;
2219cfca06d7SDimitry Andric     }
2220cfca06d7SDimitry Andric 
2221cfca06d7SDimitry Andric     // We now know F is a parallel body function called only from the kernel K.
2222cfca06d7SDimitry Andric     // We also identified the state machine uses in which we replace the
2223cfca06d7SDimitry Andric     // function pointer by a new global symbol for identification purposes. This
2224cfca06d7SDimitry Andric     // ensures only direct calls to the function are left.
2225cfca06d7SDimitry Andric 
2226cfca06d7SDimitry Andric     Module &M = *F->getParent();
2227cfca06d7SDimitry Andric     Type *Int8Ty = Type::getInt8Ty(M.getContext());
2228cfca06d7SDimitry Andric 
2229cfca06d7SDimitry Andric     auto *ID = new GlobalVariable(
2230cfca06d7SDimitry Andric         M, Int8Ty, /* isConstant */ true, GlobalValue::PrivateLinkage,
2231cfca06d7SDimitry Andric         UndefValue::get(Int8Ty), F->getName() + ".ID");
2232cfca06d7SDimitry Andric 
2233cfca06d7SDimitry Andric     for (Use *U : ToBeReplacedStateMachineUses)
2234c0981da4SDimitry Andric       U->set(ConstantExpr::getPointerBitCastOrAddrSpaceCast(
2235c0981da4SDimitry Andric           ID, U->get()->getType()));
2236cfca06d7SDimitry Andric 
2237cfca06d7SDimitry Andric     ++NumOpenMPParallelRegionsReplacedInGPUStateMachine;
2238cfca06d7SDimitry Andric 
2239cfca06d7SDimitry Andric     Changed = true;
2240cfca06d7SDimitry Andric   }
2241cfca06d7SDimitry Andric 
2242cfca06d7SDimitry Andric   return Changed;
2243cfca06d7SDimitry Andric }
2244cfca06d7SDimitry Andric 
2245cfca06d7SDimitry Andric /// Abstract Attribute for tracking ICV values.
2246cfca06d7SDimitry Andric struct AAICVTracker : public StateWrapper<BooleanState, AbstractAttribute> {
2247cfca06d7SDimitry Andric   using Base = StateWrapper<BooleanState, AbstractAttribute>;
AAICVTracker__anon7bbaa8dc0111::AAICVTracker2248cfca06d7SDimitry Andric   AAICVTracker(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
2249cfca06d7SDimitry Andric 
2250cfca06d7SDimitry Andric   /// Returns true if value is assumed to be tracked.
isAssumedTracked__anon7bbaa8dc0111::AAICVTracker2251cfca06d7SDimitry Andric   bool isAssumedTracked() const { return getAssumed(); }
2252cfca06d7SDimitry Andric 
2253cfca06d7SDimitry Andric   /// Returns true if value is known to be tracked.
isKnownTracked__anon7bbaa8dc0111::AAICVTracker2254cfca06d7SDimitry Andric   bool isKnownTracked() const { return getAssumed(); }
2255cfca06d7SDimitry Andric 
2256cfca06d7SDimitry Andric   /// Create an abstract attribute biew for the position \p IRP.
2257cfca06d7SDimitry Andric   static AAICVTracker &createForPosition(const IRPosition &IRP, Attributor &A);
2258cfca06d7SDimitry Andric 
2259cfca06d7SDimitry Andric   /// Return the value with which \p I can be replaced for specific \p ICV.
getReplacementValue__anon7bbaa8dc0111::AAICVTracker2260e3b55780SDimitry Andric   virtual std::optional<Value *> getReplacementValue(InternalControlVar ICV,
2261b60736ecSDimitry Andric                                                      const Instruction *I,
2262b60736ecSDimitry Andric                                                      Attributor &A) const {
2263e3b55780SDimitry Andric     return std::nullopt;
2264b60736ecSDimitry Andric   }
2265b60736ecSDimitry Andric 
2266b60736ecSDimitry Andric   /// Return an assumed unique ICV value if a single candidate is found. If
2267e3b55780SDimitry Andric   /// there cannot be one, return a nullptr. If it is not clear yet, return
2268e3b55780SDimitry Andric   /// std::nullopt.
2269e3b55780SDimitry Andric   virtual std::optional<Value *>
2270b60736ecSDimitry Andric   getUniqueReplacementValue(InternalControlVar ICV) const = 0;
2271b60736ecSDimitry Andric 
2272b60736ecSDimitry Andric   // Currently only nthreads is being tracked.
2273b60736ecSDimitry Andric   // this array will only grow with time.
2274b60736ecSDimitry Andric   InternalControlVar TrackableICVs[1] = {ICV_nthreads};
2275cfca06d7SDimitry Andric 
2276cfca06d7SDimitry Andric   /// See AbstractAttribute::getName()
getName__anon7bbaa8dc0111::AAICVTracker2277cfca06d7SDimitry Andric   const std::string getName() const override { return "AAICVTracker"; }
2278cfca06d7SDimitry Andric 
2279cfca06d7SDimitry Andric   /// See AbstractAttribute::getIdAddr()
getIdAddr__anon7bbaa8dc0111::AAICVTracker2280cfca06d7SDimitry Andric   const char *getIdAddr() const override { return &ID; }
2281cfca06d7SDimitry Andric 
2282cfca06d7SDimitry Andric   /// This function should return true if the type of the \p AA is AAICVTracker
classof__anon7bbaa8dc0111::AAICVTracker2283cfca06d7SDimitry Andric   static bool classof(const AbstractAttribute *AA) {
2284cfca06d7SDimitry Andric     return (AA->getIdAddr() == &ID);
2285cfca06d7SDimitry Andric   }
2286cfca06d7SDimitry Andric 
2287cfca06d7SDimitry Andric   static const char ID;
2288cfca06d7SDimitry Andric };
2289cfca06d7SDimitry Andric 
2290cfca06d7SDimitry Andric struct AAICVTrackerFunction : public AAICVTracker {
AAICVTrackerFunction__anon7bbaa8dc0111::AAICVTrackerFunction2291cfca06d7SDimitry Andric   AAICVTrackerFunction(const IRPosition &IRP, Attributor &A)
2292cfca06d7SDimitry Andric       : AAICVTracker(IRP, A) {}
2293cfca06d7SDimitry Andric 
2294cfca06d7SDimitry Andric   // FIXME: come up with better string.
  // Returns a fixed label; the Attributor argument is unused.
getAsStr__anon7bbaa8dc0111::AAICVTrackerFunction22957fa27ce4SDimitry Andric   const std::string getAsStr(Attributor *) const override {
22967fa27ce4SDimitry Andric     return "ICVTrackerFunction";
22977fa27ce4SDimitry Andric   }
2298cfca06d7SDimitry Andric 
2299cfca06d7SDimitry Andric   // FIXME: come up with some stats.
  // Intentionally empty: no statistics are recorded for this attribute.
trackStatistics__anon7bbaa8dc0111::AAICVTrackerFunction2300cfca06d7SDimitry Andric   void trackStatistics() const override {}
2301cfca06d7SDimitry Andric 
2302b60736ecSDimitry Andric   /// We don't manifest anything for this AA; always reports UNCHANGED.
manifest__anon7bbaa8dc0111::AAICVTrackerFunction2303cfca06d7SDimitry Andric   ChangeStatus manifest(Attributor &A) override {
2304b60736ecSDimitry Andric     return ChangeStatus::UNCHANGED;
2305cfca06d7SDimitry Andric   }
2306cfca06d7SDimitry Andric 
2307cfca06d7SDimitry Andric   // Map of ICV to their values at specific program point.
  // Keys are the instructions at which a value was recorded (setter calls,
  // other calls, and the first instruction of the entry block). A nullptr
  // value is the marker used when the ICV is assumed to have changed.
2308b60736ecSDimitry Andric   EnumeratedArray<DenseMap<Instruction *, Value *>, InternalControlVar,
2309cfca06d7SDimitry Andric                   InternalControlVar::ICV___last>
2310b60736ecSDimitry Andric       ICVReplacementValuesMap;
2311cfca06d7SDimitry Andric 
updateImpl__anon7bbaa8dc0111::AAICVTrackerFunction2312cfca06d7SDimitry Andric   ChangeStatus updateImpl(Attributor &A) override {
2313cfca06d7SDimitry Andric     ChangeStatus HasChanged = ChangeStatus::UNCHANGED;
2314cfca06d7SDimitry Andric 
2315cfca06d7SDimitry Andric     Function *F = getAnchorScope();
2316cfca06d7SDimitry Andric 
2317cfca06d7SDimitry Andric     auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
2318cfca06d7SDimitry Andric 
2319cfca06d7SDimitry Andric     for (InternalControlVar ICV : TrackableICVs) {
2320cfca06d7SDimitry Andric       auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter];
2321cfca06d7SDimitry Andric 
2322b60736ecSDimitry Andric       auto &ValuesMap = ICVReplacementValuesMap[ICV];
2323cfca06d7SDimitry Andric       auto TrackValues = [&](Use &U, Function &) {
2324cfca06d7SDimitry Andric         CallInst *CI = OpenMPOpt::getCallIfRegularCall(U);
2325cfca06d7SDimitry Andric         if (!CI)
2326cfca06d7SDimitry Andric           return false;
2327cfca06d7SDimitry Andric 
2328cfca06d7SDimitry Andric         // FIXME: handle setters with more that 1 arguments.
2329cfca06d7SDimitry Andric         /// Track new value.
2330b60736ecSDimitry Andric         if (ValuesMap.insert(std::make_pair(CI, CI->getArgOperand(0))).second)
2331cfca06d7SDimitry Andric           HasChanged = ChangeStatus::CHANGED;
2332cfca06d7SDimitry Andric 
2333cfca06d7SDimitry Andric         return false;
2334cfca06d7SDimitry Andric       };
2335cfca06d7SDimitry Andric 
2336b60736ecSDimitry Andric       auto CallCheck = [&](Instruction &I) {
2337e3b55780SDimitry Andric         std::optional<Value *> ReplVal = getValueForCall(A, I, ICV);
2338145449b1SDimitry Andric         if (ReplVal && ValuesMap.insert(std::make_pair(&I, *ReplVal)).second)
2339b60736ecSDimitry Andric           HasChanged = ChangeStatus::CHANGED;
2340b60736ecSDimitry Andric 
2341b60736ecSDimitry Andric         return true;
2342b60736ecSDimitry Andric       };
2343b60736ecSDimitry Andric 
2344b60736ecSDimitry Andric       // Track all changes of an ICV.
2345cfca06d7SDimitry Andric       SetterRFI.foreachUse(TrackValues, F);
2346b60736ecSDimitry Andric 
2347344a3780SDimitry Andric       bool UsedAssumedInformation = false;
2348b60736ecSDimitry Andric       A.checkForAllInstructions(CallCheck, *this, {Instruction::Call},
2349344a3780SDimitry Andric                                 UsedAssumedInformation,
2350b60736ecSDimitry Andric                                 /* CheckBBLivenessOnly */ true);
2351b60736ecSDimitry Andric 
2352b60736ecSDimitry Andric       /// TODO: Figure out a way to avoid adding entry in
2353b60736ecSDimitry Andric       /// ICVReplacementValuesMap
2354b60736ecSDimitry Andric       Instruction *Entry = &F->getEntryBlock().front();
2355b60736ecSDimitry Andric       if (HasChanged == ChangeStatus::CHANGED && !ValuesMap.count(Entry))
2356b60736ecSDimitry Andric         ValuesMap.insert(std::make_pair(Entry, nullptr));
2357cfca06d7SDimitry Andric     }
2358cfca06d7SDimitry Andric 
2359cfca06d7SDimitry Andric     return HasChanged;
2360cfca06d7SDimitry Andric   }
2361cfca06d7SDimitry Andric 
23626f8fc217SDimitry Andric   /// Helper to check if \p I is a call and get the value for it if it is
2363b60736ecSDimitry Andric   /// unique.
getValueForCall__anon7bbaa8dc0111::AAICVTrackerFunction2364e3b55780SDimitry Andric   std::optional<Value *> getValueForCall(Attributor &A, const Instruction &I,
2365b60736ecSDimitry Andric                                          InternalControlVar &ICV) const {
2366cfca06d7SDimitry Andric 
23676f8fc217SDimitry Andric     const auto *CB = dyn_cast<CallBase>(&I);
2368b60736ecSDimitry Andric     if (!CB || CB->hasFnAttr("no_openmp") ||
2369b60736ecSDimitry Andric         CB->hasFnAttr("no_openmp_routines"))
2370e3b55780SDimitry Andric       return std::nullopt;
2371b60736ecSDimitry Andric 
2372cfca06d7SDimitry Andric     auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
2373cfca06d7SDimitry Andric     auto &GetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Getter];
2374b60736ecSDimitry Andric     auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter];
2375b60736ecSDimitry Andric     Function *CalledFunction = CB->getCalledFunction();
2376cfca06d7SDimitry Andric 
2377b60736ecSDimitry Andric     // Indirect call, assume ICV changes.
2378b60736ecSDimitry Andric     if (CalledFunction == nullptr)
2379b60736ecSDimitry Andric       return nullptr;
2380b60736ecSDimitry Andric     if (CalledFunction == GetterRFI.Declaration)
2381e3b55780SDimitry Andric       return std::nullopt;
2382b60736ecSDimitry Andric     if (CalledFunction == SetterRFI.Declaration) {
23836f8fc217SDimitry Andric       if (ICVReplacementValuesMap[ICV].count(&I))
23846f8fc217SDimitry Andric         return ICVReplacementValuesMap[ICV].lookup(&I);
2385b60736ecSDimitry Andric 
2386b60736ecSDimitry Andric       return nullptr;
2387b60736ecSDimitry Andric     }
2388b60736ecSDimitry Andric 
2389b60736ecSDimitry Andric     // Since we don't know, assume it changes the ICV.
2390b60736ecSDimitry Andric     if (CalledFunction->isDeclaration())
2391b60736ecSDimitry Andric       return nullptr;
2392b60736ecSDimitry Andric 
23937fa27ce4SDimitry Andric     const auto *ICVTrackingAA = A.getAAFor<AAICVTracker>(
2394344a3780SDimitry Andric         *this, IRPosition::callsite_returned(*CB), DepClassTy::REQUIRED);
2395b60736ecSDimitry Andric 
23967fa27ce4SDimitry Andric     if (ICVTrackingAA->isAssumedTracked()) {
23977fa27ce4SDimitry Andric       std::optional<Value *> URV =
23987fa27ce4SDimitry Andric           ICVTrackingAA->getUniqueReplacementValue(ICV);
2399145449b1SDimitry Andric       if (!URV || (*URV && AA::isValidAtPosition(AA::ValueAndContext(**URV, I),
2400145449b1SDimitry Andric                                                  OMPInfoCache)))
24016f8fc217SDimitry Andric         return URV;
24026f8fc217SDimitry Andric     }
2403b60736ecSDimitry Andric 
2404b60736ecSDimitry Andric     // If we don't know, assume it changes.
2405b60736ecSDimitry Andric     return nullptr;
2406b60736ecSDimitry Andric   }
2407b60736ecSDimitry Andric 
2408e3b55780SDimitry Andric   // We don't check unique value for a function, so return std::nullopt.
2409e3b55780SDimitry Andric   std::optional<Value *>
getUniqueReplacementValue__anon7bbaa8dc0111::AAICVTrackerFunction2410b60736ecSDimitry Andric   getUniqueReplacementValue(InternalControlVar ICV) const override {
2411e3b55780SDimitry Andric     return std::nullopt;
2412b60736ecSDimitry Andric   }
2413b60736ecSDimitry Andric 
2414b60736ecSDimitry Andric   /// Return the value with which \p I can be replaced for specific \p ICV.
getReplacementValue__anon7bbaa8dc0111::AAICVTrackerFunction2415e3b55780SDimitry Andric   std::optional<Value *> getReplacementValue(InternalControlVar ICV,
2416b60736ecSDimitry Andric                                              const Instruction *I,
2417b60736ecSDimitry Andric                                              Attributor &A) const override {
2418b60736ecSDimitry Andric     const auto &ValuesMap = ICVReplacementValuesMap[ICV];
2419b60736ecSDimitry Andric     if (ValuesMap.count(I))
2420b60736ecSDimitry Andric       return ValuesMap.lookup(I);
2421b60736ecSDimitry Andric 
2422b60736ecSDimitry Andric     SmallVector<const Instruction *, 16> Worklist;
2423b60736ecSDimitry Andric     SmallPtrSet<const Instruction *, 16> Visited;
2424b60736ecSDimitry Andric     Worklist.push_back(I);
2425b60736ecSDimitry Andric 
2426e3b55780SDimitry Andric     std::optional<Value *> ReplVal;
2427b60736ecSDimitry Andric 
2428b60736ecSDimitry Andric     while (!Worklist.empty()) {
2429b60736ecSDimitry Andric       const Instruction *CurrInst = Worklist.pop_back_val();
2430b60736ecSDimitry Andric       if (!Visited.insert(CurrInst).second)
2431cfca06d7SDimitry Andric         continue;
2432cfca06d7SDimitry Andric 
2433b60736ecSDimitry Andric       const BasicBlock *CurrBB = CurrInst->getParent();
2434b60736ecSDimitry Andric 
2435b60736ecSDimitry Andric       // Go up and look for all potential setters/calls that might change the
2436b60736ecSDimitry Andric       // ICV.
2437b60736ecSDimitry Andric       while ((CurrInst = CurrInst->getPrevNode())) {
2438b60736ecSDimitry Andric         if (ValuesMap.count(CurrInst)) {
2439e3b55780SDimitry Andric           std::optional<Value *> NewReplVal = ValuesMap.lookup(CurrInst);
2440b60736ecSDimitry Andric           // Unknown value, track new.
2441145449b1SDimitry Andric           if (!ReplVal) {
2442b60736ecSDimitry Andric             ReplVal = NewReplVal;
2443b60736ecSDimitry Andric             break;
2444b60736ecSDimitry Andric           }
2445b60736ecSDimitry Andric 
2446b60736ecSDimitry Andric           // If we found a new value, we can't know the icv value anymore.
2447145449b1SDimitry Andric           if (NewReplVal)
2448b60736ecSDimitry Andric             if (ReplVal != NewReplVal)
2449cfca06d7SDimitry Andric               return nullptr;
2450cfca06d7SDimitry Andric 
2451b60736ecSDimitry Andric           break;
2452cfca06d7SDimitry Andric         }
2453cfca06d7SDimitry Andric 
2454e3b55780SDimitry Andric         std::optional<Value *> NewReplVal = getValueForCall(A, *CurrInst, ICV);
2455145449b1SDimitry Andric         if (!NewReplVal)
2456b60736ecSDimitry Andric           continue;
2457b60736ecSDimitry Andric 
2458b60736ecSDimitry Andric         // Unknown value, track new.
2459145449b1SDimitry Andric         if (!ReplVal) {
2460b60736ecSDimitry Andric           ReplVal = NewReplVal;
2461b60736ecSDimitry Andric           break;
2462cfca06d7SDimitry Andric         }
2463cfca06d7SDimitry Andric 
2464b60736ecSDimitry Andric         // if (NewReplVal.hasValue())
2465b60736ecSDimitry Andric         // We found a new value, we can't know the icv value anymore.
2466b60736ecSDimitry Andric         if (ReplVal != NewReplVal)
2467cfca06d7SDimitry Andric           return nullptr;
2468cfca06d7SDimitry Andric       }
2469b60736ecSDimitry Andric 
2470b60736ecSDimitry Andric       // If we are in the same BB and we have a value, we are done.
2471145449b1SDimitry Andric       if (CurrBB == I->getParent() && ReplVal)
2472b60736ecSDimitry Andric         return ReplVal;
2473b60736ecSDimitry Andric 
2474b60736ecSDimitry Andric       // Go through all predecessors and add terminators for analysis.
2475b60736ecSDimitry Andric       for (const BasicBlock *Pred : predecessors(CurrBB))
2476b60736ecSDimitry Andric         if (const Instruction *Terminator = Pred->getTerminator())
2477b60736ecSDimitry Andric           Worklist.push_back(Terminator);
2478b60736ecSDimitry Andric     }
2479b60736ecSDimitry Andric 
2480b60736ecSDimitry Andric     return ReplVal;
2481b60736ecSDimitry Andric   }
2482b60736ecSDimitry Andric };
2483b60736ecSDimitry Andric 
2484b60736ecSDimitry Andric struct AAICVTrackerFunctionReturned : AAICVTracker {
AAICVTrackerFunctionReturned__anon7bbaa8dc0111::AAICVTrackerFunctionReturned2485b60736ecSDimitry Andric   AAICVTrackerFunctionReturned(const IRPosition &IRP, Attributor &A)
2486b60736ecSDimitry Andric       : AAICVTracker(IRP, A) {}
2487b60736ecSDimitry Andric 
2488b60736ecSDimitry Andric   // FIXME: come up with better string.
getAsStr__anon7bbaa8dc0111::AAICVTrackerFunctionReturned24897fa27ce4SDimitry Andric   const std::string getAsStr(Attributor *) const override {
2490b60736ecSDimitry Andric     return "ICVTrackerFunctionReturned";
2491b60736ecSDimitry Andric   }
2492b60736ecSDimitry Andric 
2493b60736ecSDimitry Andric   // FIXME: come up with some stats.
trackStatistics__anon7bbaa8dc0111::AAICVTrackerFunctionReturned2494b60736ecSDimitry Andric   void trackStatistics() const override {}
2495b60736ecSDimitry Andric 
2496b60736ecSDimitry Andric   /// We don't manifest anything for this AA.
manifest__anon7bbaa8dc0111::AAICVTrackerFunctionReturned2497b60736ecSDimitry Andric   ChangeStatus manifest(Attributor &A) override {
2498b60736ecSDimitry Andric     return ChangeStatus::UNCHANGED;
2499b60736ecSDimitry Andric   }
2500b60736ecSDimitry Andric 
2501b60736ecSDimitry Andric   // Map of ICV to their values at specific program point.
2502e3b55780SDimitry Andric   EnumeratedArray<std::optional<Value *>, InternalControlVar,
2503b60736ecSDimitry Andric                   InternalControlVar::ICV___last>
2504b60736ecSDimitry Andric       ICVReplacementValuesMap;
2505b60736ecSDimitry Andric 
2506b60736ecSDimitry Andric   /// Return the value with which \p I can be replaced for specific \p ICV.
2507e3b55780SDimitry Andric   std::optional<Value *>
getUniqueReplacementValue__anon7bbaa8dc0111::AAICVTrackerFunctionReturned2508b60736ecSDimitry Andric   getUniqueReplacementValue(InternalControlVar ICV) const override {
2509b60736ecSDimitry Andric     return ICVReplacementValuesMap[ICV];
2510b60736ecSDimitry Andric   }
2511b60736ecSDimitry Andric 
updateImpl__anon7bbaa8dc0111::AAICVTrackerFunctionReturned2512b60736ecSDimitry Andric   ChangeStatus updateImpl(Attributor &A) override {
2513b60736ecSDimitry Andric     ChangeStatus Changed = ChangeStatus::UNCHANGED;
25147fa27ce4SDimitry Andric     const auto *ICVTrackingAA = A.getAAFor<AAICVTracker>(
2515344a3780SDimitry Andric         *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
2516b60736ecSDimitry Andric 
25177fa27ce4SDimitry Andric     if (!ICVTrackingAA->isAssumedTracked())
2518b60736ecSDimitry Andric       return indicatePessimisticFixpoint();
2519b60736ecSDimitry Andric 
2520b60736ecSDimitry Andric     for (InternalControlVar ICV : TrackableICVs) {
2521e3b55780SDimitry Andric       std::optional<Value *> &ReplVal = ICVReplacementValuesMap[ICV];
2522e3b55780SDimitry Andric       std::optional<Value *> UniqueICVValue;
2523b60736ecSDimitry Andric 
2524b60736ecSDimitry Andric       auto CheckReturnInst = [&](Instruction &I) {
2525e3b55780SDimitry Andric         std::optional<Value *> NewReplVal =
25267fa27ce4SDimitry Andric             ICVTrackingAA->getReplacementValue(ICV, &I, A);
2527b60736ecSDimitry Andric 
2528b60736ecSDimitry Andric         // If we found a second ICV value there is no unique returned value.
2529145449b1SDimitry Andric         if (UniqueICVValue && UniqueICVValue != NewReplVal)
2530b60736ecSDimitry Andric           return false;
2531b60736ecSDimitry Andric 
2532b60736ecSDimitry Andric         UniqueICVValue = NewReplVal;
2533b60736ecSDimitry Andric 
2534b60736ecSDimitry Andric         return true;
2535b60736ecSDimitry Andric       };
2536b60736ecSDimitry Andric 
2537344a3780SDimitry Andric       bool UsedAssumedInformation = false;
2538b60736ecSDimitry Andric       if (!A.checkForAllInstructions(CheckReturnInst, *this, {Instruction::Ret},
2539344a3780SDimitry Andric                                      UsedAssumedInformation,
2540b60736ecSDimitry Andric                                      /* CheckBBLivenessOnly */ true))
2541b60736ecSDimitry Andric         UniqueICVValue = nullptr;
2542b60736ecSDimitry Andric 
2543b60736ecSDimitry Andric       if (UniqueICVValue == ReplVal)
2544b60736ecSDimitry Andric         continue;
2545b60736ecSDimitry Andric 
2546b60736ecSDimitry Andric       ReplVal = UniqueICVValue;
2547b60736ecSDimitry Andric       Changed = ChangeStatus::CHANGED;
2548b60736ecSDimitry Andric     }
2549b60736ecSDimitry Andric 
2550b60736ecSDimitry Andric     return Changed;
2551b60736ecSDimitry Andric   }
2552b60736ecSDimitry Andric };
2553b60736ecSDimitry Andric 
2554b60736ecSDimitry Andric struct AAICVTrackerCallSite : AAICVTracker {
AAICVTrackerCallSite__anon7bbaa8dc0111::AAICVTrackerCallSite2555b60736ecSDimitry Andric   AAICVTrackerCallSite(const IRPosition &IRP, Attributor &A)
2556b60736ecSDimitry Andric       : AAICVTracker(IRP, A) {}
2557b60736ecSDimitry Andric 
initialize__anon7bbaa8dc0111::AAICVTrackerCallSite2558b60736ecSDimitry Andric   void initialize(Attributor &A) override {
25597fa27ce4SDimitry Andric     assert(getAnchorScope() && "Expected anchor function");
2560b60736ecSDimitry Andric 
2561b60736ecSDimitry Andric     // We only initialize this AA for getters, so we need to know which ICV it
2562b60736ecSDimitry Andric     // gets.
2563b60736ecSDimitry Andric     auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
2564b60736ecSDimitry Andric     for (InternalControlVar ICV : TrackableICVs) {
2565b60736ecSDimitry Andric       auto ICVInfo = OMPInfoCache.ICVs[ICV];
2566b60736ecSDimitry Andric       auto &Getter = OMPInfoCache.RFIs[ICVInfo.Getter];
2567b60736ecSDimitry Andric       if (Getter.Declaration == getAssociatedFunction()) {
2568b60736ecSDimitry Andric         AssociatedICV = ICVInfo.Kind;
2569b60736ecSDimitry Andric         return;
2570b60736ecSDimitry Andric       }
2571b60736ecSDimitry Andric     }
2572b60736ecSDimitry Andric 
2573b60736ecSDimitry Andric     /// Unknown ICV.
2574b60736ecSDimitry Andric     indicatePessimisticFixpoint();
2575b60736ecSDimitry Andric   }
2576b60736ecSDimitry Andric 
manifest__anon7bbaa8dc0111::AAICVTrackerCallSite2577b60736ecSDimitry Andric   ChangeStatus manifest(Attributor &A) override {
2578145449b1SDimitry Andric     if (!ReplVal || !*ReplVal)
2579b60736ecSDimitry Andric       return ChangeStatus::UNCHANGED;
2580b60736ecSDimitry Andric 
2581145449b1SDimitry Andric     A.changeAfterManifest(IRPosition::inst(*getCtxI()), **ReplVal);
2582b60736ecSDimitry Andric     A.deleteAfterManifest(*getCtxI());
2583b60736ecSDimitry Andric 
2584b60736ecSDimitry Andric     return ChangeStatus::CHANGED;
2585b60736ecSDimitry Andric   }
2586b60736ecSDimitry Andric 
2587b60736ecSDimitry Andric   // FIXME: come up with better string.
getAsStr__anon7bbaa8dc0111::AAICVTrackerCallSite25887fa27ce4SDimitry Andric   const std::string getAsStr(Attributor *) const override {
25897fa27ce4SDimitry Andric     return "ICVTrackerCallSite";
25907fa27ce4SDimitry Andric   }
2591b60736ecSDimitry Andric 
2592b60736ecSDimitry Andric   // FIXME: come up with some stats.
trackStatistics__anon7bbaa8dc0111::AAICVTrackerCallSite2593b60736ecSDimitry Andric   void trackStatistics() const override {}
2594b60736ecSDimitry Andric 
2595b60736ecSDimitry Andric   InternalControlVar AssociatedICV;
2596e3b55780SDimitry Andric   std::optional<Value *> ReplVal;
2597b60736ecSDimitry Andric 
updateImpl__anon7bbaa8dc0111::AAICVTrackerCallSite2598b60736ecSDimitry Andric   ChangeStatus updateImpl(Attributor &A) override {
25997fa27ce4SDimitry Andric     const auto *ICVTrackingAA = A.getAAFor<AAICVTracker>(
2600344a3780SDimitry Andric         *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
2601b60736ecSDimitry Andric 
2602b60736ecSDimitry Andric     // We don't have any information, so we assume it changes the ICV.
26037fa27ce4SDimitry Andric     if (!ICVTrackingAA->isAssumedTracked())
2604b60736ecSDimitry Andric       return indicatePessimisticFixpoint();
2605b60736ecSDimitry Andric 
2606e3b55780SDimitry Andric     std::optional<Value *> NewReplVal =
26077fa27ce4SDimitry Andric         ICVTrackingAA->getReplacementValue(AssociatedICV, getCtxI(), A);
2608b60736ecSDimitry Andric 
2609b60736ecSDimitry Andric     if (ReplVal == NewReplVal)
2610b60736ecSDimitry Andric       return ChangeStatus::UNCHANGED;
2611b60736ecSDimitry Andric 
2612b60736ecSDimitry Andric     ReplVal = NewReplVal;
2613b60736ecSDimitry Andric     return ChangeStatus::CHANGED;
2614b60736ecSDimitry Andric   }
2615b60736ecSDimitry Andric 
2616b60736ecSDimitry Andric   // Return the value with which associated value can be replaced for specific
2617b60736ecSDimitry Andric   // \p ICV.
2618e3b55780SDimitry Andric   std::optional<Value *>
getUniqueReplacementValue__anon7bbaa8dc0111::AAICVTrackerCallSite2619b60736ecSDimitry Andric   getUniqueReplacementValue(InternalControlVar ICV) const override {
2620b60736ecSDimitry Andric     return ReplVal;
2621b60736ecSDimitry Andric   }
2622b60736ecSDimitry Andric };
2623b60736ecSDimitry Andric 
2624b60736ecSDimitry Andric struct AAICVTrackerCallSiteReturned : AAICVTracker {
AAICVTrackerCallSiteReturned__anon7bbaa8dc0111::AAICVTrackerCallSiteReturned2625b60736ecSDimitry Andric   AAICVTrackerCallSiteReturned(const IRPosition &IRP, Attributor &A)
2626b60736ecSDimitry Andric       : AAICVTracker(IRP, A) {}
2627b60736ecSDimitry Andric 
2628b60736ecSDimitry Andric   // FIXME: come up with better string.
getAsStr__anon7bbaa8dc0111::AAICVTrackerCallSiteReturned26297fa27ce4SDimitry Andric   const std::string getAsStr(Attributor *) const override {
2630b60736ecSDimitry Andric     return "ICVTrackerCallSiteReturned";
2631b60736ecSDimitry Andric   }
2632b60736ecSDimitry Andric 
2633b60736ecSDimitry Andric   // FIXME: come up with some stats.
trackStatistics__anon7bbaa8dc0111::AAICVTrackerCallSiteReturned2634b60736ecSDimitry Andric   void trackStatistics() const override {}
2635b60736ecSDimitry Andric 
2636b60736ecSDimitry Andric   /// We don't manifest anything for this AA.
manifest__anon7bbaa8dc0111::AAICVTrackerCallSiteReturned2637b60736ecSDimitry Andric   ChangeStatus manifest(Attributor &A) override {
2638b60736ecSDimitry Andric     return ChangeStatus::UNCHANGED;
2639b60736ecSDimitry Andric   }
2640b60736ecSDimitry Andric 
2641b60736ecSDimitry Andric   // Map of ICV to their values at specific program point.
2642e3b55780SDimitry Andric   EnumeratedArray<std::optional<Value *>, InternalControlVar,
2643b60736ecSDimitry Andric                   InternalControlVar::ICV___last>
2644b60736ecSDimitry Andric       ICVReplacementValuesMap;
2645b60736ecSDimitry Andric 
2646b60736ecSDimitry Andric   /// Return the value with which associated value can be replaced for specific
2647b60736ecSDimitry Andric   /// \p ICV.
2648e3b55780SDimitry Andric   std::optional<Value *>
getUniqueReplacementValue__anon7bbaa8dc0111::AAICVTrackerCallSiteReturned2649b60736ecSDimitry Andric   getUniqueReplacementValue(InternalControlVar ICV) const override {
2650b60736ecSDimitry Andric     return ICVReplacementValuesMap[ICV];
2651b60736ecSDimitry Andric   }
2652b60736ecSDimitry Andric 
updateImpl__anon7bbaa8dc0111::AAICVTrackerCallSiteReturned2653b60736ecSDimitry Andric   ChangeStatus updateImpl(Attributor &A) override {
2654b60736ecSDimitry Andric     ChangeStatus Changed = ChangeStatus::UNCHANGED;
26557fa27ce4SDimitry Andric     const auto *ICVTrackingAA = A.getAAFor<AAICVTracker>(
2656344a3780SDimitry Andric         *this, IRPosition::returned(*getAssociatedFunction()),
2657344a3780SDimitry Andric         DepClassTy::REQUIRED);
2658b60736ecSDimitry Andric 
2659b60736ecSDimitry Andric     // We don't have any information, so we assume it changes the ICV.
26607fa27ce4SDimitry Andric     if (!ICVTrackingAA->isAssumedTracked())
2661b60736ecSDimitry Andric       return indicatePessimisticFixpoint();
2662b60736ecSDimitry Andric 
2663b60736ecSDimitry Andric     for (InternalControlVar ICV : TrackableICVs) {
2664e3b55780SDimitry Andric       std::optional<Value *> &ReplVal = ICVReplacementValuesMap[ICV];
2665e3b55780SDimitry Andric       std::optional<Value *> NewReplVal =
26667fa27ce4SDimitry Andric           ICVTrackingAA->getUniqueReplacementValue(ICV);
2667b60736ecSDimitry Andric 
2668b60736ecSDimitry Andric       if (ReplVal == NewReplVal)
2669b60736ecSDimitry Andric         continue;
2670b60736ecSDimitry Andric 
2671b60736ecSDimitry Andric       ReplVal = NewReplVal;
2672b60736ecSDimitry Andric       Changed = ChangeStatus::CHANGED;
2673b60736ecSDimitry Andric     }
2674b60736ecSDimitry Andric     return Changed;
2675b60736ecSDimitry Andric   }
2676cfca06d7SDimitry Andric };
2677344a3780SDimitry Andric 
2678b1c73532SDimitry Andric /// Determines if \p BB exits the function unconditionally itself or reaches a
2679b1c73532SDimitry Andric /// block that does through only unique successors.
hasFunctionEndAsUniqueSuccessor(const BasicBlock * BB)2680b1c73532SDimitry Andric static bool hasFunctionEndAsUniqueSuccessor(const BasicBlock *BB) {
2681b1c73532SDimitry Andric   if (succ_empty(BB))
2682b1c73532SDimitry Andric     return true;
2683b1c73532SDimitry Andric   const BasicBlock *const Successor = BB->getUniqueSuccessor();
2684b1c73532SDimitry Andric   if (!Successor)
2685b1c73532SDimitry Andric     return false;
2686b1c73532SDimitry Andric   return hasFunctionEndAsUniqueSuccessor(Successor);
2687b1c73532SDimitry Andric }
2688b1c73532SDimitry Andric 
2689344a3780SDimitry Andric struct AAExecutionDomainFunction : public AAExecutionDomain {
AAExecutionDomainFunction__anon7bbaa8dc0111::AAExecutionDomainFunction2690344a3780SDimitry Andric   AAExecutionDomainFunction(const IRPosition &IRP, Attributor &A)
2691344a3780SDimitry Andric       : AAExecutionDomain(IRP, A) {}
2692344a3780SDimitry Andric 
~AAExecutionDomainFunction__anon7bbaa8dc0111::AAExecutionDomainFunction26937fa27ce4SDimitry Andric   ~AAExecutionDomainFunction() { delete RPOT; }
2694e3b55780SDimitry Andric 
initialize__anon7bbaa8dc0111::AAExecutionDomainFunction2695e3b55780SDimitry Andric   void initialize(Attributor &A) override {
26967fa27ce4SDimitry Andric     Function *F = getAnchorScope();
26977fa27ce4SDimitry Andric     assert(F && "Expected anchor function");
26987fa27ce4SDimitry Andric     RPOT = new ReversePostOrderTraversal<Function *>(F);
2699e3b55780SDimitry Andric   }
2700e3b55780SDimitry Andric 
getAsStr__anon7bbaa8dc0111::AAExecutionDomainFunction27017fa27ce4SDimitry Andric   const std::string getAsStr(Attributor *) const override {
27027fa27ce4SDimitry Andric     unsigned TotalBlocks = 0, InitialThreadBlocks = 0, AlignedBlocks = 0;
2703e3b55780SDimitry Andric     for (auto &It : BEDMap) {
27047fa27ce4SDimitry Andric       if (!It.getFirst())
27057fa27ce4SDimitry Andric         continue;
2706e3b55780SDimitry Andric       TotalBlocks++;
2707e3b55780SDimitry Andric       InitialThreadBlocks += It.getSecond().IsExecutedByInitialThreadOnly;
27087fa27ce4SDimitry Andric       AlignedBlocks += It.getSecond().IsReachedFromAlignedBarrierOnly &&
27097fa27ce4SDimitry Andric                        It.getSecond().IsReachingAlignedBarrierOnly;
2710e3b55780SDimitry Andric     }
2711e3b55780SDimitry Andric     return "[AAExecutionDomain] " + std::to_string(InitialThreadBlocks) + "/" +
27127fa27ce4SDimitry Andric            std::to_string(AlignedBlocks) + " of " +
27137fa27ce4SDimitry Andric            std::to_string(TotalBlocks) +
27147fa27ce4SDimitry Andric            " executed by initial thread / aligned";
2715344a3780SDimitry Andric   }
2716344a3780SDimitry Andric 
2717344a3780SDimitry Andric   /// See AbstractAttribute::trackStatistics().
trackStatistics__anon7bbaa8dc0111::AAExecutionDomainFunction2718344a3780SDimitry Andric   void trackStatistics() const override {}
2719344a3780SDimitry Andric 
manifest__anon7bbaa8dc0111::AAExecutionDomainFunction2720344a3780SDimitry Andric   ChangeStatus manifest(Attributor &A) override {
2721344a3780SDimitry Andric     LLVM_DEBUG({
2722e3b55780SDimitry Andric       for (const BasicBlock &BB : *getAnchorScope()) {
2723e3b55780SDimitry Andric         if (!isExecutedByInitialThreadOnly(BB))
2724e3b55780SDimitry Andric           continue;
2725344a3780SDimitry Andric         dbgs() << TAG << " Basic block @" << getAnchorScope()->getName() << " "
2726e3b55780SDimitry Andric                << BB.getName() << " is executed by a single thread.\n";
2727e3b55780SDimitry Andric       }
2728344a3780SDimitry Andric     });
2729e3b55780SDimitry Andric 
2730e3b55780SDimitry Andric     ChangeStatus Changed = ChangeStatus::UNCHANGED;
2731e3b55780SDimitry Andric 
2732e3b55780SDimitry Andric     if (DisableOpenMPOptBarrierElimination)
2733e3b55780SDimitry Andric       return Changed;
2734e3b55780SDimitry Andric 
2735e3b55780SDimitry Andric     SmallPtrSet<CallBase *, 16> DeletedBarriers;
2736e3b55780SDimitry Andric     auto HandleAlignedBarrier = [&](CallBase *CB) {
27377fa27ce4SDimitry Andric       const ExecutionDomainTy &ED = CB ? CEDMap[{CB, PRE}] : BEDMap[nullptr];
2738e3b55780SDimitry Andric       if (!ED.IsReachedFromAlignedBarrierOnly ||
2739e3b55780SDimitry Andric           ED.EncounteredNonLocalSideEffect)
2740e3b55780SDimitry Andric         return;
2741b1c73532SDimitry Andric       if (!ED.EncounteredAssumes.empty() && !A.isModulePass())
2742b1c73532SDimitry Andric         return;
2743e3b55780SDimitry Andric 
2744b1c73532SDimitry Andric       // We can remove this barrier, if it is one, or aligned barriers reaching
2745b1c73532SDimitry Andric       // the kernel end (if CB is nullptr). Aligned barriers reaching the kernel
2746b1c73532SDimitry Andric       // end should only be removed if the kernel end is their unique successor;
2747b1c73532SDimitry Andric       // otherwise, they may have side-effects that aren't accounted for in the
2748b1c73532SDimitry Andric       // kernel end in their other successors. If those barriers have other
2749b1c73532SDimitry Andric       // barriers reaching them, those can be transitively removed as well as
2750b1c73532SDimitry Andric       // long as the kernel end is also their unique successor.
2751e3b55780SDimitry Andric       if (CB) {
2752e3b55780SDimitry Andric         DeletedBarriers.insert(CB);
2753e3b55780SDimitry Andric         A.deleteAfterManifest(*CB);
2754e3b55780SDimitry Andric         ++NumBarriersEliminated;
2755e3b55780SDimitry Andric         Changed = ChangeStatus::CHANGED;
2756e3b55780SDimitry Andric       } else if (!ED.AlignedBarriers.empty()) {
2757e3b55780SDimitry Andric         Changed = ChangeStatus::CHANGED;
2758e3b55780SDimitry Andric         SmallVector<CallBase *> Worklist(ED.AlignedBarriers.begin(),
2759e3b55780SDimitry Andric                                          ED.AlignedBarriers.end());
2760e3b55780SDimitry Andric         SmallSetVector<CallBase *, 16> Visited;
2761e3b55780SDimitry Andric         while (!Worklist.empty()) {
2762e3b55780SDimitry Andric           CallBase *LastCB = Worklist.pop_back_val();
2763e3b55780SDimitry Andric           if (!Visited.insert(LastCB))
2764e3b55780SDimitry Andric             continue;
27657fa27ce4SDimitry Andric           if (LastCB->getFunction() != getAnchorScope())
27667fa27ce4SDimitry Andric             continue;
2767b1c73532SDimitry Andric           if (!hasFunctionEndAsUniqueSuccessor(LastCB->getParent()))
2768b1c73532SDimitry Andric             continue;
2769e3b55780SDimitry Andric           if (!DeletedBarriers.count(LastCB)) {
2770b1c73532SDimitry Andric             ++NumBarriersEliminated;
2771e3b55780SDimitry Andric             A.deleteAfterManifest(*LastCB);
2772e3b55780SDimitry Andric             continue;
2773e3b55780SDimitry Andric           }
2774e3b55780SDimitry Andric           // The final aligned barrier (LastCB) reaching the kernel end was
2775e3b55780SDimitry Andric           // removed already. This means we can go one step further and remove
2776e3b55780SDimitry Andric           // the barriers encoutered last before (LastCB).
27777fa27ce4SDimitry Andric           const ExecutionDomainTy &LastED = CEDMap[{LastCB, PRE}];
2778e3b55780SDimitry Andric           Worklist.append(LastED.AlignedBarriers.begin(),
2779e3b55780SDimitry Andric                           LastED.AlignedBarriers.end());
2780e3b55780SDimitry Andric         }
2781344a3780SDimitry Andric       }
2782344a3780SDimitry Andric 
2783e3b55780SDimitry Andric       // If we actually eliminated a barrier we need to eliminate the associated
2784e3b55780SDimitry Andric       // llvm.assumes as well to avoid creating UB.
2785e3b55780SDimitry Andric       if (!ED.EncounteredAssumes.empty() && (CB || !ED.AlignedBarriers.empty()))
2786e3b55780SDimitry Andric         for (auto *AssumeCB : ED.EncounteredAssumes)
2787e3b55780SDimitry Andric           A.deleteAfterManifest(*AssumeCB);
2788344a3780SDimitry Andric     };
2789344a3780SDimitry Andric 
2790e3b55780SDimitry Andric     for (auto *CB : AlignedBarriers)
2791e3b55780SDimitry Andric       HandleAlignedBarrier(CB);
2792344a3780SDimitry Andric 
2793e3b55780SDimitry Andric     // Handle the "kernel end barrier" for kernels too.
2794b1c73532SDimitry Andric     if (omp::isOpenMPKernel(*getAnchorScope()))
2795e3b55780SDimitry Andric       HandleAlignedBarrier(nullptr);
2796e3b55780SDimitry Andric 
2797e3b55780SDimitry Andric     return Changed;
2798e3b55780SDimitry Andric   }
2799e3b55780SDimitry Andric 
isNoOpFence__anon7bbaa8dc0111::AAExecutionDomainFunction28007fa27ce4SDimitry Andric   bool isNoOpFence(const FenceInst &FI) const override {
28017fa27ce4SDimitry Andric     return getState().isValidState() && !NonNoOpFences.count(&FI);
28027fa27ce4SDimitry Andric   }
28037fa27ce4SDimitry Andric 
2804e3b55780SDimitry Andric   /// Merge barrier and assumption information from \p PredED into the successor
2805e3b55780SDimitry Andric   /// \p ED.
2806e3b55780SDimitry Andric   void
2807e3b55780SDimitry Andric   mergeInPredecessorBarriersAndAssumptions(Attributor &A, ExecutionDomainTy &ED,
2808e3b55780SDimitry Andric                                            const ExecutionDomainTy &PredED);
2809e3b55780SDimitry Andric 
2810e3b55780SDimitry Andric   /// Merge all information from \p PredED into the successor \p ED. If
2811e3b55780SDimitry Andric   /// \p InitialEdgeOnly is set, only the initial edge will enter the block
2812e3b55780SDimitry Andric   /// represented by \p ED from this predecessor.
28137fa27ce4SDimitry Andric   bool mergeInPredecessor(Attributor &A, ExecutionDomainTy &ED,
2814e3b55780SDimitry Andric                           const ExecutionDomainTy &PredED,
2815e3b55780SDimitry Andric                           bool InitialEdgeOnly = false);
2816e3b55780SDimitry Andric 
2817e3b55780SDimitry Andric   /// Accumulate information for the entry block in \p EntryBBED.
28187fa27ce4SDimitry Andric   bool handleCallees(Attributor &A, ExecutionDomainTy &EntryBBED);
2819e3b55780SDimitry Andric 
2820e3b55780SDimitry Andric   /// See AbstractAttribute::updateImpl.
2821e3b55780SDimitry Andric   ChangeStatus updateImpl(Attributor &A) override;
2822e3b55780SDimitry Andric 
2823e3b55780SDimitry Andric   /// Query interface, see AAExecutionDomain
2824e3b55780SDimitry Andric   ///{
isExecutedByInitialThreadOnly__anon7bbaa8dc0111::AAExecutionDomainFunction2825e3b55780SDimitry Andric   bool isExecutedByInitialThreadOnly(const BasicBlock &BB) const override {
2826e3b55780SDimitry Andric     if (!isValidState())
2827e3b55780SDimitry Andric       return false;
28287fa27ce4SDimitry Andric     assert(BB.getParent() == getAnchorScope() && "Block is out of scope!");
2829e3b55780SDimitry Andric     return BEDMap.lookup(&BB).IsExecutedByInitialThreadOnly;
2830e3b55780SDimitry Andric   }
2831e3b55780SDimitry Andric 
isExecutedInAlignedRegion__anon7bbaa8dc0111::AAExecutionDomainFunction2832e3b55780SDimitry Andric   bool isExecutedInAlignedRegion(Attributor &A,
2833e3b55780SDimitry Andric                                  const Instruction &I) const override {
28347fa27ce4SDimitry Andric     assert(I.getFunction() == getAnchorScope() &&
28357fa27ce4SDimitry Andric            "Instruction is out of scope!");
28367fa27ce4SDimitry Andric     if (!isValidState())
2837e3b55780SDimitry Andric       return false;
2838e3b55780SDimitry Andric 
28397fa27ce4SDimitry Andric     bool ForwardIsOk = true;
2840e3b55780SDimitry Andric     const Instruction *CurI;
2841e3b55780SDimitry Andric 
2842e3b55780SDimitry Andric     // Check forward until a call or the block end is reached.
2843e3b55780SDimitry Andric     CurI = &I;
2844e3b55780SDimitry Andric     do {
2845e3b55780SDimitry Andric       auto *CB = dyn_cast<CallBase>(CurI);
2846e3b55780SDimitry Andric       if (!CB)
2847e3b55780SDimitry Andric         continue;
28487fa27ce4SDimitry Andric       if (CB != &I && AlignedBarriers.contains(const_cast<CallBase *>(CB)))
28497fa27ce4SDimitry Andric         return true;
28507fa27ce4SDimitry Andric       const auto &It = CEDMap.find({CB, PRE});
2851e3b55780SDimitry Andric       if (It == CEDMap.end())
2852e3b55780SDimitry Andric         continue;
28537fa27ce4SDimitry Andric       if (!It->getSecond().IsReachingAlignedBarrierOnly)
28547fa27ce4SDimitry Andric         ForwardIsOk = false;
28557fa27ce4SDimitry Andric       break;
2856e3b55780SDimitry Andric     } while ((CurI = CurI->getNextNonDebugInstruction()));
2857e3b55780SDimitry Andric 
28587fa27ce4SDimitry Andric     if (!CurI && !BEDMap.lookup(I.getParent()).IsReachingAlignedBarrierOnly)
28597fa27ce4SDimitry Andric       ForwardIsOk = false;
2860e3b55780SDimitry Andric 
2861e3b55780SDimitry Andric     // Check backward until a call or the block beginning is reached.
2862e3b55780SDimitry Andric     CurI = &I;
2863e3b55780SDimitry Andric     do {
2864e3b55780SDimitry Andric       auto *CB = dyn_cast<CallBase>(CurI);
2865e3b55780SDimitry Andric       if (!CB)
2866e3b55780SDimitry Andric         continue;
28677fa27ce4SDimitry Andric       if (CB != &I && AlignedBarriers.contains(const_cast<CallBase *>(CB)))
28687fa27ce4SDimitry Andric         return true;
28697fa27ce4SDimitry Andric       const auto &It = CEDMap.find({CB, POST});
2870e3b55780SDimitry Andric       if (It == CEDMap.end())
2871e3b55780SDimitry Andric         continue;
2872e3b55780SDimitry Andric       if (It->getSecond().IsReachedFromAlignedBarrierOnly)
2873e3b55780SDimitry Andric         break;
2874e3b55780SDimitry Andric       return false;
2875e3b55780SDimitry Andric     } while ((CurI = CurI->getPrevNonDebugInstruction()));
2876e3b55780SDimitry Andric 
28777fa27ce4SDimitry Andric     // Delayed decision on the forward pass to allow aligned barrier detection
28787fa27ce4SDimitry Andric     // in the backwards traversal.
28797fa27ce4SDimitry Andric     if (!ForwardIsOk)
28807fa27ce4SDimitry Andric       return false;
28817fa27ce4SDimitry Andric 
28827fa27ce4SDimitry Andric     if (!CurI) {
28837fa27ce4SDimitry Andric       const BasicBlock *BB = I.getParent();
28847fa27ce4SDimitry Andric       if (BB == &BB->getParent()->getEntryBlock())
28857fa27ce4SDimitry Andric         return BEDMap.lookup(nullptr).IsReachedFromAlignedBarrierOnly;
28867fa27ce4SDimitry Andric       if (!llvm::all_of(predecessors(BB), [&](const BasicBlock *PredBB) {
2887e3b55780SDimitry Andric             return BEDMap.lookup(PredBB).IsReachedFromAlignedBarrierOnly;
2888e3b55780SDimitry Andric           })) {
2889e3b55780SDimitry Andric         return false;
2890e3b55780SDimitry Andric       }
28917fa27ce4SDimitry Andric     }
2892e3b55780SDimitry Andric 
2893e3b55780SDimitry Andric     // On neither traversal did we find anything but aligned barriers.
2894e3b55780SDimitry Andric     return true;
2895e3b55780SDimitry Andric   }
2896e3b55780SDimitry Andric 
getExecutionDomain__anon7bbaa8dc0111::AAExecutionDomainFunction2897e3b55780SDimitry Andric   ExecutionDomainTy getExecutionDomain(const BasicBlock &BB) const override {
2898e3b55780SDimitry Andric     assert(isValidState() &&
2899e3b55780SDimitry Andric            "No request should be made against an invalid state!");
2900e3b55780SDimitry Andric     return BEDMap.lookup(&BB);
2901e3b55780SDimitry Andric   }
29027fa27ce4SDimitry Andric   std::pair<ExecutionDomainTy, ExecutionDomainTy>
getExecutionDomain__anon7bbaa8dc0111::AAExecutionDomainFunction29037fa27ce4SDimitry Andric   getExecutionDomain(const CallBase &CB) const override {
2904e3b55780SDimitry Andric     assert(isValidState() &&
2905e3b55780SDimitry Andric            "No request should be made against an invalid state!");
29067fa27ce4SDimitry Andric     return {CEDMap.lookup({&CB, PRE}), CEDMap.lookup({&CB, POST})};
2907e3b55780SDimitry Andric   }
getFunctionExecutionDomain__anon7bbaa8dc0111::AAExecutionDomainFunction2908e3b55780SDimitry Andric   ExecutionDomainTy getFunctionExecutionDomain() const override {
2909e3b55780SDimitry Andric     assert(isValidState() &&
2910e3b55780SDimitry Andric            "No request should be made against an invalid state!");
29117fa27ce4SDimitry Andric     return InterProceduralED;
2912e3b55780SDimitry Andric   }
2913e3b55780SDimitry Andric   ///}
2914344a3780SDimitry Andric 
2915c0981da4SDimitry Andric   // Check if the edge into the successor block contains a condition that only
2916c0981da4SDimitry Andric   // lets the main thread execute it.
isInitialThreadOnlyEdge__anon7bbaa8dc0111::AAExecutionDomainFunction2917e3b55780SDimitry Andric   static bool isInitialThreadOnlyEdge(Attributor &A, BranchInst *Edge,
2918e3b55780SDimitry Andric                                       BasicBlock &SuccessorBB) {
2919344a3780SDimitry Andric     if (!Edge || !Edge->isConditional())
2920344a3780SDimitry Andric       return false;
2921e3b55780SDimitry Andric     if (Edge->getSuccessor(0) != &SuccessorBB)
2922344a3780SDimitry Andric       return false;
2923344a3780SDimitry Andric 
2924344a3780SDimitry Andric     auto *Cmp = dyn_cast<CmpInst>(Edge->getCondition());
2925344a3780SDimitry Andric     if (!Cmp || !Cmp->isTrueWhenEqual() || !Cmp->isEquality())
2926344a3780SDimitry Andric       return false;
2927344a3780SDimitry Andric 
2928344a3780SDimitry Andric     ConstantInt *C = dyn_cast<ConstantInt>(Cmp->getOperand(1));
2929344a3780SDimitry Andric     if (!C)
2930344a3780SDimitry Andric       return false;
2931344a3780SDimitry Andric 
2932344a3780SDimitry Andric     // Match: -1 == __kmpc_target_init (for non-SPMD kernels only!)
2933344a3780SDimitry Andric     if (C->isAllOnesValue()) {
2934344a3780SDimitry Andric       auto *CB = dyn_cast<CallBase>(Cmp->getOperand(0));
2935e3b55780SDimitry Andric       auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
2936e3b55780SDimitry Andric       auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_target_init];
2937344a3780SDimitry Andric       CB = CB ? OpenMPOpt::getCallIfRegularCall(*CB, &RFI) : nullptr;
2938344a3780SDimitry Andric       if (!CB)
2939344a3780SDimitry Andric         return false;
2940b1c73532SDimitry Andric       ConstantStruct *KernelEnvC =
2941b1c73532SDimitry Andric           KernelInfo::getKernelEnvironementFromKernelInitCB(CB);
2942b1c73532SDimitry Andric       ConstantInt *ExecModeC =
2943b1c73532SDimitry Andric           KernelInfo::getExecModeFromKernelEnvironment(KernelEnvC);
2944b1c73532SDimitry Andric       return ExecModeC->getSExtValue() & OMP_TGT_EXEC_MODE_GENERIC;
2945c0981da4SDimitry Andric     }
2946c0981da4SDimitry Andric 
2947c0981da4SDimitry Andric     if (C->isZero()) {
2948c0981da4SDimitry Andric       // Match: 0 == llvm.nvvm.read.ptx.sreg.tid.x()
2949c0981da4SDimitry Andric       if (auto *II = dyn_cast<IntrinsicInst>(Cmp->getOperand(0)))
2950c0981da4SDimitry Andric         if (II->getIntrinsicID() == Intrinsic::nvvm_read_ptx_sreg_tid_x)
2951c0981da4SDimitry Andric           return true;
2952c0981da4SDimitry Andric 
2953c0981da4SDimitry Andric       // Match: 0 == llvm.amdgcn.workitem.id.x()
2954c0981da4SDimitry Andric       if (auto *II = dyn_cast<IntrinsicInst>(Cmp->getOperand(0)))
2955c0981da4SDimitry Andric         if (II->getIntrinsicID() == Intrinsic::amdgcn_workitem_id_x)
2956c0981da4SDimitry Andric           return true;
2957344a3780SDimitry Andric     }
2958344a3780SDimitry Andric 
2959344a3780SDimitry Andric     return false;
2960344a3780SDimitry Andric   };
2961344a3780SDimitry Andric 
29627fa27ce4SDimitry Andric   /// Function-level execution domain exposed to other AAs.
  /// It is what getFunctionExecutionDomain() returns; updateImpl merges the
  /// state reached at function-exiting blocks into it.
29637fa27ce4SDimitry Andric   ExecutionDomainTy InterProceduralED;
29647fa27ce4SDimitry Andric 
  /// Selects which of the two per-call entries in CEDMap is meant: PRE is the
  /// domain when entering the call, POST the domain after the call.
29657fa27ce4SDimitry Andric   enum Direction { PRE = 0, POST = 1 };
2966e3b55780SDimitry Andric   /// Mapping containing information per block.
  /// The nullptr key is additionally used for function-level information
  /// (see the BEDMap[nullptr] / BEDMap.lookup(nullptr) uses).
2967e3b55780SDimitry Andric   DenseMap<const BasicBlock *, ExecutionDomainTy> BEDMap;
  /// Mapping containing information per call, keyed by the call together with
  /// a Direction (PRE or POST).
29687fa27ce4SDimitry Andric   DenseMap<PointerIntPair<const CallBase *, 1, Direction>, ExecutionDomainTy>
29697fa27ce4SDimitry Andric       CEDMap;
  /// Calls that updateImpl classified as aligned barriers.
2970e3b55780SDimitry Andric   SmallSetVector<CallBase *, 16> AlignedBarriers;
2971344a3780SDimitry Andric 
  /// Reverse post-order traversal of the function that updateImpl iterates.
  /// NOTE(review): allocation and ownership are not visible in this part of
  /// the file -- confirm where it is created and destroyed.
2972e3b55780SDimitry Andric   ReversePostOrderTraversal<Function *> *RPOT = nullptr;
29737fa27ce4SDimitry Andric 
/// Assign \p V to \p R and return true iff the value stored in \p R was
/// different from \p V before the assignment.
static bool setAndRecord(bool &R, bool V) {
  const bool Differs = (R != V);
  R = V;
  return Differs;
}
29807fa27ce4SDimitry Andric 
29817fa27ce4SDimitry Andric   /// Collection of fences known to be non-no-op. All fences not in this set
29827fa27ce4SDimitry Andric   /// can be assumed no-op.
29837fa27ce4SDimitry Andric   SmallPtrSet<const FenceInst *, 8> NonNoOpFences;
2984344a3780SDimitry Andric };
2985344a3780SDimitry Andric 
mergeInPredecessorBarriersAndAssumptions(Attributor & A,ExecutionDomainTy & ED,const ExecutionDomainTy & PredED)2986e3b55780SDimitry Andric void AAExecutionDomainFunction::mergeInPredecessorBarriersAndAssumptions(
2987e3b55780SDimitry Andric     Attributor &A, ExecutionDomainTy &ED, const ExecutionDomainTy &PredED) {
2988e3b55780SDimitry Andric   for (auto *EA : PredED.EncounteredAssumes)
2989e3b55780SDimitry Andric     ED.addAssumeInst(A, *EA);
2990e3b55780SDimitry Andric 
2991e3b55780SDimitry Andric   for (auto *AB : PredED.AlignedBarriers)
2992e3b55780SDimitry Andric     ED.addAlignedBarrier(A, *AB);
2993344a3780SDimitry Andric }
2994344a3780SDimitry Andric 
mergeInPredecessor(Attributor & A,ExecutionDomainTy & ED,const ExecutionDomainTy & PredED,bool InitialEdgeOnly)29957fa27ce4SDimitry Andric bool AAExecutionDomainFunction::mergeInPredecessor(
2996e3b55780SDimitry Andric     Attributor &A, ExecutionDomainTy &ED, const ExecutionDomainTy &PredED,
2997e3b55780SDimitry Andric     bool InitialEdgeOnly) {
2998e3b55780SDimitry Andric 
29997fa27ce4SDimitry Andric   bool Changed = false;
30007fa27ce4SDimitry Andric   Changed |=
30017fa27ce4SDimitry Andric       setAndRecord(ED.IsExecutedByInitialThreadOnly,
30027fa27ce4SDimitry Andric                    InitialEdgeOnly || (PredED.IsExecutedByInitialThreadOnly &&
30037fa27ce4SDimitry Andric                                        ED.IsExecutedByInitialThreadOnly));
30047fa27ce4SDimitry Andric 
30057fa27ce4SDimitry Andric   Changed |= setAndRecord(ED.IsReachedFromAlignedBarrierOnly,
30067fa27ce4SDimitry Andric                           ED.IsReachedFromAlignedBarrierOnly &&
30077fa27ce4SDimitry Andric                               PredED.IsReachedFromAlignedBarrierOnly);
30087fa27ce4SDimitry Andric   Changed |= setAndRecord(ED.EncounteredNonLocalSideEffect,
30097fa27ce4SDimitry Andric                           ED.EncounteredNonLocalSideEffect |
30107fa27ce4SDimitry Andric                               PredED.EncounteredNonLocalSideEffect);
30117fa27ce4SDimitry Andric   // Do not track assumptions and barriers as part of Changed.
3012e3b55780SDimitry Andric   if (ED.IsReachedFromAlignedBarrierOnly)
3013e3b55780SDimitry Andric     mergeInPredecessorBarriersAndAssumptions(A, ED, PredED);
3014e3b55780SDimitry Andric   else
3015e3b55780SDimitry Andric     ED.clearAssumeInstAndAlignedBarriers();
30167fa27ce4SDimitry Andric   return Changed;
3017e3b55780SDimitry Andric }
3018e3b55780SDimitry Andric 
handleCallees(Attributor & A,ExecutionDomainTy & EntryBBED)30197fa27ce4SDimitry Andric bool AAExecutionDomainFunction::handleCallees(Attributor &A,
3020e3b55780SDimitry Andric                                               ExecutionDomainTy &EntryBBED) {
30217fa27ce4SDimitry Andric   SmallVector<std::pair<ExecutionDomainTy, ExecutionDomainTy>, 4> CallSiteEDs;
3022e3b55780SDimitry Andric   auto PredForCallSite = [&](AbstractCallSite ACS) {
30237fa27ce4SDimitry Andric     const auto *EDAA = A.getAAFor<AAExecutionDomain>(
3024e3b55780SDimitry Andric         *this, IRPosition::function(*ACS.getInstruction()->getFunction()),
3025e3b55780SDimitry Andric         DepClassTy::OPTIONAL);
30267fa27ce4SDimitry Andric     if (!EDAA || !EDAA->getState().isValidState())
3027e3b55780SDimitry Andric       return false;
30287fa27ce4SDimitry Andric     CallSiteEDs.emplace_back(
30297fa27ce4SDimitry Andric         EDAA->getExecutionDomain(*cast<CallBase>(ACS.getInstruction())));
3030e3b55780SDimitry Andric     return true;
3031e3b55780SDimitry Andric   };
3032e3b55780SDimitry Andric 
30337fa27ce4SDimitry Andric   ExecutionDomainTy ExitED;
3034e3b55780SDimitry Andric   bool AllCallSitesKnown;
3035e3b55780SDimitry Andric   if (A.checkForAllCallSites(PredForCallSite, *this,
3036e3b55780SDimitry Andric                              /* RequiresAllCallSites */ true,
3037e3b55780SDimitry Andric                              AllCallSitesKnown)) {
30387fa27ce4SDimitry Andric     for (const auto &[CSInED, CSOutED] : CallSiteEDs) {
30397fa27ce4SDimitry Andric       mergeInPredecessor(A, EntryBBED, CSInED);
30407fa27ce4SDimitry Andric       ExitED.IsReachingAlignedBarrierOnly &=
30417fa27ce4SDimitry Andric           CSOutED.IsReachingAlignedBarrierOnly;
30427fa27ce4SDimitry Andric     }
3043e3b55780SDimitry Andric 
3044e3b55780SDimitry Andric   } else {
3045e3b55780SDimitry Andric     // We could not find all predecessors, so this is either a kernel or a
3046e3b55780SDimitry Andric     // function with external linkage (or with some other weird uses).
3047b1c73532SDimitry Andric     if (omp::isOpenMPKernel(*getAnchorScope())) {
3048e3b55780SDimitry Andric       EntryBBED.IsExecutedByInitialThreadOnly = false;
3049e3b55780SDimitry Andric       EntryBBED.IsReachedFromAlignedBarrierOnly = true;
3050e3b55780SDimitry Andric       EntryBBED.EncounteredNonLocalSideEffect = false;
3051b1c73532SDimitry Andric       ExitED.IsReachingAlignedBarrierOnly = false;
3052e3b55780SDimitry Andric     } else {
3053e3b55780SDimitry Andric       EntryBBED.IsExecutedByInitialThreadOnly = false;
3054e3b55780SDimitry Andric       EntryBBED.IsReachedFromAlignedBarrierOnly = false;
3055e3b55780SDimitry Andric       EntryBBED.EncounteredNonLocalSideEffect = true;
30567fa27ce4SDimitry Andric       ExitED.IsReachingAlignedBarrierOnly = false;
3057e3b55780SDimitry Andric     }
3058e3b55780SDimitry Andric   }
3059e3b55780SDimitry Andric 
30607fa27ce4SDimitry Andric   bool Changed = false;
3061e3b55780SDimitry Andric   auto &FnED = BEDMap[nullptr];
30627fa27ce4SDimitry Andric   Changed |= setAndRecord(FnED.IsReachedFromAlignedBarrierOnly,
30637fa27ce4SDimitry Andric                           FnED.IsReachedFromAlignedBarrierOnly &
30647fa27ce4SDimitry Andric                               EntryBBED.IsReachedFromAlignedBarrierOnly);
30657fa27ce4SDimitry Andric   Changed |= setAndRecord(FnED.IsReachingAlignedBarrierOnly,
30667fa27ce4SDimitry Andric                           FnED.IsReachingAlignedBarrierOnly &
30677fa27ce4SDimitry Andric                               ExitED.IsReachingAlignedBarrierOnly);
30687fa27ce4SDimitry Andric   Changed |= setAndRecord(FnED.IsExecutedByInitialThreadOnly,
30697fa27ce4SDimitry Andric                           EntryBBED.IsExecutedByInitialThreadOnly);
30707fa27ce4SDimitry Andric   return Changed;
3071e3b55780SDimitry Andric }
3072e3b55780SDimitry Andric 
updateImpl(Attributor & A)3073e3b55780SDimitry Andric ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
3074e3b55780SDimitry Andric 
3075e3b55780SDimitry Andric   bool Changed = false;
3076e3b55780SDimitry Andric 
3077e3b55780SDimitry Andric   // Helper to deal with an aligned barrier encountered during the forward
3078e3b55780SDimitry Andric   // traversal. \p CB is the aligned barrier, \p ED is the execution domain when
3079e3b55780SDimitry Andric   // it was encountered.
30807fa27ce4SDimitry Andric   auto HandleAlignedBarrier = [&](CallBase &CB, ExecutionDomainTy &ED) {
30817fa27ce4SDimitry Andric     Changed |= AlignedBarriers.insert(&CB);
3082e3b55780SDimitry Andric     // First, update the barrier ED kept in the separate CEDMap.
30837fa27ce4SDimitry Andric     auto &CallInED = CEDMap[{&CB, PRE}];
30847fa27ce4SDimitry Andric     Changed |= mergeInPredecessor(A, CallInED, ED);
30857fa27ce4SDimitry Andric     CallInED.IsReachingAlignedBarrierOnly = true;
3086e3b55780SDimitry Andric     // Next adjust the ED we use for the traversal.
3087e3b55780SDimitry Andric     ED.EncounteredNonLocalSideEffect = false;
3088e3b55780SDimitry Andric     ED.IsReachedFromAlignedBarrierOnly = true;
3089e3b55780SDimitry Andric     // Aligned barrier collection has to come last.
3090e3b55780SDimitry Andric     ED.clearAssumeInstAndAlignedBarriers();
30917fa27ce4SDimitry Andric     ED.addAlignedBarrier(A, CB);
30927fa27ce4SDimitry Andric     auto &CallOutED = CEDMap[{&CB, POST}];
30937fa27ce4SDimitry Andric     Changed |= mergeInPredecessor(A, CallOutED, ED);
3094e3b55780SDimitry Andric   };
3095e3b55780SDimitry Andric 
30967fa27ce4SDimitry Andric   auto *LivenessAA =
3097e3b55780SDimitry Andric       A.getAAFor<AAIsDead>(*this, getIRPosition(), DepClassTy::OPTIONAL);
3098e3b55780SDimitry Andric 
3099e3b55780SDimitry Andric   Function *F = getAnchorScope();
3100e3b55780SDimitry Andric   BasicBlock &EntryBB = F->getEntryBlock();
3101b1c73532SDimitry Andric   bool IsKernel = omp::isOpenMPKernel(*F);
3102e3b55780SDimitry Andric 
3103e3b55780SDimitry Andric   SmallVector<Instruction *> SyncInstWorklist;
3104e3b55780SDimitry Andric   for (auto &RIt : *RPOT) {
3105e3b55780SDimitry Andric     BasicBlock &BB = *RIt;
3106e3b55780SDimitry Andric 
3107e3b55780SDimitry Andric     bool IsEntryBB = &BB == &EntryBB;
3108e3b55780SDimitry Andric     // TODO: We use local reasoning since we don't have a divergence analysis
3109e3b55780SDimitry Andric     // 	     running as well. We could basically allow uniform branches here.
3110e3b55780SDimitry Andric     bool AlignedBarrierLastInBlock = IsEntryBB && IsKernel;
31117fa27ce4SDimitry Andric     bool IsExplicitlyAligned = IsEntryBB && IsKernel;
3112e3b55780SDimitry Andric     ExecutionDomainTy ED;
3113e3b55780SDimitry Andric     // Propagate "incoming edges" into information about this block.
3114e3b55780SDimitry Andric     if (IsEntryBB) {
31157fa27ce4SDimitry Andric       Changed |= handleCallees(A, ED);
3116e3b55780SDimitry Andric     } else {
3117e3b55780SDimitry Andric       // For live non-entry blocks we only propagate
3118e3b55780SDimitry Andric       // information via live edges.
31197fa27ce4SDimitry Andric       if (LivenessAA && LivenessAA->isAssumedDead(&BB))
3120e3b55780SDimitry Andric         continue;
3121e3b55780SDimitry Andric 
3122e3b55780SDimitry Andric       for (auto *PredBB : predecessors(&BB)) {
31237fa27ce4SDimitry Andric         if (LivenessAA && LivenessAA->isEdgeDead(PredBB, &BB))
3124e3b55780SDimitry Andric           continue;
3125e3b55780SDimitry Andric         bool InitialEdgeOnly = isInitialThreadOnlyEdge(
3126e3b55780SDimitry Andric             A, dyn_cast<BranchInst>(PredBB->getTerminator()), BB);
3127e3b55780SDimitry Andric         mergeInPredecessor(A, ED, BEDMap[PredBB], InitialEdgeOnly);
3128e3b55780SDimitry Andric       }
3129e3b55780SDimitry Andric     }
3130e3b55780SDimitry Andric 
3131e3b55780SDimitry Andric     // Now we traverse the block, accumulate effects in ED and attach
3132e3b55780SDimitry Andric     // information to calls.
3133e3b55780SDimitry Andric     for (Instruction &I : BB) {
3134e3b55780SDimitry Andric       bool UsedAssumedInformation;
31357fa27ce4SDimitry Andric       if (A.isAssumedDead(I, *this, LivenessAA, UsedAssumedInformation,
3136e3b55780SDimitry Andric                           /* CheckBBLivenessOnly */ false, DepClassTy::OPTIONAL,
3137e3b55780SDimitry Andric                           /* CheckForDeadStore */ true))
3138e3b55780SDimitry Andric         continue;
3139e3b55780SDimitry Andric 
3140e3b55780SDimitry Andric       // Assumes and "assume-like" (dbg, lifetime, ...) are handled first; the
3141e3b55780SDimitry Andric       // former are collected, the latter are ignored.
3142e3b55780SDimitry Andric       if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
3143e3b55780SDimitry Andric         if (auto *AI = dyn_cast_or_null<AssumeInst>(II)) {
3144e3b55780SDimitry Andric           ED.addAssumeInst(A, *AI);
3145e3b55780SDimitry Andric           continue;
3146e3b55780SDimitry Andric         }
3147e3b55780SDimitry Andric         // TODO: Should we also collect and delete lifetime markers?
3148e3b55780SDimitry Andric         if (II->isAssumeLikeIntrinsic())
3149e3b55780SDimitry Andric           continue;
3150e3b55780SDimitry Andric       }
3151e3b55780SDimitry Andric 
31527fa27ce4SDimitry Andric       if (auto *FI = dyn_cast<FenceInst>(&I)) {
31537fa27ce4SDimitry Andric         if (!ED.EncounteredNonLocalSideEffect) {
31547fa27ce4SDimitry Andric           // An aligned fence without non-local side-effects is a no-op.
31557fa27ce4SDimitry Andric           if (ED.IsReachedFromAlignedBarrierOnly)
31567fa27ce4SDimitry Andric             continue;
31577fa27ce4SDimitry Andric           // A non-aligned fence without non-local side-effects is a no-op
31587fa27ce4SDimitry Andric           // if the ordering only publishes non-local side-effects (or less).
31597fa27ce4SDimitry Andric           switch (FI->getOrdering()) {
31607fa27ce4SDimitry Andric           case AtomicOrdering::NotAtomic:
31617fa27ce4SDimitry Andric             continue;
31627fa27ce4SDimitry Andric           case AtomicOrdering::Unordered:
31637fa27ce4SDimitry Andric             continue;
31647fa27ce4SDimitry Andric           case AtomicOrdering::Monotonic:
31657fa27ce4SDimitry Andric             continue;
31667fa27ce4SDimitry Andric           case AtomicOrdering::Acquire:
31677fa27ce4SDimitry Andric             break;
31687fa27ce4SDimitry Andric           case AtomicOrdering::Release:
31697fa27ce4SDimitry Andric             continue;
31707fa27ce4SDimitry Andric           case AtomicOrdering::AcquireRelease:
31717fa27ce4SDimitry Andric             break;
31727fa27ce4SDimitry Andric           case AtomicOrdering::SequentiallyConsistent:
31737fa27ce4SDimitry Andric             break;
31747fa27ce4SDimitry Andric           };
31757fa27ce4SDimitry Andric         }
31767fa27ce4SDimitry Andric         NonNoOpFences.insert(FI);
31777fa27ce4SDimitry Andric       }
31787fa27ce4SDimitry Andric 
3179e3b55780SDimitry Andric       auto *CB = dyn_cast<CallBase>(&I);
3180e3b55780SDimitry Andric       bool IsNoSync = AA::isNoSyncInst(A, I, *this);
3181e3b55780SDimitry Andric       bool IsAlignedBarrier =
3182e3b55780SDimitry Andric           !IsNoSync && CB &&
3183e3b55780SDimitry Andric           AANoSync::isAlignedBarrier(*CB, AlignedBarrierLastInBlock);
3184e3b55780SDimitry Andric 
3185e3b55780SDimitry Andric       AlignedBarrierLastInBlock &= IsNoSync;
31867fa27ce4SDimitry Andric       IsExplicitlyAligned &= IsNoSync;
3187e3b55780SDimitry Andric 
3188e3b55780SDimitry Andric       // Next we check for calls. Aligned barriers are handled
3189e3b55780SDimitry Andric       // explicitly, everything else is kept for the backward traversal and will
3190e3b55780SDimitry Andric       // also affect our state.
3191e3b55780SDimitry Andric       if (CB) {
3192e3b55780SDimitry Andric         if (IsAlignedBarrier) {
31937fa27ce4SDimitry Andric           HandleAlignedBarrier(*CB, ED);
3194e3b55780SDimitry Andric           AlignedBarrierLastInBlock = true;
31957fa27ce4SDimitry Andric           IsExplicitlyAligned = true;
3196e3b55780SDimitry Andric           continue;
3197e3b55780SDimitry Andric         }
3198e3b55780SDimitry Andric 
3199e3b55780SDimitry Andric         // Check the pointer(s) of a memory intrinsic explicitly.
3200e3b55780SDimitry Andric         if (isa<MemIntrinsic>(&I)) {
3201e3b55780SDimitry Andric           if (!ED.EncounteredNonLocalSideEffect &&
3202e3b55780SDimitry Andric               AA::isPotentiallyAffectedByBarrier(A, I, *this))
3203e3b55780SDimitry Andric             ED.EncounteredNonLocalSideEffect = true;
3204e3b55780SDimitry Andric           if (!IsNoSync) {
3205e3b55780SDimitry Andric             ED.IsReachedFromAlignedBarrierOnly = false;
3206e3b55780SDimitry Andric             SyncInstWorklist.push_back(&I);
3207e3b55780SDimitry Andric           }
3208e3b55780SDimitry Andric           continue;
3209e3b55780SDimitry Andric         }
3210e3b55780SDimitry Andric 
3211e3b55780SDimitry Andric         // Record how we entered the call, then accumulate the effect of the
3212e3b55780SDimitry Andric         // call in ED for potential use by the callee.
32137fa27ce4SDimitry Andric         auto &CallInED = CEDMap[{CB, PRE}];
32147fa27ce4SDimitry Andric         Changed |= mergeInPredecessor(A, CallInED, ED);
3215e3b55780SDimitry Andric 
3216e3b55780SDimitry Andric         // If we have a sync-definition we can check if it starts/ends in an
3217e3b55780SDimitry Andric         // aligned barrier. If we are unsure we assume any sync breaks
3218e3b55780SDimitry Andric         // alignment.
3219e3b55780SDimitry Andric         Function *Callee = CB->getCalledFunction();
3220e3b55780SDimitry Andric         if (!IsNoSync && Callee && !Callee->isDeclaration()) {
32217fa27ce4SDimitry Andric           const auto *EDAA = A.getAAFor<AAExecutionDomain>(
3222e3b55780SDimitry Andric               *this, IRPosition::function(*Callee), DepClassTy::OPTIONAL);
32237fa27ce4SDimitry Andric           if (EDAA && EDAA->getState().isValidState()) {
32247fa27ce4SDimitry Andric             const auto &CalleeED = EDAA->getFunctionExecutionDomain();
3225e3b55780SDimitry Andric             ED.IsReachedFromAlignedBarrierOnly =
3226e3b55780SDimitry Andric                 CalleeED.IsReachedFromAlignedBarrierOnly;
3227e3b55780SDimitry Andric             AlignedBarrierLastInBlock = ED.IsReachedFromAlignedBarrierOnly;
3228e3b55780SDimitry Andric             if (IsNoSync || !CalleeED.IsReachedFromAlignedBarrierOnly)
3229e3b55780SDimitry Andric               ED.EncounteredNonLocalSideEffect |=
3230e3b55780SDimitry Andric                   CalleeED.EncounteredNonLocalSideEffect;
3231e3b55780SDimitry Andric             else
3232e3b55780SDimitry Andric               ED.EncounteredNonLocalSideEffect =
3233e3b55780SDimitry Andric                   CalleeED.EncounteredNonLocalSideEffect;
32347fa27ce4SDimitry Andric             if (!CalleeED.IsReachingAlignedBarrierOnly) {
32357fa27ce4SDimitry Andric               Changed |=
32367fa27ce4SDimitry Andric                   setAndRecord(CallInED.IsReachingAlignedBarrierOnly, false);
3237e3b55780SDimitry Andric               SyncInstWorklist.push_back(&I);
32387fa27ce4SDimitry Andric             }
3239e3b55780SDimitry Andric             if (CalleeED.IsReachedFromAlignedBarrierOnly)
3240e3b55780SDimitry Andric               mergeInPredecessorBarriersAndAssumptions(A, ED, CalleeED);
32417fa27ce4SDimitry Andric             auto &CallOutED = CEDMap[{CB, POST}];
32427fa27ce4SDimitry Andric             Changed |= mergeInPredecessor(A, CallOutED, ED);
3243e3b55780SDimitry Andric             continue;
3244e3b55780SDimitry Andric           }
3245e3b55780SDimitry Andric         }
32467fa27ce4SDimitry Andric         if (!IsNoSync) {
32477fa27ce4SDimitry Andric           ED.IsReachedFromAlignedBarrierOnly = false;
32487fa27ce4SDimitry Andric           Changed |= setAndRecord(CallInED.IsReachingAlignedBarrierOnly, false);
32497fa27ce4SDimitry Andric           SyncInstWorklist.push_back(&I);
32507fa27ce4SDimitry Andric         }
3251e3b55780SDimitry Andric         AlignedBarrierLastInBlock &= ED.IsReachedFromAlignedBarrierOnly;
3252e3b55780SDimitry Andric         ED.EncounteredNonLocalSideEffect |= !CB->doesNotAccessMemory();
32537fa27ce4SDimitry Andric         auto &CallOutED = CEDMap[{CB, POST}];
32547fa27ce4SDimitry Andric         Changed |= mergeInPredecessor(A, CallOutED, ED);
3255e3b55780SDimitry Andric       }
3256e3b55780SDimitry Andric 
3257e3b55780SDimitry Andric       if (!I.mayHaveSideEffects() && !I.mayReadFromMemory())
3258e3b55780SDimitry Andric         continue;
3259e3b55780SDimitry Andric 
3260e3b55780SDimitry Andric       // If we have a callee we try to use fine-grained information to
3261e3b55780SDimitry Andric       // determine local side-effects.
3262e3b55780SDimitry Andric       if (CB) {
32637fa27ce4SDimitry Andric         const auto *MemAA = A.getAAFor<AAMemoryLocation>(
3264e3b55780SDimitry Andric             *this, IRPosition::callsite_function(*CB), DepClassTy::OPTIONAL);
3265e3b55780SDimitry Andric 
3266e3b55780SDimitry Andric         auto AccessPred = [&](const Instruction *I, const Value *Ptr,
3267e3b55780SDimitry Andric                               AAMemoryLocation::AccessKind,
3268e3b55780SDimitry Andric                               AAMemoryLocation::MemoryLocationsKind) {
3269e3b55780SDimitry Andric           return !AA::isPotentiallyAffectedByBarrier(A, {Ptr}, *this, I);
3270e3b55780SDimitry Andric         };
32717fa27ce4SDimitry Andric         if (MemAA && MemAA->getState().isValidState() &&
32727fa27ce4SDimitry Andric             MemAA->checkForAllAccessesToMemoryKind(
3273e3b55780SDimitry Andric                 AccessPred, AAMemoryLocation::ALL_LOCATIONS))
3274e3b55780SDimitry Andric           continue;
3275e3b55780SDimitry Andric       }
3276e3b55780SDimitry Andric 
32777fa27ce4SDimitry Andric       auto &InfoCache = A.getInfoCache();
32787fa27ce4SDimitry Andric       if (!I.mayHaveSideEffects() && InfoCache.isOnlyUsedByAssume(I))
3279e3b55780SDimitry Andric         continue;
3280e3b55780SDimitry Andric 
3281e3b55780SDimitry Andric       if (auto *LI = dyn_cast<LoadInst>(&I))
3282e3b55780SDimitry Andric         if (LI->hasMetadata(LLVMContext::MD_invariant_load))
3283e3b55780SDimitry Andric           continue;
3284e3b55780SDimitry Andric 
3285e3b55780SDimitry Andric       if (!ED.EncounteredNonLocalSideEffect &&
3286e3b55780SDimitry Andric           AA::isPotentiallyAffectedByBarrier(A, I, *this))
3287e3b55780SDimitry Andric         ED.EncounteredNonLocalSideEffect = true;
3288e3b55780SDimitry Andric     }
3289e3b55780SDimitry Andric 
32907fa27ce4SDimitry Andric     bool IsEndAndNotReachingAlignedBarriersOnly = false;
3291e3b55780SDimitry Andric     if (!isa<UnreachableInst>(BB.getTerminator()) &&
3292e3b55780SDimitry Andric         !BB.getTerminator()->getNumSuccessors()) {
3293e3b55780SDimitry Andric 
32947fa27ce4SDimitry Andric       Changed |= mergeInPredecessor(A, InterProceduralED, ED);
3295e3b55780SDimitry Andric 
32967fa27ce4SDimitry Andric       auto &FnED = BEDMap[nullptr];
32977fa27ce4SDimitry Andric       if (IsKernel && !IsExplicitlyAligned)
32987fa27ce4SDimitry Andric         FnED.IsReachingAlignedBarrierOnly = false;
32997fa27ce4SDimitry Andric       Changed |= mergeInPredecessor(A, FnED, ED);
33007fa27ce4SDimitry Andric 
33017fa27ce4SDimitry Andric       if (!FnED.IsReachingAlignedBarrierOnly) {
33027fa27ce4SDimitry Andric         IsEndAndNotReachingAlignedBarriersOnly = true;
33037fa27ce4SDimitry Andric         SyncInstWorklist.push_back(BB.getTerminator());
33047fa27ce4SDimitry Andric         auto &BBED = BEDMap[&BB];
33057fa27ce4SDimitry Andric         Changed |= setAndRecord(BBED.IsReachingAlignedBarrierOnly, false);
33067fa27ce4SDimitry Andric       }
3307e3b55780SDimitry Andric     }
3308e3b55780SDimitry Andric 
3309e3b55780SDimitry Andric     ExecutionDomainTy &StoredED = BEDMap[&BB];
33107fa27ce4SDimitry Andric     ED.IsReachingAlignedBarrierOnly = StoredED.IsReachingAlignedBarrierOnly &
33117fa27ce4SDimitry Andric                                       !IsEndAndNotReachingAlignedBarriersOnly;
3312e3b55780SDimitry Andric 
3313e3b55780SDimitry Andric     // Check if we computed anything different as part of the forward
3314e3b55780SDimitry Andric     // traversal. We do not take assumptions and aligned barriers into account
3315e3b55780SDimitry Andric     // as they do not influence the state we iterate. Backward traversal values
3316e3b55780SDimitry Andric     // are handled later on.
3317e3b55780SDimitry Andric     if (ED.IsExecutedByInitialThreadOnly !=
3318e3b55780SDimitry Andric             StoredED.IsExecutedByInitialThreadOnly ||
3319e3b55780SDimitry Andric         ED.IsReachedFromAlignedBarrierOnly !=
3320e3b55780SDimitry Andric             StoredED.IsReachedFromAlignedBarrierOnly ||
3321e3b55780SDimitry Andric         ED.EncounteredNonLocalSideEffect !=
3322e3b55780SDimitry Andric             StoredED.EncounteredNonLocalSideEffect)
3323e3b55780SDimitry Andric       Changed = true;
3324e3b55780SDimitry Andric 
3325e3b55780SDimitry Andric     // Update the state with the new value.
3326e3b55780SDimitry Andric     StoredED = std::move(ED);
3327e3b55780SDimitry Andric   }
3328e3b55780SDimitry Andric 
3329e3b55780SDimitry Andric   // Propagate (non-aligned) sync instruction effects backwards until the
3330e3b55780SDimitry Andric   // entry is hit or an aligned barrier.
3331e3b55780SDimitry Andric   SmallSetVector<BasicBlock *, 16> Visited;
3332e3b55780SDimitry Andric   while (!SyncInstWorklist.empty()) {
3333e3b55780SDimitry Andric     Instruction *SyncInst = SyncInstWorklist.pop_back_val();
3334e3b55780SDimitry Andric     Instruction *CurInst = SyncInst;
33357fa27ce4SDimitry Andric     bool HitAlignedBarrierOrKnownEnd = false;
3336e3b55780SDimitry Andric     while ((CurInst = CurInst->getPrevNode())) {
3337e3b55780SDimitry Andric       auto *CB = dyn_cast<CallBase>(CurInst);
3338e3b55780SDimitry Andric       if (!CB)
3339e3b55780SDimitry Andric         continue;
33407fa27ce4SDimitry Andric       auto &CallOutED = CEDMap[{CB, POST}];
33417fa27ce4SDimitry Andric       Changed |= setAndRecord(CallOutED.IsReachingAlignedBarrierOnly, false);
33427fa27ce4SDimitry Andric       auto &CallInED = CEDMap[{CB, PRE}];
33437fa27ce4SDimitry Andric       HitAlignedBarrierOrKnownEnd =
33447fa27ce4SDimitry Andric           AlignedBarriers.count(CB) || !CallInED.IsReachingAlignedBarrierOnly;
33457fa27ce4SDimitry Andric       if (HitAlignedBarrierOrKnownEnd)
3346e3b55780SDimitry Andric         break;
33477fa27ce4SDimitry Andric       Changed |= setAndRecord(CallInED.IsReachingAlignedBarrierOnly, false);
3348e3b55780SDimitry Andric     }
33497fa27ce4SDimitry Andric     if (HitAlignedBarrierOrKnownEnd)
3350e3b55780SDimitry Andric       continue;
3351e3b55780SDimitry Andric     BasicBlock *SyncBB = SyncInst->getParent();
3352e3b55780SDimitry Andric     for (auto *PredBB : predecessors(SyncBB)) {
33537fa27ce4SDimitry Andric       if (LivenessAA && LivenessAA->isEdgeDead(PredBB, SyncBB))
3354e3b55780SDimitry Andric         continue;
3355e3b55780SDimitry Andric       if (!Visited.insert(PredBB))
3356e3b55780SDimitry Andric         continue;
3357e3b55780SDimitry Andric       auto &PredED = BEDMap[PredBB];
33587fa27ce4SDimitry Andric       if (setAndRecord(PredED.IsReachingAlignedBarrierOnly, false)) {
3359e3b55780SDimitry Andric         Changed = true;
33607fa27ce4SDimitry Andric         SyncInstWorklist.push_back(PredBB->getTerminator());
33617fa27ce4SDimitry Andric       }
3362e3b55780SDimitry Andric     }
3363e3b55780SDimitry Andric     if (SyncBB != &EntryBB)
3364e3b55780SDimitry Andric       continue;
33657fa27ce4SDimitry Andric     Changed |=
33667fa27ce4SDimitry Andric         setAndRecord(InterProceduralED.IsReachingAlignedBarrierOnly, false);
3367e3b55780SDimitry Andric   }
3368e3b55780SDimitry Andric 
3369e3b55780SDimitry Andric   return Changed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED;
3370344a3780SDimitry Andric }
3371344a3780SDimitry Andric 
3372344a3780SDimitry Andric /// Try to replace memory allocation calls called by a single thread with a
3373344a3780SDimitry Andric /// static buffer of shared memory.
3374344a3780SDimitry Andric struct AAHeapToShared : public StateWrapper<BooleanState, AbstractAttribute> {
3375344a3780SDimitry Andric   using Base = StateWrapper<BooleanState, AbstractAttribute>;
AAHeapToShared__anon7bbaa8dc0111::AAHeapToShared3376344a3780SDimitry Andric   AAHeapToShared(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
3377344a3780SDimitry Andric 
3378344a3780SDimitry Andric   /// Create an abstract attribute view for the position \p IRP.
3379344a3780SDimitry Andric   static AAHeapToShared &createForPosition(const IRPosition &IRP,
3380344a3780SDimitry Andric                                            Attributor &A);
3381344a3780SDimitry Andric 
3382344a3780SDimitry Andric   /// Returns true if HeapToShared conversion is assumed to be possible.
3383344a3780SDimitry Andric   virtual bool isAssumedHeapToShared(CallBase &CB) const = 0;
3384344a3780SDimitry Andric 
3385344a3780SDimitry Andric   /// Returns true if HeapToShared conversion is assumed and the CB is a
3386344a3780SDimitry Andric   /// callsite to a free operation to be removed.
3387344a3780SDimitry Andric   virtual bool isAssumedHeapToSharedRemovedFree(CallBase &CB) const = 0;
3388344a3780SDimitry Andric 
3389344a3780SDimitry Andric   /// See AbstractAttribute::getName().
getName__anon7bbaa8dc0111::AAHeapToShared3390344a3780SDimitry Andric   const std::string getName() const override { return "AAHeapToShared"; }
3391344a3780SDimitry Andric 
3392344a3780SDimitry Andric   /// See AbstractAttribute::getIdAddr().
getIdAddr__anon7bbaa8dc0111::AAHeapToShared3393344a3780SDimitry Andric   const char *getIdAddr() const override { return &ID; }
3394344a3780SDimitry Andric 
3395344a3780SDimitry Andric   /// This function should return true if the type of the \p AA is
3396344a3780SDimitry Andric   /// AAHeapToShared.
classof__anon7bbaa8dc0111::AAHeapToShared3397344a3780SDimitry Andric   static bool classof(const AbstractAttribute *AA) {
3398344a3780SDimitry Andric     return (AA->getIdAddr() == &ID);
3399344a3780SDimitry Andric   }
3400344a3780SDimitry Andric 
3401344a3780SDimitry Andric   /// Unique ID (due to the unique address)
3402344a3780SDimitry Andric   static const char ID;
3403344a3780SDimitry Andric };
3404344a3780SDimitry Andric 
3405344a3780SDimitry Andric struct AAHeapToSharedFunction : public AAHeapToShared {
AAHeapToSharedFunction__anon7bbaa8dc0111::AAHeapToSharedFunction3406344a3780SDimitry Andric   AAHeapToSharedFunction(const IRPosition &IRP, Attributor &A)
3407344a3780SDimitry Andric       : AAHeapToShared(IRP, A) {}
3408344a3780SDimitry Andric 
getAsStr__anon7bbaa8dc0111::AAHeapToSharedFunction34097fa27ce4SDimitry Andric   const std::string getAsStr(Attributor *) const override {
3410344a3780SDimitry Andric     return "[AAHeapToShared] " + std::to_string(MallocCalls.size()) +
3411344a3780SDimitry Andric            " malloc calls eligible.";
3412344a3780SDimitry Andric   }
3413344a3780SDimitry Andric 
3414344a3780SDimitry Andric   /// See AbstractAttribute::trackStatistics().
trackStatistics__anon7bbaa8dc0111::AAHeapToSharedFunction3415344a3780SDimitry Andric   void trackStatistics() const override {}
3416344a3780SDimitry Andric 
3417344a3780SDimitry Andric   /// This functions finds free calls that will be removed by the
3418344a3780SDimitry Andric   /// HeapToShared transformation.
findPotentialRemovedFreeCalls__anon7bbaa8dc0111::AAHeapToSharedFunction3419344a3780SDimitry Andric   void findPotentialRemovedFreeCalls(Attributor &A) {
3420344a3780SDimitry Andric     auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
3421344a3780SDimitry Andric     auto &FreeRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_free_shared];
3422344a3780SDimitry Andric 
3423344a3780SDimitry Andric     PotentialRemovedFreeCalls.clear();
3424344a3780SDimitry Andric     // Update free call users of found malloc calls.
3425344a3780SDimitry Andric     for (CallBase *CB : MallocCalls) {
3426344a3780SDimitry Andric       SmallVector<CallBase *, 4> FreeCalls;
3427344a3780SDimitry Andric       for (auto *U : CB->users()) {
3428344a3780SDimitry Andric         CallBase *C = dyn_cast<CallBase>(U);
3429344a3780SDimitry Andric         if (C && C->getCalledFunction() == FreeRFI.Declaration)
3430344a3780SDimitry Andric           FreeCalls.push_back(C);
3431344a3780SDimitry Andric       }
3432344a3780SDimitry Andric 
3433344a3780SDimitry Andric       if (FreeCalls.size() != 1)
3434344a3780SDimitry Andric         continue;
3435344a3780SDimitry Andric 
3436344a3780SDimitry Andric       PotentialRemovedFreeCalls.insert(FreeCalls.front());
3437344a3780SDimitry Andric     }
3438344a3780SDimitry Andric   }
3439344a3780SDimitry Andric 
initialize__anon7bbaa8dc0111::AAHeapToSharedFunction3440344a3780SDimitry Andric   void initialize(Attributor &A) override {
3441145449b1SDimitry Andric     if (DisableOpenMPOptDeglobalization) {
3442145449b1SDimitry Andric       indicatePessimisticFixpoint();
3443145449b1SDimitry Andric       return;
3444145449b1SDimitry Andric     }
3445145449b1SDimitry Andric 
3446344a3780SDimitry Andric     auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
3447344a3780SDimitry Andric     auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
3448e3b55780SDimitry Andric     if (!RFI.Declaration)
3449e3b55780SDimitry Andric       return;
3450344a3780SDimitry Andric 
3451145449b1SDimitry Andric     Attributor::SimplifictionCallbackTy SCB =
3452145449b1SDimitry Andric         [](const IRPosition &, const AbstractAttribute *,
3453e3b55780SDimitry Andric            bool &) -> std::optional<Value *> { return nullptr; };
3454e3b55780SDimitry Andric 
3455e3b55780SDimitry Andric     Function *F = getAnchorScope();
3456344a3780SDimitry Andric     for (User *U : RFI.Declaration->users())
3457145449b1SDimitry Andric       if (CallBase *CB = dyn_cast<CallBase>(U)) {
3458e3b55780SDimitry Andric         if (CB->getFunction() != F)
3459e3b55780SDimitry Andric           continue;
3460344a3780SDimitry Andric         MallocCalls.insert(CB);
3461145449b1SDimitry Andric         A.registerSimplificationCallback(IRPosition::callsite_returned(*CB),
3462145449b1SDimitry Andric                                          SCB);
3463145449b1SDimitry Andric       }
3464344a3780SDimitry Andric 
3465344a3780SDimitry Andric     findPotentialRemovedFreeCalls(A);
3466344a3780SDimitry Andric   }
3467344a3780SDimitry Andric 
isAssumedHeapToShared__anon7bbaa8dc0111::AAHeapToSharedFunction3468344a3780SDimitry Andric   bool isAssumedHeapToShared(CallBase &CB) const override {
3469344a3780SDimitry Andric     return isValidState() && MallocCalls.count(&CB);
3470344a3780SDimitry Andric   }
3471344a3780SDimitry Andric 
isAssumedHeapToSharedRemovedFree__anon7bbaa8dc0111::AAHeapToSharedFunction3472344a3780SDimitry Andric   bool isAssumedHeapToSharedRemovedFree(CallBase &CB) const override {
3473344a3780SDimitry Andric     return isValidState() && PotentialRemovedFreeCalls.count(&CB);
3474344a3780SDimitry Andric   }
3475344a3780SDimitry Andric 
manifest__anon7bbaa8dc0111::AAHeapToSharedFunction3476344a3780SDimitry Andric   ChangeStatus manifest(Attributor &A) override {
3477344a3780SDimitry Andric     if (MallocCalls.empty())
3478344a3780SDimitry Andric       return ChangeStatus::UNCHANGED;
3479344a3780SDimitry Andric 
3480344a3780SDimitry Andric     auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
3481344a3780SDimitry Andric     auto &FreeCall = OMPInfoCache.RFIs[OMPRTL___kmpc_free_shared];
3482344a3780SDimitry Andric 
3483344a3780SDimitry Andric     Function *F = getAnchorScope();
3484344a3780SDimitry Andric     auto *HS = A.lookupAAFor<AAHeapToStack>(IRPosition::function(*F), this,
3485344a3780SDimitry Andric                                             DepClassTy::OPTIONAL);
3486344a3780SDimitry Andric 
3487344a3780SDimitry Andric     ChangeStatus Changed = ChangeStatus::UNCHANGED;
3488344a3780SDimitry Andric     for (CallBase *CB : MallocCalls) {
3489344a3780SDimitry Andric       // Skip replacing this if HeapToStack has already claimed it.
3490344a3780SDimitry Andric       if (HS && HS->isAssumedHeapToStack(*CB))
3491344a3780SDimitry Andric         continue;
3492344a3780SDimitry Andric 
3493344a3780SDimitry Andric       // Find the unique free call to remove it.
3494344a3780SDimitry Andric       SmallVector<CallBase *, 4> FreeCalls;
3495344a3780SDimitry Andric       for (auto *U : CB->users()) {
3496344a3780SDimitry Andric         CallBase *C = dyn_cast<CallBase>(U);
3497344a3780SDimitry Andric         if (C && C->getCalledFunction() == FreeCall.Declaration)
3498344a3780SDimitry Andric           FreeCalls.push_back(C);
3499344a3780SDimitry Andric       }
3500344a3780SDimitry Andric       if (FreeCalls.size() != 1)
3501344a3780SDimitry Andric         continue;
3502344a3780SDimitry Andric 
35036f8fc217SDimitry Andric       auto *AllocSize = cast<ConstantInt>(CB->getArgOperand(0));
3504344a3780SDimitry Andric 
3505145449b1SDimitry Andric       if (AllocSize->getZExtValue() + SharedMemoryUsed > SharedMemoryLimit) {
3506145449b1SDimitry Andric         LLVM_DEBUG(dbgs() << TAG << "Cannot replace call " << *CB
3507145449b1SDimitry Andric                           << " with shared memory."
3508145449b1SDimitry Andric                           << " Shared memory usage is limited to "
3509145449b1SDimitry Andric                           << SharedMemoryLimit << " bytes\n");
3510145449b1SDimitry Andric         continue;
3511145449b1SDimitry Andric       }
3512145449b1SDimitry Andric 
3513c0981da4SDimitry Andric       LLVM_DEBUG(dbgs() << TAG << "Replace globalization call " << *CB
3514c0981da4SDimitry Andric                         << " with " << AllocSize->getZExtValue()
3515344a3780SDimitry Andric                         << " bytes of shared memory\n");
3516344a3780SDimitry Andric 
3517344a3780SDimitry Andric       // Create a new shared memory buffer of the same size as the allocation
3518344a3780SDimitry Andric       // and replace all the uses of the original allocation with it.
3519344a3780SDimitry Andric       Module *M = CB->getModule();
3520344a3780SDimitry Andric       Type *Int8Ty = Type::getInt8Ty(M->getContext());
3521344a3780SDimitry Andric       Type *Int8ArrTy = ArrayType::get(Int8Ty, AllocSize->getZExtValue());
3522344a3780SDimitry Andric       auto *SharedMem = new GlobalVariable(
3523344a3780SDimitry Andric           *M, Int8ArrTy, /* IsConstant */ false, GlobalValue::InternalLinkage,
35247fa27ce4SDimitry Andric           PoisonValue::get(Int8ArrTy), CB->getName() + "_shared", nullptr,
3525344a3780SDimitry Andric           GlobalValue::NotThreadLocal,
3526344a3780SDimitry Andric           static_cast<unsigned>(AddressSpace::Shared));
3527344a3780SDimitry Andric       auto *NewBuffer =
3528344a3780SDimitry Andric           ConstantExpr::getPointerCast(SharedMem, Int8Ty->getPointerTo());
3529344a3780SDimitry Andric 
3530344a3780SDimitry Andric       auto Remark = [&](OptimizationRemark OR) {
3531344a3780SDimitry Andric         return OR << "Replaced globalized variable with "
3532344a3780SDimitry Andric                   << ore::NV("SharedMemory", AllocSize->getZExtValue())
35337fa27ce4SDimitry Andric                   << (AllocSize->isOne() ? " byte " : " bytes ")
3534344a3780SDimitry Andric                   << "of shared memory.";
3535344a3780SDimitry Andric       };
3536344a3780SDimitry Andric       A.emitRemark<OptimizationRemark>(CB, "OMP111", Remark);
3537344a3780SDimitry Andric 
35386f8fc217SDimitry Andric       MaybeAlign Alignment = CB->getRetAlign();
35396f8fc217SDimitry Andric       assert(Alignment &&
35406f8fc217SDimitry Andric              "HeapToShared on allocation without alignment attribute");
35417fa27ce4SDimitry Andric       SharedMem->setAlignment(*Alignment);
3542344a3780SDimitry Andric 
3543145449b1SDimitry Andric       A.changeAfterManifest(IRPosition::callsite_returned(*CB), *NewBuffer);
3544344a3780SDimitry Andric       A.deleteAfterManifest(*CB);
3545344a3780SDimitry Andric       A.deleteAfterManifest(*FreeCalls.front());
3546344a3780SDimitry Andric 
3547145449b1SDimitry Andric       SharedMemoryUsed += AllocSize->getZExtValue();
3548145449b1SDimitry Andric       NumBytesMovedToSharedMemory = SharedMemoryUsed;
3549344a3780SDimitry Andric       Changed = ChangeStatus::CHANGED;
3550344a3780SDimitry Andric     }
3551344a3780SDimitry Andric 
3552344a3780SDimitry Andric     return Changed;
3553344a3780SDimitry Andric   }
3554344a3780SDimitry Andric 
updateImpl__anon7bbaa8dc0111::AAHeapToSharedFunction3555344a3780SDimitry Andric   ChangeStatus updateImpl(Attributor &A) override {
3556e3b55780SDimitry Andric     if (MallocCalls.empty())
3557e3b55780SDimitry Andric       return indicatePessimisticFixpoint();
3558344a3780SDimitry Andric     auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
3559344a3780SDimitry Andric     auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
3560e3b55780SDimitry Andric     if (!RFI.Declaration)
3561e3b55780SDimitry Andric       return ChangeStatus::UNCHANGED;
3562e3b55780SDimitry Andric 
3563344a3780SDimitry Andric     Function *F = getAnchorScope();
3564344a3780SDimitry Andric 
3565344a3780SDimitry Andric     auto NumMallocCalls = MallocCalls.size();
3566344a3780SDimitry Andric 
3567344a3780SDimitry Andric     // Only consider malloc calls executed by a single thread with a constant.
3568344a3780SDimitry Andric     for (User *U : RFI.Declaration->users()) {
3569e3b55780SDimitry Andric       if (CallBase *CB = dyn_cast<CallBase>(U)) {
3570e3b55780SDimitry Andric         if (CB->getCaller() != F)
3571e3b55780SDimitry Andric           continue;
3572e3b55780SDimitry Andric         if (!MallocCalls.count(CB))
3573e3b55780SDimitry Andric           continue;
3574e3b55780SDimitry Andric         if (!isa<ConstantInt>(CB->getArgOperand(0))) {
3575e3b55780SDimitry Andric           MallocCalls.remove(CB);
3576e3b55780SDimitry Andric           continue;
3577e3b55780SDimitry Andric         }
35787fa27ce4SDimitry Andric         const auto *ED = A.getAAFor<AAExecutionDomain>(
3579344a3780SDimitry Andric             *this, IRPosition::function(*F), DepClassTy::REQUIRED);
35807fa27ce4SDimitry Andric         if (!ED || !ED->isExecutedByInitialThreadOnly(*CB))
35816f8fc217SDimitry Andric           MallocCalls.remove(CB);
3582344a3780SDimitry Andric       }
3583e3b55780SDimitry Andric     }
3584344a3780SDimitry Andric 
3585344a3780SDimitry Andric     findPotentialRemovedFreeCalls(A);
3586344a3780SDimitry Andric 
3587344a3780SDimitry Andric     if (NumMallocCalls != MallocCalls.size())
3588344a3780SDimitry Andric       return ChangeStatus::CHANGED;
3589344a3780SDimitry Andric 
3590344a3780SDimitry Andric     return ChangeStatus::UNCHANGED;
3591344a3780SDimitry Andric   }
3592344a3780SDimitry Andric 
3593344a3780SDimitry Andric   /// Collection of all malloc calls in a function.
35946f8fc217SDimitry Andric   SmallSetVector<CallBase *, 4> MallocCalls;
3595344a3780SDimitry Andric   /// Collection of potentially removed free calls in a function.
3596344a3780SDimitry Andric   SmallPtrSet<CallBase *, 4> PotentialRemovedFreeCalls;
3597145449b1SDimitry Andric   /// The total amount of shared memory that has been used for HeapToShared.
3598145449b1SDimitry Andric   unsigned SharedMemoryUsed = 0;
3599344a3780SDimitry Andric };
3600344a3780SDimitry Andric 
3601344a3780SDimitry Andric struct AAKernelInfo : public StateWrapper<KernelInfoState, AbstractAttribute> {
3602344a3780SDimitry Andric   using Base = StateWrapper<KernelInfoState, AbstractAttribute>;
AAKernelInfo__anon7bbaa8dc0111::AAKernelInfo3603344a3780SDimitry Andric   AAKernelInfo(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
3604344a3780SDimitry Andric 
3605b1c73532SDimitry Andric   /// The callee value is tracked beyond a simple stripPointerCasts, so we allow
3606b1c73532SDimitry Andric   /// unknown callees.
requiresCalleeForCallBase__anon7bbaa8dc0111::AAKernelInfo3607b1c73532SDimitry Andric   static bool requiresCalleeForCallBase() { return false; }
3608b1c73532SDimitry Andric 
3609344a3780SDimitry Andric   /// Statistics are tracked as part of manifest for now.
trackStatistics__anon7bbaa8dc0111::AAKernelInfo3610344a3780SDimitry Andric   void trackStatistics() const override {}
3611344a3780SDimitry Andric 
3612344a3780SDimitry Andric   /// See AbstractAttribute::getAsStr()
getAsStr__anon7bbaa8dc0111::AAKernelInfo36137fa27ce4SDimitry Andric   const std::string getAsStr(Attributor *) const override {
3614344a3780SDimitry Andric     if (!isValidState())
3615344a3780SDimitry Andric       return "<invalid>";
3616344a3780SDimitry Andric     return std::string(SPMDCompatibilityTracker.isAssumed() ? "SPMD"
3617344a3780SDimitry Andric                                                             : "generic") +
3618344a3780SDimitry Andric            std::string(SPMDCompatibilityTracker.isAtFixpoint() ? " [FIX]"
3619344a3780SDimitry Andric                                                                : "") +
3620344a3780SDimitry Andric            std::string(" #PRs: ") +
3621c0981da4SDimitry Andric            (ReachedKnownParallelRegions.isValidState()
3622c0981da4SDimitry Andric                 ? std::to_string(ReachedKnownParallelRegions.size())
3623c0981da4SDimitry Andric                 : "<invalid>") +
3624344a3780SDimitry Andric            ", #Unknown PRs: " +
3625c0981da4SDimitry Andric            (ReachedUnknownParallelRegions.isValidState()
3626c0981da4SDimitry Andric                 ? std::to_string(ReachedUnknownParallelRegions.size())
3627c0981da4SDimitry Andric                 : "<invalid>") +
3628c0981da4SDimitry Andric            ", #Reaching Kernels: " +
3629c0981da4SDimitry Andric            (ReachingKernelEntries.isValidState()
3630c0981da4SDimitry Andric                 ? std::to_string(ReachingKernelEntries.size())
3631e3b55780SDimitry Andric                 : "<invalid>") +
3632e3b55780SDimitry Andric            ", #ParLevels: " +
3633e3b55780SDimitry Andric            (ParallelLevels.isValidState()
3634e3b55780SDimitry Andric                 ? std::to_string(ParallelLevels.size())
3635b1c73532SDimitry Andric                 : "<invalid>") +
3636b1c73532SDimitry Andric            ", NestedPar: " + (NestedParallelism ? "yes" : "no");
3637344a3780SDimitry Andric   }
3638344a3780SDimitry Andric 
3639344a3780SDimitry Andric   /// Create an abstract attribute biew for the position \p IRP.
3640344a3780SDimitry Andric   static AAKernelInfo &createForPosition(const IRPosition &IRP, Attributor &A);
3641344a3780SDimitry Andric 
3642344a3780SDimitry Andric   /// See AbstractAttribute::getName()
getName__anon7bbaa8dc0111::AAKernelInfo3643344a3780SDimitry Andric   const std::string getName() const override { return "AAKernelInfo"; }
3644344a3780SDimitry Andric 
3645344a3780SDimitry Andric   /// See AbstractAttribute::getIdAddr()
getIdAddr__anon7bbaa8dc0111::AAKernelInfo3646344a3780SDimitry Andric   const char *getIdAddr() const override { return &ID; }
3647344a3780SDimitry Andric 
3648344a3780SDimitry Andric   /// This function should return true if the type of the \p AA is AAKernelInfo
classof__anon7bbaa8dc0111::AAKernelInfo3649344a3780SDimitry Andric   static bool classof(const AbstractAttribute *AA) {
3650344a3780SDimitry Andric     return (AA->getIdAddr() == &ID);
3651344a3780SDimitry Andric   }
3652344a3780SDimitry Andric 
3653344a3780SDimitry Andric   static const char ID;
3654344a3780SDimitry Andric };
3655344a3780SDimitry Andric 
3656344a3780SDimitry Andric /// The function kernel info abstract attribute, basically, what can we say
3657344a3780SDimitry Andric /// about a function with regards to the KernelInfoState.
3658344a3780SDimitry Andric struct AAKernelInfoFunction : AAKernelInfo {
AAKernelInfoFunction__anon7bbaa8dc0111::AAKernelInfoFunction3659344a3780SDimitry Andric   AAKernelInfoFunction(const IRPosition &IRP, Attributor &A)
3660344a3780SDimitry Andric       : AAKernelInfo(IRP, A) {}
3661344a3780SDimitry Andric 
3662c0981da4SDimitry Andric   SmallPtrSet<Instruction *, 4> GuardedInstructions;
3663c0981da4SDimitry Andric 
getGuardedInstructions__anon7bbaa8dc0111::AAKernelInfoFunction3664c0981da4SDimitry Andric   SmallPtrSetImpl<Instruction *> &getGuardedInstructions() {
3665c0981da4SDimitry Andric     return GuardedInstructions;
3666c0981da4SDimitry Andric   }
3667c0981da4SDimitry Andric 
setConfigurationOfKernelEnvironment__anon7bbaa8dc0111::AAKernelInfoFunction3668b1c73532SDimitry Andric   void setConfigurationOfKernelEnvironment(ConstantStruct *ConfigC) {
3669b1c73532SDimitry Andric     Constant *NewKernelEnvC = ConstantFoldInsertValueInstruction(
3670b1c73532SDimitry Andric         KernelEnvC, ConfigC, {KernelInfo::ConfigurationIdx});
3671b1c73532SDimitry Andric     assert(NewKernelEnvC && "Failed to create new kernel environment");
3672b1c73532SDimitry Andric     KernelEnvC = cast<ConstantStruct>(NewKernelEnvC);
3673b1c73532SDimitry Andric   }
3674b1c73532SDimitry Andric 
3675b1c73532SDimitry Andric #define KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(MEMBER)                        \
3676b1c73532SDimitry Andric   void set##MEMBER##OfKernelEnvironment(ConstantInt *NewVal) {                 \
3677b1c73532SDimitry Andric     ConstantStruct *ConfigC =                                                  \
3678b1c73532SDimitry Andric         KernelInfo::getConfigurationFromKernelEnvironment(KernelEnvC);         \
3679b1c73532SDimitry Andric     Constant *NewConfigC = ConstantFoldInsertValueInstruction(                 \
3680b1c73532SDimitry Andric         ConfigC, NewVal, {KernelInfo::MEMBER##Idx});                           \
3681b1c73532SDimitry Andric     assert(NewConfigC && "Failed to create new configuration environment");    \
3682b1c73532SDimitry Andric     setConfigurationOfKernelEnvironment(cast<ConstantStruct>(NewConfigC));     \
3683b1c73532SDimitry Andric   }
3684b1c73532SDimitry Andric 
3685b1c73532SDimitry Andric   KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(UseGenericStateMachine)
KERNEL_ENVIRONMENT_CONFIGURATION_SETTER__anon7bbaa8dc0111::AAKernelInfoFunction3686b1c73532SDimitry Andric   KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(MayUseNestedParallelism)
3687b1c73532SDimitry Andric   KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(ExecMode)
3688b1c73532SDimitry Andric   KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(MinThreads)
3689b1c73532SDimitry Andric   KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(MaxThreads)
3690b1c73532SDimitry Andric   KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(MinTeams)
3691b1c73532SDimitry Andric   KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(MaxTeams)
3692b1c73532SDimitry Andric 
3693b1c73532SDimitry Andric #undef KERNEL_ENVIRONMENT_CONFIGURATION_SETTER
3694b1c73532SDimitry Andric 
3695344a3780SDimitry Andric   /// See AbstractAttribute::initialize(...).
3696344a3780SDimitry Andric   void initialize(Attributor &A) override {
3697344a3780SDimitry Andric     // This is a high-level transform that might change the constant arguments
3698344a3780SDimitry Andric     // of the init and dinit calls. We need to tell the Attributor about this
    // to avoid other parts using the current constant value for simplification.
3700344a3780SDimitry Andric     auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
3701344a3780SDimitry Andric 
3702344a3780SDimitry Andric     Function *Fn = getAnchorScope();
3703344a3780SDimitry Andric 
3704344a3780SDimitry Andric     OMPInformationCache::RuntimeFunctionInfo &InitRFI =
3705344a3780SDimitry Andric         OMPInfoCache.RFIs[OMPRTL___kmpc_target_init];
3706344a3780SDimitry Andric     OMPInformationCache::RuntimeFunctionInfo &DeinitRFI =
3707344a3780SDimitry Andric         OMPInfoCache.RFIs[OMPRTL___kmpc_target_deinit];
3708344a3780SDimitry Andric 
3709344a3780SDimitry Andric     // For kernels we perform more initialization work, first we find the init
3710344a3780SDimitry Andric     // and deinit calls.
3711344a3780SDimitry Andric     auto StoreCallBase = [](Use &U,
3712344a3780SDimitry Andric                             OMPInformationCache::RuntimeFunctionInfo &RFI,
3713344a3780SDimitry Andric                             CallBase *&Storage) {
3714344a3780SDimitry Andric       CallBase *CB = OpenMPOpt::getCallIfRegularCall(U, &RFI);
3715344a3780SDimitry Andric       assert(CB &&
3716344a3780SDimitry Andric              "Unexpected use of __kmpc_target_init or __kmpc_target_deinit!");
3717344a3780SDimitry Andric       assert(!Storage &&
3718344a3780SDimitry Andric              "Multiple uses of __kmpc_target_init or __kmpc_target_deinit!");
3719344a3780SDimitry Andric       Storage = CB;
3720344a3780SDimitry Andric       return false;
3721344a3780SDimitry Andric     };
3722344a3780SDimitry Andric     InitRFI.foreachUse(
3723344a3780SDimitry Andric         [&](Use &U, Function &) {
3724344a3780SDimitry Andric           StoreCallBase(U, InitRFI, KernelInitCB);
3725344a3780SDimitry Andric           return false;
3726344a3780SDimitry Andric         },
3727344a3780SDimitry Andric         Fn);
3728344a3780SDimitry Andric     DeinitRFI.foreachUse(
3729344a3780SDimitry Andric         [&](Use &U, Function &) {
3730344a3780SDimitry Andric           StoreCallBase(U, DeinitRFI, KernelDeinitCB);
3731344a3780SDimitry Andric           return false;
3732344a3780SDimitry Andric         },
3733344a3780SDimitry Andric         Fn);
3734344a3780SDimitry Andric 
3735c0981da4SDimitry Andric     // Ignore kernels without initializers such as global constructors.
3736145449b1SDimitry Andric     if (!KernelInitCB || !KernelDeinitCB)
3737c0981da4SDimitry Andric       return;
3738145449b1SDimitry Andric 
3739145449b1SDimitry Andric     // Add itself to the reaching kernel and set IsKernelEntry.
3740145449b1SDimitry Andric     ReachingKernelEntries.insert(Fn);
3741145449b1SDimitry Andric     IsKernelEntry = true;
3742344a3780SDimitry Andric 
3743b1c73532SDimitry Andric     KernelEnvC =
3744b1c73532SDimitry Andric         KernelInfo::getKernelEnvironementFromKernelInitCB(KernelInitCB);
3745b1c73532SDimitry Andric     GlobalVariable *KernelEnvGV =
3746b1c73532SDimitry Andric         KernelInfo::getKernelEnvironementGVFromKernelInitCB(KernelInitCB);
3747344a3780SDimitry Andric 
3748b1c73532SDimitry Andric     Attributor::GlobalVariableSimplifictionCallbackTy
3749b1c73532SDimitry Andric         KernelConfigurationSimplifyCB =
3750b1c73532SDimitry Andric             [&](const GlobalVariable &GV, const AbstractAttribute *AA,
3751b1c73532SDimitry Andric                 bool &UsedAssumedInformation) -> std::optional<Constant *> {
3752b1c73532SDimitry Andric       if (!isAtFixpoint()) {
3753b1c73532SDimitry Andric         if (!AA)
3754c0981da4SDimitry Andric           return nullptr;
3755344a3780SDimitry Andric         UsedAssumedInformation = true;
3756b1c73532SDimitry Andric         A.recordDependence(*this, *AA, DepClassTy::OPTIONAL);
3757344a3780SDimitry Andric       }
3758b1c73532SDimitry Andric       return KernelEnvC;
3759344a3780SDimitry Andric     };
3760344a3780SDimitry Andric 
3761b1c73532SDimitry Andric     A.registerGlobalVariableSimplificationCallback(
3762b1c73532SDimitry Andric         *KernelEnvGV, KernelConfigurationSimplifyCB);
3763344a3780SDimitry Andric 
3764344a3780SDimitry Andric     // Check if we know we are in SPMD-mode already.
3765b1c73532SDimitry Andric     ConstantInt *ExecModeC =
3766b1c73532SDimitry Andric         KernelInfo::getExecModeFromKernelEnvironment(KernelEnvC);
3767b1c73532SDimitry Andric     ConstantInt *AssumedExecModeC = ConstantInt::get(
376899aabd70SDimitry Andric         ExecModeC->getIntegerType(),
3769b1c73532SDimitry Andric         ExecModeC->getSExtValue() | OMP_TGT_EXEC_MODE_GENERIC_SPMD);
3770b1c73532SDimitry Andric     if (ExecModeC->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD)
3771344a3780SDimitry Andric       SPMDCompatibilityTracker.indicateOptimisticFixpoint();
3772c0981da4SDimitry Andric     else if (DisableOpenMPOptSPMDization)
3773b1c73532SDimitry Andric       // This is a generic region but SPMDization is disabled so stop
3774b1c73532SDimitry Andric       // tracking.
3775c0981da4SDimitry Andric       SPMDCompatibilityTracker.indicatePessimisticFixpoint();
3776b1c73532SDimitry Andric     else
3777b1c73532SDimitry Andric       setExecModeOfKernelEnvironment(AssumedExecModeC);
3778b1c73532SDimitry Andric 
3779b1c73532SDimitry Andric     const Triple T(Fn->getParent()->getTargetTriple());
3780b1c73532SDimitry Andric     auto *Int32Ty = Type::getInt32Ty(Fn->getContext());
3781b1c73532SDimitry Andric     auto [MinThreads, MaxThreads] =
3782b1c73532SDimitry Andric         OpenMPIRBuilder::readThreadBoundsForKernel(T, *Fn);
3783b1c73532SDimitry Andric     if (MinThreads)
3784b1c73532SDimitry Andric       setMinThreadsOfKernelEnvironment(ConstantInt::get(Int32Ty, MinThreads));
3785b1c73532SDimitry Andric     if (MaxThreads)
3786b1c73532SDimitry Andric       setMaxThreadsOfKernelEnvironment(ConstantInt::get(Int32Ty, MaxThreads));
3787b1c73532SDimitry Andric     auto [MinTeams, MaxTeams] =
3788b1c73532SDimitry Andric         OpenMPIRBuilder::readTeamBoundsForKernel(T, *Fn);
3789b1c73532SDimitry Andric     if (MinTeams)
3790b1c73532SDimitry Andric       setMinTeamsOfKernelEnvironment(ConstantInt::get(Int32Ty, MinTeams));
3791b1c73532SDimitry Andric     if (MaxTeams)
3792b1c73532SDimitry Andric       setMaxTeamsOfKernelEnvironment(ConstantInt::get(Int32Ty, MaxTeams));
3793b1c73532SDimitry Andric 
3794b1c73532SDimitry Andric     ConstantInt *MayUseNestedParallelismC =
3795b1c73532SDimitry Andric         KernelInfo::getMayUseNestedParallelismFromKernelEnvironment(KernelEnvC);
3796b1c73532SDimitry Andric     ConstantInt *AssumedMayUseNestedParallelismC = ConstantInt::get(
379799aabd70SDimitry Andric         MayUseNestedParallelismC->getIntegerType(), NestedParallelism);
3798b1c73532SDimitry Andric     setMayUseNestedParallelismOfKernelEnvironment(
3799b1c73532SDimitry Andric         AssumedMayUseNestedParallelismC);
3800b1c73532SDimitry Andric 
3801b1c73532SDimitry Andric     if (!DisableOpenMPOptStateMachineRewrite) {
3802b1c73532SDimitry Andric       ConstantInt *UseGenericStateMachineC =
3803b1c73532SDimitry Andric           KernelInfo::getUseGenericStateMachineFromKernelEnvironment(
3804b1c73532SDimitry Andric               KernelEnvC);
3805b1c73532SDimitry Andric       ConstantInt *AssumedUseGenericStateMachineC =
380699aabd70SDimitry Andric           ConstantInt::get(UseGenericStateMachineC->getIntegerType(), false);
3807b1c73532SDimitry Andric       setUseGenericStateMachineOfKernelEnvironment(
3808b1c73532SDimitry Andric           AssumedUseGenericStateMachineC);
3809b1c73532SDimitry Andric     }
3810e3b55780SDimitry Andric 
3811e3b55780SDimitry Andric     // Register virtual uses of functions we might need to preserve.
3812e3b55780SDimitry Andric     auto RegisterVirtualUse = [&](RuntimeFunction RFKind,
3813e3b55780SDimitry Andric                                   Attributor::VirtualUseCallbackTy &CB) {
3814e3b55780SDimitry Andric       if (!OMPInfoCache.RFIs[RFKind].Declaration)
3815e3b55780SDimitry Andric         return;
3816e3b55780SDimitry Andric       A.registerVirtualUseCallback(*OMPInfoCache.RFIs[RFKind].Declaration, CB);
3817e3b55780SDimitry Andric     };
3818e3b55780SDimitry Andric 
3819e3b55780SDimitry Andric     // Add a dependence to ensure updates if the state changes.
3820e3b55780SDimitry Andric     auto AddDependence = [](Attributor &A, const AAKernelInfo *KI,
3821e3b55780SDimitry Andric                             const AbstractAttribute *QueryingAA) {
3822e3b55780SDimitry Andric       if (QueryingAA) {
3823e3b55780SDimitry Andric         A.recordDependence(*KI, *QueryingAA, DepClassTy::OPTIONAL);
3824e3b55780SDimitry Andric       }
3825e3b55780SDimitry Andric       return true;
3826e3b55780SDimitry Andric     };
3827e3b55780SDimitry Andric 
3828e3b55780SDimitry Andric     Attributor::VirtualUseCallbackTy CustomStateMachineUseCB =
3829e3b55780SDimitry Andric         [&](Attributor &A, const AbstractAttribute *QueryingAA) {
3830e3b55780SDimitry Andric           // Whenever we create a custom state machine we will insert calls to
3831e3b55780SDimitry Andric           // __kmpc_get_hardware_num_threads_in_block,
3832e3b55780SDimitry Andric           // __kmpc_get_warp_size,
3833e3b55780SDimitry Andric           // __kmpc_barrier_simple_generic,
3834e3b55780SDimitry Andric           // __kmpc_kernel_parallel, and
3835e3b55780SDimitry Andric           // __kmpc_kernel_end_parallel.
3836e3b55780SDimitry Andric           // Not needed if we are on track for SPMDzation.
3837e3b55780SDimitry Andric           if (SPMDCompatibilityTracker.isValidState())
3838e3b55780SDimitry Andric             return AddDependence(A, this, QueryingAA);
3839e3b55780SDimitry Andric           // Not needed if we can't rewrite due to an invalid state.
3840e3b55780SDimitry Andric           if (!ReachedKnownParallelRegions.isValidState())
3841e3b55780SDimitry Andric             return AddDependence(A, this, QueryingAA);
3842e3b55780SDimitry Andric           return false;
3843e3b55780SDimitry Andric         };
3844e3b55780SDimitry Andric 
3845e3b55780SDimitry Andric     // Not needed if we are pre-runtime merge.
3846e3b55780SDimitry Andric     if (!KernelInitCB->getCalledFunction()->isDeclaration()) {
3847e3b55780SDimitry Andric       RegisterVirtualUse(OMPRTL___kmpc_get_hardware_num_threads_in_block,
3848e3b55780SDimitry Andric                          CustomStateMachineUseCB);
3849e3b55780SDimitry Andric       RegisterVirtualUse(OMPRTL___kmpc_get_warp_size, CustomStateMachineUseCB);
3850e3b55780SDimitry Andric       RegisterVirtualUse(OMPRTL___kmpc_barrier_simple_generic,
3851e3b55780SDimitry Andric                          CustomStateMachineUseCB);
3852e3b55780SDimitry Andric       RegisterVirtualUse(OMPRTL___kmpc_kernel_parallel,
3853e3b55780SDimitry Andric                          CustomStateMachineUseCB);
3854e3b55780SDimitry Andric       RegisterVirtualUse(OMPRTL___kmpc_kernel_end_parallel,
3855e3b55780SDimitry Andric                          CustomStateMachineUseCB);
3856e3b55780SDimitry Andric     }
3857e3b55780SDimitry Andric 
3858e3b55780SDimitry Andric     // If we do not perform SPMDzation we do not need the virtual uses below.
3859e3b55780SDimitry Andric     if (SPMDCompatibilityTracker.isAtFixpoint())
3860e3b55780SDimitry Andric       return;
3861e3b55780SDimitry Andric 
3862e3b55780SDimitry Andric     Attributor::VirtualUseCallbackTy HWThreadIdUseCB =
3863e3b55780SDimitry Andric         [&](Attributor &A, const AbstractAttribute *QueryingAA) {
3864e3b55780SDimitry Andric           // Whenever we perform SPMDzation we will insert
3865e3b55780SDimitry Andric           // __kmpc_get_hardware_thread_id_in_block calls.
3866e3b55780SDimitry Andric           if (!SPMDCompatibilityTracker.isValidState())
3867e3b55780SDimitry Andric             return AddDependence(A, this, QueryingAA);
3868e3b55780SDimitry Andric           return false;
3869e3b55780SDimitry Andric         };
3870e3b55780SDimitry Andric     RegisterVirtualUse(OMPRTL___kmpc_get_hardware_thread_id_in_block,
3871e3b55780SDimitry Andric                        HWThreadIdUseCB);
3872e3b55780SDimitry Andric 
3873e3b55780SDimitry Andric     Attributor::VirtualUseCallbackTy SPMDBarrierUseCB =
3874e3b55780SDimitry Andric         [&](Attributor &A, const AbstractAttribute *QueryingAA) {
3875e3b55780SDimitry Andric           // Whenever we perform SPMDzation with guarding we will insert
3876e3b55780SDimitry Andric           // __kmpc_simple_barrier_spmd calls. If SPMDzation failed, there is
3877e3b55780SDimitry Andric           // nothing to guard, or there are no parallel regions, we don't need
3878e3b55780SDimitry Andric           // the calls.
3879e3b55780SDimitry Andric           if (!SPMDCompatibilityTracker.isValidState())
3880e3b55780SDimitry Andric             return AddDependence(A, this, QueryingAA);
3881e3b55780SDimitry Andric           if (SPMDCompatibilityTracker.empty())
3882e3b55780SDimitry Andric             return AddDependence(A, this, QueryingAA);
3883e3b55780SDimitry Andric           if (!mayContainParallelRegion())
3884e3b55780SDimitry Andric             return AddDependence(A, this, QueryingAA);
3885e3b55780SDimitry Andric           return false;
3886e3b55780SDimitry Andric         };
3887e3b55780SDimitry Andric     RegisterVirtualUse(OMPRTL___kmpc_barrier_simple_spmd, SPMDBarrierUseCB);
3888c0981da4SDimitry Andric   }
3889c0981da4SDimitry Andric 
3890c0981da4SDimitry Andric   /// Sanitize the string \p S such that it is a suitable global symbol name.
sanitizeForGlobalName__anon7bbaa8dc0111::AAKernelInfoFunction3891c0981da4SDimitry Andric   static std::string sanitizeForGlobalName(std::string S) {
3892c0981da4SDimitry Andric     std::replace_if(
3893c0981da4SDimitry Andric         S.begin(), S.end(),
3894c0981da4SDimitry Andric         [](const char C) {
3895c0981da4SDimitry Andric           return !((C >= 'a' && C <= 'z') || (C >= 'A' && C <= 'Z') ||
3896c0981da4SDimitry Andric                    (C >= '0' && C <= '9') || C == '_');
3897c0981da4SDimitry Andric         },
3898c0981da4SDimitry Andric         '.');
3899c0981da4SDimitry Andric     return S;
3900344a3780SDimitry Andric   }
3901344a3780SDimitry Andric 
3902344a3780SDimitry Andric   /// Modify the IR based on the KernelInfoState as the fixpoint iteration is
3903344a3780SDimitry Andric   /// finished now.
manifest__anon7bbaa8dc0111::AAKernelInfoFunction3904344a3780SDimitry Andric   ChangeStatus manifest(Attributor &A) override {
3905344a3780SDimitry Andric     // If we are not looking at a kernel with __kmpc_target_init and
3906344a3780SDimitry Andric     // __kmpc_target_deinit call we cannot actually manifest the information.
3907344a3780SDimitry Andric     if (!KernelInitCB || !KernelDeinitCB)
3908344a3780SDimitry Andric       return ChangeStatus::UNCHANGED;
3909344a3780SDimitry Andric 
3910c0981da4SDimitry Andric     ChangeStatus Changed = ChangeStatus::UNCHANGED;
3911b1c73532SDimitry Andric 
3912b1c73532SDimitry Andric     bool HasBuiltStateMachine = true;
3913e3b55780SDimitry Andric     if (!changeToSPMDMode(A, Changed)) {
3914e3b55780SDimitry Andric       if (!KernelInitCB->getCalledFunction()->isDeclaration())
3915b1c73532SDimitry Andric         HasBuiltStateMachine = buildCustomStateMachine(A, Changed);
3916b1c73532SDimitry Andric       else
3917b1c73532SDimitry Andric         HasBuiltStateMachine = false;
3918b1c73532SDimitry Andric     }
3919b1c73532SDimitry Andric 
3920b1c73532SDimitry Andric     // We need to reset KernelEnvC if specific rewriting is not done.
3921b1c73532SDimitry Andric     ConstantStruct *ExistingKernelEnvC =
3922b1c73532SDimitry Andric         KernelInfo::getKernelEnvironementFromKernelInitCB(KernelInitCB);
3923b1c73532SDimitry Andric     ConstantInt *OldUseGenericStateMachineVal =
3924b1c73532SDimitry Andric         KernelInfo::getUseGenericStateMachineFromKernelEnvironment(
3925b1c73532SDimitry Andric             ExistingKernelEnvC);
3926b1c73532SDimitry Andric     if (!HasBuiltStateMachine)
3927b1c73532SDimitry Andric       setUseGenericStateMachineOfKernelEnvironment(
3928b1c73532SDimitry Andric           OldUseGenericStateMachineVal);
3929b1c73532SDimitry Andric 
3930b1c73532SDimitry Andric     // At last, update the KernelEnvc
3931b1c73532SDimitry Andric     GlobalVariable *KernelEnvGV =
3932b1c73532SDimitry Andric         KernelInfo::getKernelEnvironementGVFromKernelInitCB(KernelInitCB);
3933b1c73532SDimitry Andric     if (KernelEnvGV->getInitializer() != KernelEnvC) {
3934b1c73532SDimitry Andric       KernelEnvGV->setInitializer(KernelEnvC);
3935b1c73532SDimitry Andric       Changed = ChangeStatus::CHANGED;
3936e3b55780SDimitry Andric     }
3937344a3780SDimitry Andric 
3938c0981da4SDimitry Andric     return Changed;
3939344a3780SDimitry Andric   }
3940344a3780SDimitry Andric 
insertInstructionGuardsHelper__anon7bbaa8dc0111::AAKernelInfoFunction3941e3b55780SDimitry Andric   void insertInstructionGuardsHelper(Attributor &A) {
3942344a3780SDimitry Andric     auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
3943344a3780SDimitry Andric 
3944c0981da4SDimitry Andric     auto CreateGuardedRegion = [&](Instruction *RegionStartI,
3945c0981da4SDimitry Andric                                    Instruction *RegionEndI) {
3946c0981da4SDimitry Andric       LoopInfo *LI = nullptr;
3947c0981da4SDimitry Andric       DominatorTree *DT = nullptr;
3948c0981da4SDimitry Andric       MemorySSAUpdater *MSU = nullptr;
3949c0981da4SDimitry Andric       using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
3950c0981da4SDimitry Andric 
3951c0981da4SDimitry Andric       BasicBlock *ParentBB = RegionStartI->getParent();
3952c0981da4SDimitry Andric       Function *Fn = ParentBB->getParent();
3953c0981da4SDimitry Andric       Module &M = *Fn->getParent();
3954c0981da4SDimitry Andric 
3955c0981da4SDimitry Andric       // Create all the blocks and logic.
3956c0981da4SDimitry Andric       // ParentBB:
3957c0981da4SDimitry Andric       //    goto RegionCheckTidBB
3958c0981da4SDimitry Andric       // RegionCheckTidBB:
3959c0981da4SDimitry Andric       //    Tid = __kmpc_hardware_thread_id()
3960c0981da4SDimitry Andric       //    if (Tid != 0)
3961c0981da4SDimitry Andric       //        goto RegionBarrierBB
3962c0981da4SDimitry Andric       // RegionStartBB:
3963c0981da4SDimitry Andric       //    <execute instructions guarded>
3964c0981da4SDimitry Andric       //    goto RegionEndBB
3965c0981da4SDimitry Andric       // RegionEndBB:
3966c0981da4SDimitry Andric       //    <store escaping values to shared mem>
3967c0981da4SDimitry Andric       //    goto RegionBarrierBB
3968c0981da4SDimitry Andric       //  RegionBarrierBB:
3969c0981da4SDimitry Andric       //    __kmpc_simple_barrier_spmd()
3970c0981da4SDimitry Andric       //    // second barrier is omitted if lacking escaping values.
3971c0981da4SDimitry Andric       //    <load escaping values from shared mem>
3972c0981da4SDimitry Andric       //    __kmpc_simple_barrier_spmd()
3973c0981da4SDimitry Andric       //    goto RegionExitBB
3974c0981da4SDimitry Andric       // RegionExitBB:
3975c0981da4SDimitry Andric       //    <execute rest of instructions>
3976c0981da4SDimitry Andric 
3977c0981da4SDimitry Andric       BasicBlock *RegionEndBB = SplitBlock(ParentBB, RegionEndI->getNextNode(),
3978c0981da4SDimitry Andric                                            DT, LI, MSU, "region.guarded.end");
3979c0981da4SDimitry Andric       BasicBlock *RegionBarrierBB =
3980c0981da4SDimitry Andric           SplitBlock(RegionEndBB, &*RegionEndBB->getFirstInsertionPt(), DT, LI,
3981c0981da4SDimitry Andric                      MSU, "region.barrier");
3982c0981da4SDimitry Andric       BasicBlock *RegionExitBB =
3983c0981da4SDimitry Andric           SplitBlock(RegionBarrierBB, &*RegionBarrierBB->getFirstInsertionPt(),
3984c0981da4SDimitry Andric                      DT, LI, MSU, "region.exit");
3985c0981da4SDimitry Andric       BasicBlock *RegionStartBB =
3986c0981da4SDimitry Andric           SplitBlock(ParentBB, RegionStartI, DT, LI, MSU, "region.guarded");
3987c0981da4SDimitry Andric 
3988c0981da4SDimitry Andric       assert(ParentBB->getUniqueSuccessor() == RegionStartBB &&
3989c0981da4SDimitry Andric              "Expected a different CFG");
3990c0981da4SDimitry Andric 
3991c0981da4SDimitry Andric       BasicBlock *RegionCheckTidBB = SplitBlock(
3992c0981da4SDimitry Andric           ParentBB, ParentBB->getTerminator(), DT, LI, MSU, "region.check.tid");
3993c0981da4SDimitry Andric 
3994c0981da4SDimitry Andric       // Register basic blocks with the Attributor.
3995c0981da4SDimitry Andric       A.registerManifestAddedBasicBlock(*RegionEndBB);
3996c0981da4SDimitry Andric       A.registerManifestAddedBasicBlock(*RegionBarrierBB);
3997c0981da4SDimitry Andric       A.registerManifestAddedBasicBlock(*RegionExitBB);
3998c0981da4SDimitry Andric       A.registerManifestAddedBasicBlock(*RegionStartBB);
3999c0981da4SDimitry Andric       A.registerManifestAddedBasicBlock(*RegionCheckTidBB);
4000c0981da4SDimitry Andric 
4001c0981da4SDimitry Andric       bool HasBroadcastValues = false;
4002c0981da4SDimitry Andric       // Find escaping outputs from the guarded region to outside users and
4003c0981da4SDimitry Andric       // broadcast their values to them.
4004c0981da4SDimitry Andric       for (Instruction &I : *RegionStartBB) {
4005b1c73532SDimitry Andric         SmallVector<Use *, 4> OutsideUses;
4006b1c73532SDimitry Andric         for (Use &U : I.uses()) {
4007b1c73532SDimitry Andric           Instruction &UsrI = *cast<Instruction>(U.getUser());
4008c0981da4SDimitry Andric           if (UsrI.getParent() != RegionStartBB)
4009b1c73532SDimitry Andric             OutsideUses.push_back(&U);
4010c0981da4SDimitry Andric         }
4011c0981da4SDimitry Andric 
4012b1c73532SDimitry Andric         if (OutsideUses.empty())
4013c0981da4SDimitry Andric           continue;
4014c0981da4SDimitry Andric 
4015c0981da4SDimitry Andric         HasBroadcastValues = true;
4016c0981da4SDimitry Andric 
4017c0981da4SDimitry Andric         // Emit a global variable in shared memory to store the broadcasted
4018c0981da4SDimitry Andric         // value.
4019c0981da4SDimitry Andric         auto *SharedMem = new GlobalVariable(
4020c0981da4SDimitry Andric             M, I.getType(), /* IsConstant */ false,
4021c0981da4SDimitry Andric             GlobalValue::InternalLinkage, UndefValue::get(I.getType()),
4022c0981da4SDimitry Andric             sanitizeForGlobalName(
4023c0981da4SDimitry Andric                 (I.getName() + ".guarded.output.alloc").str()),
4024c0981da4SDimitry Andric             nullptr, GlobalValue::NotThreadLocal,
4025c0981da4SDimitry Andric             static_cast<unsigned>(AddressSpace::Shared));
4026c0981da4SDimitry Andric 
4027c0981da4SDimitry Andric         // Emit a store instruction to update the value.
4028ac9a064cSDimitry Andric         new StoreInst(&I, SharedMem,
4029ac9a064cSDimitry Andric                       RegionEndBB->getTerminator()->getIterator());
4030c0981da4SDimitry Andric 
4031ac9a064cSDimitry Andric         LoadInst *LoadI = new LoadInst(
4032ac9a064cSDimitry Andric             I.getType(), SharedMem, I.getName() + ".guarded.output.load",
4033ac9a064cSDimitry Andric             RegionBarrierBB->getTerminator()->getIterator());
4034c0981da4SDimitry Andric 
4035c0981da4SDimitry Andric         // Emit a load instruction and replace uses of the output value.
4036b1c73532SDimitry Andric         for (Use *U : OutsideUses)
4037b1c73532SDimitry Andric           A.changeUseAfterManifest(*U, *LoadI);
4038c0981da4SDimitry Andric       }
4039c0981da4SDimitry Andric 
4040c0981da4SDimitry Andric       auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
4041c0981da4SDimitry Andric 
4042c0981da4SDimitry Andric       // Go to tid check BB in ParentBB.
4043c0981da4SDimitry Andric       const DebugLoc DL = ParentBB->getTerminator()->getDebugLoc();
4044c0981da4SDimitry Andric       ParentBB->getTerminator()->eraseFromParent();
4045c0981da4SDimitry Andric       OpenMPIRBuilder::LocationDescription Loc(
4046c0981da4SDimitry Andric           InsertPointTy(ParentBB, ParentBB->end()), DL);
4047c0981da4SDimitry Andric       OMPInfoCache.OMPBuilder.updateToLocation(Loc);
40486f8fc217SDimitry Andric       uint32_t SrcLocStrSize;
40496f8fc217SDimitry Andric       auto *SrcLocStr =
40506f8fc217SDimitry Andric           OMPInfoCache.OMPBuilder.getOrCreateSrcLocStr(Loc, SrcLocStrSize);
40516f8fc217SDimitry Andric       Value *Ident =
40526f8fc217SDimitry Andric           OMPInfoCache.OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize);
4053c0981da4SDimitry Andric       BranchInst::Create(RegionCheckTidBB, ParentBB)->setDebugLoc(DL);
4054c0981da4SDimitry Andric 
4055c0981da4SDimitry Andric       // Add check for Tid in RegionCheckTidBB
4056c0981da4SDimitry Andric       RegionCheckTidBB->getTerminator()->eraseFromParent();
4057c0981da4SDimitry Andric       OpenMPIRBuilder::LocationDescription LocRegionCheckTid(
4058c0981da4SDimitry Andric           InsertPointTy(RegionCheckTidBB, RegionCheckTidBB->end()), DL);
4059c0981da4SDimitry Andric       OMPInfoCache.OMPBuilder.updateToLocation(LocRegionCheckTid);
4060c0981da4SDimitry Andric       FunctionCallee HardwareTidFn =
4061c0981da4SDimitry Andric           OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
4062c0981da4SDimitry Andric               M, OMPRTL___kmpc_get_hardware_thread_id_in_block);
40636f8fc217SDimitry Andric       CallInst *Tid =
4064c0981da4SDimitry Andric           OMPInfoCache.OMPBuilder.Builder.CreateCall(HardwareTidFn, {});
40656f8fc217SDimitry Andric       Tid->setDebugLoc(DL);
40666f8fc217SDimitry Andric       OMPInfoCache.setCallingConvention(HardwareTidFn, Tid);
4067c0981da4SDimitry Andric       Value *TidCheck = OMPInfoCache.OMPBuilder.Builder.CreateIsNull(Tid);
4068c0981da4SDimitry Andric       OMPInfoCache.OMPBuilder.Builder
4069c0981da4SDimitry Andric           .CreateCondBr(TidCheck, RegionStartBB, RegionBarrierBB)
4070c0981da4SDimitry Andric           ->setDebugLoc(DL);
4071c0981da4SDimitry Andric 
4072c0981da4SDimitry Andric       // First barrier for synchronization, ensures main thread has updated
4073c0981da4SDimitry Andric       // values.
4074c0981da4SDimitry Andric       FunctionCallee BarrierFn =
4075c0981da4SDimitry Andric           OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
4076c0981da4SDimitry Andric               M, OMPRTL___kmpc_barrier_simple_spmd);
4077c0981da4SDimitry Andric       OMPInfoCache.OMPBuilder.updateToLocation(InsertPointTy(
4078c0981da4SDimitry Andric           RegionBarrierBB, RegionBarrierBB->getFirstInsertionPt()));
40796f8fc217SDimitry Andric       CallInst *Barrier =
40806f8fc217SDimitry Andric           OMPInfoCache.OMPBuilder.Builder.CreateCall(BarrierFn, {Ident, Tid});
40816f8fc217SDimitry Andric       Barrier->setDebugLoc(DL);
40826f8fc217SDimitry Andric       OMPInfoCache.setCallingConvention(BarrierFn, Barrier);
4083c0981da4SDimitry Andric 
4084c0981da4SDimitry Andric       // Second barrier ensures workers have read broadcast values.
40856f8fc217SDimitry Andric       if (HasBroadcastValues) {
4086ac9a064cSDimitry Andric         CallInst *Barrier =
4087ac9a064cSDimitry Andric             CallInst::Create(BarrierFn, {Ident, Tid}, "",
4088ac9a064cSDimitry Andric                              RegionBarrierBB->getTerminator()->getIterator());
40896f8fc217SDimitry Andric         Barrier->setDebugLoc(DL);
40906f8fc217SDimitry Andric         OMPInfoCache.setCallingConvention(BarrierFn, Barrier);
40916f8fc217SDimitry Andric       }
4092c0981da4SDimitry Andric     };
4093c0981da4SDimitry Andric 
4094c0981da4SDimitry Andric     auto &AllocSharedRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
4095c0981da4SDimitry Andric     SmallPtrSet<BasicBlock *, 8> Visited;
4096c0981da4SDimitry Andric     for (Instruction *GuardedI : SPMDCompatibilityTracker) {
4097c0981da4SDimitry Andric       BasicBlock *BB = GuardedI->getParent();
4098c0981da4SDimitry Andric       if (!Visited.insert(BB).second)
4099c0981da4SDimitry Andric         continue;
4100c0981da4SDimitry Andric 
4101c0981da4SDimitry Andric       SmallVector<std::pair<Instruction *, Instruction *>> Reorders;
4102c0981da4SDimitry Andric       Instruction *LastEffect = nullptr;
4103c0981da4SDimitry Andric       BasicBlock::reverse_iterator IP = BB->rbegin(), IPEnd = BB->rend();
4104c0981da4SDimitry Andric       while (++IP != IPEnd) {
4105c0981da4SDimitry Andric         if (!IP->mayHaveSideEffects() && !IP->mayReadFromMemory())
4106c0981da4SDimitry Andric           continue;
4107c0981da4SDimitry Andric         Instruction *I = &*IP;
4108c0981da4SDimitry Andric         if (OpenMPOpt::getCallIfRegularCall(*I, &AllocSharedRFI))
4109c0981da4SDimitry Andric           continue;
4110c0981da4SDimitry Andric         if (!I->user_empty() || !SPMDCompatibilityTracker.contains(I)) {
4111c0981da4SDimitry Andric           LastEffect = nullptr;
4112c0981da4SDimitry Andric           continue;
4113c0981da4SDimitry Andric         }
4114c0981da4SDimitry Andric         if (LastEffect)
4115c0981da4SDimitry Andric           Reorders.push_back({I, LastEffect});
4116c0981da4SDimitry Andric         LastEffect = &*IP;
4117c0981da4SDimitry Andric       }
4118c0981da4SDimitry Andric       for (auto &Reorder : Reorders)
4119c0981da4SDimitry Andric         Reorder.first->moveBefore(Reorder.second);
4120c0981da4SDimitry Andric     }
4121c0981da4SDimitry Andric 
4122c0981da4SDimitry Andric     SmallVector<std::pair<Instruction *, Instruction *>, 4> GuardedRegions;
4123c0981da4SDimitry Andric 
4124c0981da4SDimitry Andric     for (Instruction *GuardedI : SPMDCompatibilityTracker) {
4125c0981da4SDimitry Andric       BasicBlock *BB = GuardedI->getParent();
4126c0981da4SDimitry Andric       auto *CalleeAA = A.lookupAAFor<AAKernelInfo>(
4127c0981da4SDimitry Andric           IRPosition::function(*GuardedI->getFunction()), nullptr,
4128c0981da4SDimitry Andric           DepClassTy::NONE);
4129c0981da4SDimitry Andric       assert(CalleeAA != nullptr && "Expected Callee AAKernelInfo");
4130c0981da4SDimitry Andric       auto &CalleeAAFunction = *cast<AAKernelInfoFunction>(CalleeAA);
4131c0981da4SDimitry Andric       // Continue if instruction is already guarded.
4132c0981da4SDimitry Andric       if (CalleeAAFunction.getGuardedInstructions().contains(GuardedI))
4133c0981da4SDimitry Andric         continue;
4134c0981da4SDimitry Andric 
4135c0981da4SDimitry Andric       Instruction *GuardedRegionStart = nullptr, *GuardedRegionEnd = nullptr;
4136c0981da4SDimitry Andric       for (Instruction &I : *BB) {
4137c0981da4SDimitry Andric         // If instruction I needs to be guarded update the guarded region
4138c0981da4SDimitry Andric         // bounds.
4139c0981da4SDimitry Andric         if (SPMDCompatibilityTracker.contains(&I)) {
4140c0981da4SDimitry Andric           CalleeAAFunction.getGuardedInstructions().insert(&I);
4141c0981da4SDimitry Andric           if (GuardedRegionStart)
4142c0981da4SDimitry Andric             GuardedRegionEnd = &I;
4143c0981da4SDimitry Andric           else
4144c0981da4SDimitry Andric             GuardedRegionStart = GuardedRegionEnd = &I;
4145c0981da4SDimitry Andric 
4146c0981da4SDimitry Andric           continue;
4147c0981da4SDimitry Andric         }
4148c0981da4SDimitry Andric 
4149c0981da4SDimitry Andric         // Instruction I does not need guarding, store
4150c0981da4SDimitry Andric         // any region found and reset bounds.
4151c0981da4SDimitry Andric         if (GuardedRegionStart) {
4152c0981da4SDimitry Andric           GuardedRegions.push_back(
4153c0981da4SDimitry Andric               std::make_pair(GuardedRegionStart, GuardedRegionEnd));
4154c0981da4SDimitry Andric           GuardedRegionStart = nullptr;
4155c0981da4SDimitry Andric           GuardedRegionEnd = nullptr;
4156c0981da4SDimitry Andric         }
4157c0981da4SDimitry Andric       }
4158c0981da4SDimitry Andric     }
4159c0981da4SDimitry Andric 
4160c0981da4SDimitry Andric     for (auto &GR : GuardedRegions)
4161c0981da4SDimitry Andric       CreateGuardedRegion(GR.first, GR.second);
4162e3b55780SDimitry Andric   }
4163e3b55780SDimitry Andric 
forceSingleThreadPerWorkgroupHelper__anon7bbaa8dc0111::AAKernelInfoFunction4164e3b55780SDimitry Andric   void forceSingleThreadPerWorkgroupHelper(Attributor &A) {
4165e3b55780SDimitry Andric     // Only allow 1 thread per workgroup to continue executing the user code.
4166e3b55780SDimitry Andric     //
4167e3b55780SDimitry Andric     //     InitCB = __kmpc_target_init(...)
4168e3b55780SDimitry Andric     //     ThreadIdInBlock = __kmpc_get_hardware_thread_id_in_block();
4169e3b55780SDimitry Andric     //     if (ThreadIdInBlock != 0) return;
4170e3b55780SDimitry Andric     // UserCode:
4171e3b55780SDimitry Andric     //     // user code
4172e3b55780SDimitry Andric     //
4173e3b55780SDimitry Andric     auto &Ctx = getAnchorValue().getContext();
4174e3b55780SDimitry Andric     Function *Kernel = getAssociatedFunction();
4175e3b55780SDimitry Andric     assert(Kernel && "Expected an associated function!");
4176e3b55780SDimitry Andric 
4177e3b55780SDimitry Andric     // Create block for user code to branch to from initial block.
4178e3b55780SDimitry Andric     BasicBlock *InitBB = KernelInitCB->getParent();
4179e3b55780SDimitry Andric     BasicBlock *UserCodeBB = InitBB->splitBasicBlock(
4180e3b55780SDimitry Andric         KernelInitCB->getNextNode(), "main.thread.user_code");
4181e3b55780SDimitry Andric     BasicBlock *ReturnBB =
4182e3b55780SDimitry Andric         BasicBlock::Create(Ctx, "exit.threads", Kernel, UserCodeBB);
4183e3b55780SDimitry Andric 
4184e3b55780SDimitry Andric     // Register blocks with attributor:
4185e3b55780SDimitry Andric     A.registerManifestAddedBasicBlock(*InitBB);
4186e3b55780SDimitry Andric     A.registerManifestAddedBasicBlock(*UserCodeBB);
4187e3b55780SDimitry Andric     A.registerManifestAddedBasicBlock(*ReturnBB);
4188e3b55780SDimitry Andric 
4189e3b55780SDimitry Andric     // Debug location:
4190e3b55780SDimitry Andric     const DebugLoc &DLoc = KernelInitCB->getDebugLoc();
4191e3b55780SDimitry Andric     ReturnInst::Create(Ctx, ReturnBB)->setDebugLoc(DLoc);
4192e3b55780SDimitry Andric     InitBB->getTerminator()->eraseFromParent();
4193e3b55780SDimitry Andric 
4194e3b55780SDimitry Andric     // Prepare call to OMPRTL___kmpc_get_hardware_thread_id_in_block.
4195e3b55780SDimitry Andric     Module &M = *Kernel->getParent();
4196e3b55780SDimitry Andric     auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
4197e3b55780SDimitry Andric     FunctionCallee ThreadIdInBlockFn =
4198e3b55780SDimitry Andric         OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
4199e3b55780SDimitry Andric             M, OMPRTL___kmpc_get_hardware_thread_id_in_block);
4200e3b55780SDimitry Andric 
4201e3b55780SDimitry Andric     // Get thread ID in block.
4202e3b55780SDimitry Andric     CallInst *ThreadIdInBlock =
4203e3b55780SDimitry Andric         CallInst::Create(ThreadIdInBlockFn, "thread_id.in.block", InitBB);
4204e3b55780SDimitry Andric     OMPInfoCache.setCallingConvention(ThreadIdInBlockFn, ThreadIdInBlock);
4205e3b55780SDimitry Andric     ThreadIdInBlock->setDebugLoc(DLoc);
4206e3b55780SDimitry Andric 
4207e3b55780SDimitry Andric     // Eliminate all threads in the block with ID not equal to 0:
4208e3b55780SDimitry Andric     Instruction *IsMainThread =
4209e3b55780SDimitry Andric         ICmpInst::Create(ICmpInst::ICmp, CmpInst::ICMP_NE, ThreadIdInBlock,
4210e3b55780SDimitry Andric                          ConstantInt::get(ThreadIdInBlock->getType(), 0),
4211e3b55780SDimitry Andric                          "thread.is_main", InitBB);
4212e3b55780SDimitry Andric     IsMainThread->setDebugLoc(DLoc);
4213e3b55780SDimitry Andric     BranchInst::Create(ReturnBB, UserCodeBB, IsMainThread, InitBB);
4214e3b55780SDimitry Andric   }
4215e3b55780SDimitry Andric 
changeToSPMDMode__anon7bbaa8dc0111::AAKernelInfoFunction4216e3b55780SDimitry Andric   bool changeToSPMDMode(Attributor &A, ChangeStatus &Changed) {
4217e3b55780SDimitry Andric     auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
4218e3b55780SDimitry Andric 
42197fa27ce4SDimitry Andric     // We cannot change to SPMD mode if the runtime functions aren't available.
42207fa27ce4SDimitry Andric     if (!OMPInfoCache.runtimeFnsAvailable(
42217fa27ce4SDimitry Andric             {OMPRTL___kmpc_get_hardware_thread_id_in_block,
42227fa27ce4SDimitry Andric              OMPRTL___kmpc_barrier_simple_spmd}))
42237fa27ce4SDimitry Andric       return false;
42247fa27ce4SDimitry Andric 
4225e3b55780SDimitry Andric     if (!SPMDCompatibilityTracker.isAssumed()) {
4226e3b55780SDimitry Andric       for (Instruction *NonCompatibleI : SPMDCompatibilityTracker) {
4227e3b55780SDimitry Andric         if (!NonCompatibleI)
4228e3b55780SDimitry Andric           continue;
4229e3b55780SDimitry Andric 
4230e3b55780SDimitry Andric         // Skip diagnostics on calls to known OpenMP runtime functions for now.
4231e3b55780SDimitry Andric         if (auto *CB = dyn_cast<CallBase>(NonCompatibleI))
4232e3b55780SDimitry Andric           if (OMPInfoCache.RTLFunctions.contains(CB->getCalledFunction()))
4233e3b55780SDimitry Andric             continue;
4234e3b55780SDimitry Andric 
4235e3b55780SDimitry Andric         auto Remark = [&](OptimizationRemarkAnalysis ORA) {
4236e3b55780SDimitry Andric           ORA << "Value has potential side effects preventing SPMD-mode "
4237e3b55780SDimitry Andric                  "execution";
4238e3b55780SDimitry Andric           if (isa<CallBase>(NonCompatibleI)) {
4239ac9a064cSDimitry Andric             ORA << ". Add `[[omp::assume(\"ompx_spmd_amenable\")]]` to "
4240e3b55780SDimitry Andric                    "the called function to override";
4241e3b55780SDimitry Andric           }
4242e3b55780SDimitry Andric           return ORA << ".";
4243e3b55780SDimitry Andric         };
4244e3b55780SDimitry Andric         A.emitRemark<OptimizationRemarkAnalysis>(NonCompatibleI, "OMP121",
4245e3b55780SDimitry Andric                                                  Remark);
4246e3b55780SDimitry Andric 
4247e3b55780SDimitry Andric         LLVM_DEBUG(dbgs() << TAG << "SPMD-incompatible side-effect: "
4248e3b55780SDimitry Andric                           << *NonCompatibleI << "\n");
4249e3b55780SDimitry Andric       }
4250e3b55780SDimitry Andric 
4251e3b55780SDimitry Andric       return false;
4252e3b55780SDimitry Andric     }
4253e3b55780SDimitry Andric 
4254e3b55780SDimitry Andric     // Get the actual kernel, could be the caller of the anchor scope if we have
4255e3b55780SDimitry Andric     // a debug wrapper.
4256e3b55780SDimitry Andric     Function *Kernel = getAnchorScope();
4257e3b55780SDimitry Andric     if (Kernel->hasLocalLinkage()) {
4258e3b55780SDimitry Andric       assert(Kernel->hasOneUse() && "Unexpected use of debug kernel wrapper.");
4259e3b55780SDimitry Andric       auto *CB = cast<CallBase>(Kernel->user_back());
4260e3b55780SDimitry Andric       Kernel = CB->getCaller();
4261e3b55780SDimitry Andric     }
4262b1c73532SDimitry Andric     assert(omp::isOpenMPKernel(*Kernel) && "Expected kernel function!");
4263e3b55780SDimitry Andric 
4264e3b55780SDimitry Andric     // Check if the kernel is already in SPMD mode, if so, return success.
4265b1c73532SDimitry Andric     ConstantStruct *ExistingKernelEnvC =
4266b1c73532SDimitry Andric         KernelInfo::getKernelEnvironementFromKernelInitCB(KernelInitCB);
4267b1c73532SDimitry Andric     auto *ExecModeC =
4268b1c73532SDimitry Andric         KernelInfo::getExecModeFromKernelEnvironment(ExistingKernelEnvC);
4269b1c73532SDimitry Andric     const int8_t ExecModeVal = ExecModeC->getSExtValue();
4270e3b55780SDimitry Andric     if (ExecModeVal != OMP_TGT_EXEC_MODE_GENERIC)
4271e3b55780SDimitry Andric       return true;
4272e3b55780SDimitry Andric 
4273e3b55780SDimitry Andric     // We will now unconditionally modify the IR, indicate a change.
4274e3b55780SDimitry Andric     Changed = ChangeStatus::CHANGED;
4275e3b55780SDimitry Andric 
4276e3b55780SDimitry Andric     // Do not use instruction guards when no parallel is present inside
4277e3b55780SDimitry Andric     // the target region.
4278e3b55780SDimitry Andric     if (mayContainParallelRegion())
4279e3b55780SDimitry Andric       insertInstructionGuardsHelper(A);
4280e3b55780SDimitry Andric     else
4281e3b55780SDimitry Andric       forceSingleThreadPerWorkgroupHelper(A);
4282c0981da4SDimitry Andric 
4283c0981da4SDimitry Andric     // Adjust the global exec mode flag that tells the runtime what mode this
4284c0981da4SDimitry Andric     // kernel is executed in.
4285c0981da4SDimitry Andric     assert(ExecModeVal == OMP_TGT_EXEC_MODE_GENERIC &&
4286c0981da4SDimitry Andric            "Initially non-SPMD kernel has SPMD exec mode!");
428799aabd70SDimitry Andric     setExecModeOfKernelEnvironment(
428899aabd70SDimitry Andric         ConstantInt::get(ExecModeC->getIntegerType(),
428999aabd70SDimitry Andric                          ExecModeVal | OMP_TGT_EXEC_MODE_GENERIC_SPMD));
4290344a3780SDimitry Andric 
4291344a3780SDimitry Andric     ++NumOpenMPTargetRegionKernelsSPMD;
4292344a3780SDimitry Andric 
4293344a3780SDimitry Andric     auto Remark = [&](OptimizationRemark OR) {
4294344a3780SDimitry Andric       return OR << "Transformed generic-mode kernel to SPMD-mode.";
4295344a3780SDimitry Andric     };
4296344a3780SDimitry Andric     A.emitRemark<OptimizationRemark>(KernelInitCB, "OMP120", Remark);
4297344a3780SDimitry Andric     return true;
4298344a3780SDimitry Andric   };
4299344a3780SDimitry Andric 
buildCustomStateMachine__anon7bbaa8dc0111::AAKernelInfoFunction4300b1c73532SDimitry Andric   bool buildCustomStateMachine(Attributor &A, ChangeStatus &Changed) {
4301c0981da4SDimitry Andric     // If we have disabled state machine rewrites, don't make a custom one
4302c0981da4SDimitry Andric     if (DisableOpenMPOptStateMachineRewrite)
4303b1c73532SDimitry Andric       return false;
4304344a3780SDimitry Andric 
4305c0981da4SDimitry Andric     // Don't rewrite the state machine if we are not in a valid state.
4306c0981da4SDimitry Andric     if (!ReachedKnownParallelRegions.isValidState())
4307b1c73532SDimitry Andric       return false;
4308c0981da4SDimitry Andric 
43097fa27ce4SDimitry Andric     auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
43107fa27ce4SDimitry Andric     if (!OMPInfoCache.runtimeFnsAvailable(
43117fa27ce4SDimitry Andric             {OMPRTL___kmpc_get_hardware_num_threads_in_block,
43127fa27ce4SDimitry Andric              OMPRTL___kmpc_get_warp_size, OMPRTL___kmpc_barrier_simple_generic,
43137fa27ce4SDimitry Andric              OMPRTL___kmpc_kernel_parallel, OMPRTL___kmpc_kernel_end_parallel}))
4314b1c73532SDimitry Andric       return false;
43157fa27ce4SDimitry Andric 
4316b1c73532SDimitry Andric     ConstantStruct *ExistingKernelEnvC =
4317b1c73532SDimitry Andric         KernelInfo::getKernelEnvironementFromKernelInitCB(KernelInitCB);
4318344a3780SDimitry Andric 
4319344a3780SDimitry Andric     // Check if the current configuration is non-SPMD and generic state machine.
4320344a3780SDimitry Andric     // If we already have SPMD mode or a custom state machine we do not need to
4321344a3780SDimitry Andric     // go any further. If it is anything but a constant something is weird and
4322344a3780SDimitry Andric     // we give up.
4323b1c73532SDimitry Andric     ConstantInt *UseStateMachineC =
4324b1c73532SDimitry Andric         KernelInfo::getUseGenericStateMachineFromKernelEnvironment(
4325b1c73532SDimitry Andric             ExistingKernelEnvC);
4326b1c73532SDimitry Andric     ConstantInt *ModeC =
4327b1c73532SDimitry Andric         KernelInfo::getExecModeFromKernelEnvironment(ExistingKernelEnvC);
4328344a3780SDimitry Andric 
4329344a3780SDimitry Andric     // If we are stuck with generic mode, try to create a custom device (=GPU)
4330344a3780SDimitry Andric     // state machine which is specialized for the parallel regions that are
4331344a3780SDimitry Andric     // reachable by the kernel.
4332b1c73532SDimitry Andric     if (UseStateMachineC->isZero() ||
4333b1c73532SDimitry Andric         (ModeC->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD))
4334b1c73532SDimitry Andric       return false;
4335b1c73532SDimitry Andric 
4336b1c73532SDimitry Andric     Changed = ChangeStatus::CHANGED;
4337344a3780SDimitry Andric 
4338344a3780SDimitry Andric     // If not SPMD mode, indicate we use a custom state machine now.
4339b1c73532SDimitry Andric     setUseGenericStateMachineOfKernelEnvironment(
434099aabd70SDimitry Andric         ConstantInt::get(UseStateMachineC->getIntegerType(), false));
4341344a3780SDimitry Andric 
4342344a3780SDimitry Andric     // If we don't actually need a state machine we are done here. This can
4343344a3780SDimitry Andric     // happen if there simply are no parallel regions. In the resulting kernel
4344344a3780SDimitry Andric     // all worker threads will simply exit right away, leaving the main thread
4345344a3780SDimitry Andric     // to do the work alone.
4346c0981da4SDimitry Andric     if (!mayContainParallelRegion()) {
4347344a3780SDimitry Andric       ++NumOpenMPTargetRegionKernelsWithoutStateMachine;
4348344a3780SDimitry Andric 
4349344a3780SDimitry Andric       auto Remark = [&](OptimizationRemark OR) {
4350344a3780SDimitry Andric         return OR << "Removing unused state machine from generic-mode kernel.";
4351344a3780SDimitry Andric       };
4352344a3780SDimitry Andric       A.emitRemark<OptimizationRemark>(KernelInitCB, "OMP130", Remark);
4353344a3780SDimitry Andric 
4354b1c73532SDimitry Andric       return true;
4355344a3780SDimitry Andric     }
4356344a3780SDimitry Andric 
4357344a3780SDimitry Andric     // Keep track in the statistics of our new shiny custom state machine.
4358344a3780SDimitry Andric     if (ReachedUnknownParallelRegions.empty()) {
4359344a3780SDimitry Andric       ++NumOpenMPTargetRegionKernelsCustomStateMachineWithoutFallback;
4360344a3780SDimitry Andric 
4361344a3780SDimitry Andric       auto Remark = [&](OptimizationRemark OR) {
4362344a3780SDimitry Andric         return OR << "Rewriting generic-mode kernel with a customized state "
4363344a3780SDimitry Andric                      "machine.";
4364344a3780SDimitry Andric       };
4365344a3780SDimitry Andric       A.emitRemark<OptimizationRemark>(KernelInitCB, "OMP131", Remark);
4366344a3780SDimitry Andric     } else {
4367344a3780SDimitry Andric       ++NumOpenMPTargetRegionKernelsCustomStateMachineWithFallback;
4368344a3780SDimitry Andric 
4369344a3780SDimitry Andric       auto Remark = [&](OptimizationRemarkAnalysis OR) {
4370344a3780SDimitry Andric         return OR << "Generic-mode kernel is executed with a customized state "
4371344a3780SDimitry Andric                      "machine that requires a fallback.";
4372344a3780SDimitry Andric       };
4373344a3780SDimitry Andric       A.emitRemark<OptimizationRemarkAnalysis>(KernelInitCB, "OMP132", Remark);
4374344a3780SDimitry Andric 
4375344a3780SDimitry Andric       // Tell the user why we ended up with a fallback.
4376344a3780SDimitry Andric       for (CallBase *UnknownParallelRegionCB : ReachedUnknownParallelRegions) {
4377344a3780SDimitry Andric         if (!UnknownParallelRegionCB)
4378344a3780SDimitry Andric           continue;
4379344a3780SDimitry Andric         auto Remark = [&](OptimizationRemarkAnalysis ORA) {
4380344a3780SDimitry Andric           return ORA << "Call may contain unknown parallel regions. Use "
4381ac9a064cSDimitry Andric                      << "`[[omp::assume(\"omp_no_parallelism\")]]` to "
4382344a3780SDimitry Andric                         "override.";
4383344a3780SDimitry Andric         };
4384344a3780SDimitry Andric         A.emitRemark<OptimizationRemarkAnalysis>(UnknownParallelRegionCB,
4385344a3780SDimitry Andric                                                  "OMP133", Remark);
4386344a3780SDimitry Andric       }
4387344a3780SDimitry Andric     }
4388344a3780SDimitry Andric 
4389344a3780SDimitry Andric     // Create all the blocks:
4390344a3780SDimitry Andric     //
4391344a3780SDimitry Andric     //                       InitCB = __kmpc_target_init(...)
4392c0981da4SDimitry Andric     //                       BlockHwSize =
4393c0981da4SDimitry Andric     //                         __kmpc_get_hardware_num_threads_in_block();
4394c0981da4SDimitry Andric     //                       WarpSize = __kmpc_get_warp_size();
4395c0981da4SDimitry Andric     //                       BlockSize = BlockHwSize - WarpSize;
4396145449b1SDimitry Andric     // IsWorkerCheckBB:      bool IsWorker = InitCB != -1;
4397344a3780SDimitry Andric     //                       if (IsWorker) {
4398145449b1SDimitry Andric     //                         if (InitCB >= BlockSize) return;
4399c0981da4SDimitry Andric     // SMBeginBB:               __kmpc_barrier_simple_generic(...);
4400344a3780SDimitry Andric     //                         void *WorkFn;
4401344a3780SDimitry Andric     //                         bool Active = __kmpc_kernel_parallel(&WorkFn);
4402344a3780SDimitry Andric     //                         if (!WorkFn) return;
4403344a3780SDimitry Andric     // SMIsActiveCheckBB:       if (Active) {
4404344a3780SDimitry Andric     // SMIfCascadeCurrentBB:      if      (WorkFn == <ParFn0>)
4405344a3780SDimitry Andric     //                              ParFn0(...);
4406344a3780SDimitry Andric     // SMIfCascadeCurrentBB:      else if (WorkFn == <ParFn1>)
4407344a3780SDimitry Andric     //                              ParFn1(...);
4408344a3780SDimitry Andric     //                            ...
4409344a3780SDimitry Andric     // SMIfCascadeCurrentBB:      else
4410344a3780SDimitry Andric     //                              ((WorkFnTy*)WorkFn)(...);
4411344a3780SDimitry Andric     // SMEndParallelBB:           __kmpc_kernel_end_parallel(...);
4412344a3780SDimitry Andric     //                          }
4413c0981da4SDimitry Andric     // SMDoneBB:                __kmpc_barrier_simple_generic(...);
4414344a3780SDimitry Andric     //                          goto SMBeginBB;
4415344a3780SDimitry Andric     //                       }
4416344a3780SDimitry Andric     // UserCodeEntryBB:      // user code
4417344a3780SDimitry Andric     //                       __kmpc_target_deinit(...)
4418344a3780SDimitry Andric     //
4419b1c73532SDimitry Andric     auto &Ctx = getAnchorValue().getContext();
4420344a3780SDimitry Andric     Function *Kernel = getAssociatedFunction();
4421344a3780SDimitry Andric     assert(Kernel && "Expected an associated function!");
4422344a3780SDimitry Andric 
4423344a3780SDimitry Andric     BasicBlock *InitBB = KernelInitCB->getParent();
4424344a3780SDimitry Andric     BasicBlock *UserCodeEntryBB = InitBB->splitBasicBlock(
4425344a3780SDimitry Andric         KernelInitCB->getNextNode(), "thread.user_code.check");
4426c0981da4SDimitry Andric     BasicBlock *IsWorkerCheckBB =
4427c0981da4SDimitry Andric         BasicBlock::Create(Ctx, "is_worker_check", Kernel, UserCodeEntryBB);
4428344a3780SDimitry Andric     BasicBlock *StateMachineBeginBB = BasicBlock::Create(
4429344a3780SDimitry Andric         Ctx, "worker_state_machine.begin", Kernel, UserCodeEntryBB);
4430344a3780SDimitry Andric     BasicBlock *StateMachineFinishedBB = BasicBlock::Create(
4431344a3780SDimitry Andric         Ctx, "worker_state_machine.finished", Kernel, UserCodeEntryBB);
4432344a3780SDimitry Andric     BasicBlock *StateMachineIsActiveCheckBB = BasicBlock::Create(
4433344a3780SDimitry Andric         Ctx, "worker_state_machine.is_active.check", Kernel, UserCodeEntryBB);
4434344a3780SDimitry Andric     BasicBlock *StateMachineIfCascadeCurrentBB =
4435344a3780SDimitry Andric         BasicBlock::Create(Ctx, "worker_state_machine.parallel_region.check",
4436344a3780SDimitry Andric                            Kernel, UserCodeEntryBB);
4437344a3780SDimitry Andric     BasicBlock *StateMachineEndParallelBB =
4438344a3780SDimitry Andric         BasicBlock::Create(Ctx, "worker_state_machine.parallel_region.end",
4439344a3780SDimitry Andric                            Kernel, UserCodeEntryBB);
4440344a3780SDimitry Andric     BasicBlock *StateMachineDoneBarrierBB = BasicBlock::Create(
4441344a3780SDimitry Andric         Ctx, "worker_state_machine.done.barrier", Kernel, UserCodeEntryBB);
4442344a3780SDimitry Andric     A.registerManifestAddedBasicBlock(*InitBB);
4443344a3780SDimitry Andric     A.registerManifestAddedBasicBlock(*UserCodeEntryBB);
4444c0981da4SDimitry Andric     A.registerManifestAddedBasicBlock(*IsWorkerCheckBB);
4445344a3780SDimitry Andric     A.registerManifestAddedBasicBlock(*StateMachineBeginBB);
4446344a3780SDimitry Andric     A.registerManifestAddedBasicBlock(*StateMachineFinishedBB);
4447344a3780SDimitry Andric     A.registerManifestAddedBasicBlock(*StateMachineIsActiveCheckBB);
4448344a3780SDimitry Andric     A.registerManifestAddedBasicBlock(*StateMachineIfCascadeCurrentBB);
4449344a3780SDimitry Andric     A.registerManifestAddedBasicBlock(*StateMachineEndParallelBB);
4450344a3780SDimitry Andric     A.registerManifestAddedBasicBlock(*StateMachineDoneBarrierBB);
4451344a3780SDimitry Andric 
4452344a3780SDimitry Andric     const DebugLoc &DLoc = KernelInitCB->getDebugLoc();
4453344a3780SDimitry Andric     ReturnInst::Create(Ctx, StateMachineFinishedBB)->setDebugLoc(DLoc);
4454344a3780SDimitry Andric     InitBB->getTerminator()->eraseFromParent();
4455c0981da4SDimitry Andric 
4456145449b1SDimitry Andric     Instruction *IsWorker =
4457145449b1SDimitry Andric         ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_NE, KernelInitCB,
4458145449b1SDimitry Andric                          ConstantInt::get(KernelInitCB->getType(), -1),
4459145449b1SDimitry Andric                          "thread.is_worker", InitBB);
4460145449b1SDimitry Andric     IsWorker->setDebugLoc(DLoc);
4461145449b1SDimitry Andric     BranchInst::Create(IsWorkerCheckBB, UserCodeEntryBB, IsWorker, InitBB);
4462145449b1SDimitry Andric 
4463c0981da4SDimitry Andric     Module &M = *Kernel->getParent();
4464c0981da4SDimitry Andric     FunctionCallee BlockHwSizeFn =
4465c0981da4SDimitry Andric         OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
4466c0981da4SDimitry Andric             M, OMPRTL___kmpc_get_hardware_num_threads_in_block);
4467c0981da4SDimitry Andric     FunctionCallee WarpSizeFn =
4468c0981da4SDimitry Andric         OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
4469c0981da4SDimitry Andric             M, OMPRTL___kmpc_get_warp_size);
44706f8fc217SDimitry Andric     CallInst *BlockHwSize =
4471145449b1SDimitry Andric         CallInst::Create(BlockHwSizeFn, "block.hw_size", IsWorkerCheckBB);
44726f8fc217SDimitry Andric     OMPInfoCache.setCallingConvention(BlockHwSizeFn, BlockHwSize);
4473c0981da4SDimitry Andric     BlockHwSize->setDebugLoc(DLoc);
4474145449b1SDimitry Andric     CallInst *WarpSize =
4475145449b1SDimitry Andric         CallInst::Create(WarpSizeFn, "warp.size", IsWorkerCheckBB);
44766f8fc217SDimitry Andric     OMPInfoCache.setCallingConvention(WarpSizeFn, WarpSize);
4477c0981da4SDimitry Andric     WarpSize->setDebugLoc(DLoc);
4478145449b1SDimitry Andric     Instruction *BlockSize = BinaryOperator::CreateSub(
4479145449b1SDimitry Andric         BlockHwSize, WarpSize, "block.size", IsWorkerCheckBB);
4480c0981da4SDimitry Andric     BlockSize->setDebugLoc(DLoc);
4481145449b1SDimitry Andric     Instruction *IsMainOrWorker = ICmpInst::Create(
4482145449b1SDimitry Andric         ICmpInst::ICmp, llvm::CmpInst::ICMP_SLT, KernelInitCB, BlockSize,
4483145449b1SDimitry Andric         "thread.is_main_or_worker", IsWorkerCheckBB);
4484c0981da4SDimitry Andric     IsMainOrWorker->setDebugLoc(DLoc);
4485145449b1SDimitry Andric     BranchInst::Create(StateMachineBeginBB, StateMachineFinishedBB,
4486145449b1SDimitry Andric                        IsMainOrWorker, IsWorkerCheckBB);
4487344a3780SDimitry Andric 
4488344a3780SDimitry Andric     // Create local storage for the work function pointer.
4489c0981da4SDimitry Andric     const DataLayout &DL = M.getDataLayout();
4490b1c73532SDimitry Andric     Type *VoidPtrTy = PointerType::getUnqual(Ctx);
4491c0981da4SDimitry Andric     Instruction *WorkFnAI =
4492c0981da4SDimitry Andric         new AllocaInst(VoidPtrTy, DL.getAllocaAddrSpace(), nullptr,
4493ac9a064cSDimitry Andric                        "worker.work_fn.addr", Kernel->getEntryBlock().begin());
4494344a3780SDimitry Andric     WorkFnAI->setDebugLoc(DLoc);
4495344a3780SDimitry Andric 
4496344a3780SDimitry Andric     OMPInfoCache.OMPBuilder.updateToLocation(
4497344a3780SDimitry Andric         OpenMPIRBuilder::LocationDescription(
4498344a3780SDimitry Andric             IRBuilder<>::InsertPoint(StateMachineBeginBB,
4499344a3780SDimitry Andric                                      StateMachineBeginBB->end()),
4500344a3780SDimitry Andric             DLoc));
4501344a3780SDimitry Andric 
4502b1c73532SDimitry Andric     Value *Ident = KernelInfo::getIdentFromKernelEnvironment(KernelEnvC);
4503344a3780SDimitry Andric     Value *GTid = KernelInitCB;
4504344a3780SDimitry Andric 
4505344a3780SDimitry Andric     FunctionCallee BarrierFn =
4506344a3780SDimitry Andric         OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
4507c0981da4SDimitry Andric             M, OMPRTL___kmpc_barrier_simple_generic);
45086f8fc217SDimitry Andric     CallInst *Barrier =
45096f8fc217SDimitry Andric         CallInst::Create(BarrierFn, {Ident, GTid}, "", StateMachineBeginBB);
45106f8fc217SDimitry Andric     OMPInfoCache.setCallingConvention(BarrierFn, Barrier);
45116f8fc217SDimitry Andric     Barrier->setDebugLoc(DLoc);
4512344a3780SDimitry Andric 
4513c0981da4SDimitry Andric     if (WorkFnAI->getType()->getPointerAddressSpace() !=
4514c0981da4SDimitry Andric         (unsigned int)AddressSpace::Generic) {
4515c0981da4SDimitry Andric       WorkFnAI = new AddrSpaceCastInst(
45167fa27ce4SDimitry Andric           WorkFnAI, PointerType::get(Ctx, (unsigned int)AddressSpace::Generic),
4517c0981da4SDimitry Andric           WorkFnAI->getName() + ".generic", StateMachineBeginBB);
4518c0981da4SDimitry Andric       WorkFnAI->setDebugLoc(DLoc);
4519c0981da4SDimitry Andric     }
4520c0981da4SDimitry Andric 
4521344a3780SDimitry Andric     FunctionCallee KernelParallelFn =
4522344a3780SDimitry Andric         OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
4523344a3780SDimitry Andric             M, OMPRTL___kmpc_kernel_parallel);
45246f8fc217SDimitry Andric     CallInst *IsActiveWorker = CallInst::Create(
4525344a3780SDimitry Andric         KernelParallelFn, {WorkFnAI}, "worker.is_active", StateMachineBeginBB);
45266f8fc217SDimitry Andric     OMPInfoCache.setCallingConvention(KernelParallelFn, IsActiveWorker);
4527344a3780SDimitry Andric     IsActiveWorker->setDebugLoc(DLoc);
4528344a3780SDimitry Andric     Instruction *WorkFn = new LoadInst(VoidPtrTy, WorkFnAI, "worker.work_fn",
4529344a3780SDimitry Andric                                        StateMachineBeginBB);
4530344a3780SDimitry Andric     WorkFn->setDebugLoc(DLoc);
4531344a3780SDimitry Andric 
4532344a3780SDimitry Andric     FunctionType *ParallelRegionFnTy = FunctionType::get(
4533344a3780SDimitry Andric         Type::getVoidTy(Ctx), {Type::getInt16Ty(Ctx), Type::getInt32Ty(Ctx)},
4534344a3780SDimitry Andric         false);
4535344a3780SDimitry Andric 
4536344a3780SDimitry Andric     Instruction *IsDone =
4537344a3780SDimitry Andric         ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_EQ, WorkFn,
4538344a3780SDimitry Andric                          Constant::getNullValue(VoidPtrTy), "worker.is_done",
4539344a3780SDimitry Andric                          StateMachineBeginBB);
4540344a3780SDimitry Andric     IsDone->setDebugLoc(DLoc);
4541344a3780SDimitry Andric     BranchInst::Create(StateMachineFinishedBB, StateMachineIsActiveCheckBB,
4542344a3780SDimitry Andric                        IsDone, StateMachineBeginBB)
4543344a3780SDimitry Andric         ->setDebugLoc(DLoc);
4544344a3780SDimitry Andric 
4545344a3780SDimitry Andric     BranchInst::Create(StateMachineIfCascadeCurrentBB,
4546344a3780SDimitry Andric                        StateMachineDoneBarrierBB, IsActiveWorker,
4547344a3780SDimitry Andric                        StateMachineIsActiveCheckBB)
4548344a3780SDimitry Andric         ->setDebugLoc(DLoc);
4549344a3780SDimitry Andric 
4550344a3780SDimitry Andric     Value *ZeroArg =
4551344a3780SDimitry Andric         Constant::getNullValue(ParallelRegionFnTy->getParamType(0));
4552344a3780SDimitry Andric 
4553b1c73532SDimitry Andric     const unsigned int WrapperFunctionArgNo = 6;
4554b1c73532SDimitry Andric 
4555344a3780SDimitry Andric     // Now that we have most of the CFG skeleton it is time for the if-cascade
4556344a3780SDimitry Andric     // that checks the function pointer we got from the runtime against the
4557344a3780SDimitry Andric     // parallel regions we expect, if there are any.
4558c0981da4SDimitry Andric     for (int I = 0, E = ReachedKnownParallelRegions.size(); I < E; ++I) {
4559b1c73532SDimitry Andric       auto *CB = ReachedKnownParallelRegions[I];
4560b1c73532SDimitry Andric       auto *ParallelRegion = dyn_cast<Function>(
4561b1c73532SDimitry Andric           CB->getArgOperand(WrapperFunctionArgNo)->stripPointerCasts());
4562344a3780SDimitry Andric       BasicBlock *PRExecuteBB = BasicBlock::Create(
4563344a3780SDimitry Andric           Ctx, "worker_state_machine.parallel_region.execute", Kernel,
4564344a3780SDimitry Andric           StateMachineEndParallelBB);
4565344a3780SDimitry Andric       CallInst::Create(ParallelRegion, {ZeroArg, GTid}, "", PRExecuteBB)
4566344a3780SDimitry Andric           ->setDebugLoc(DLoc);
4567344a3780SDimitry Andric       BranchInst::Create(StateMachineEndParallelBB, PRExecuteBB)
4568344a3780SDimitry Andric           ->setDebugLoc(DLoc);
4569344a3780SDimitry Andric 
4570344a3780SDimitry Andric       BasicBlock *PRNextBB =
4571344a3780SDimitry Andric           BasicBlock::Create(Ctx, "worker_state_machine.parallel_region.check",
4572344a3780SDimitry Andric                              Kernel, StateMachineEndParallelBB);
4573b1c73532SDimitry Andric       A.registerManifestAddedBasicBlock(*PRExecuteBB);
4574b1c73532SDimitry Andric       A.registerManifestAddedBasicBlock(*PRNextBB);
4575344a3780SDimitry Andric 
4576344a3780SDimitry Andric       // Check if we need to compare the pointer at all or if we can just
4577344a3780SDimitry Andric       // call the parallel region function.
4578344a3780SDimitry Andric       Value *IsPR;
4579c0981da4SDimitry Andric       if (I + 1 < E || !ReachedUnknownParallelRegions.empty()) {
4580344a3780SDimitry Andric         Instruction *CmpI = ICmpInst::Create(
4581b1c73532SDimitry Andric             ICmpInst::ICmp, llvm::CmpInst::ICMP_EQ, WorkFn, ParallelRegion,
4582344a3780SDimitry Andric             "worker.check_parallel_region", StateMachineIfCascadeCurrentBB);
4583344a3780SDimitry Andric         CmpI->setDebugLoc(DLoc);
4584344a3780SDimitry Andric         IsPR = CmpI;
4585344a3780SDimitry Andric       } else {
4586344a3780SDimitry Andric         IsPR = ConstantInt::getTrue(Ctx);
4587344a3780SDimitry Andric       }
4588344a3780SDimitry Andric 
4589344a3780SDimitry Andric       BranchInst::Create(PRExecuteBB, PRNextBB, IsPR,
4590344a3780SDimitry Andric                          StateMachineIfCascadeCurrentBB)
4591344a3780SDimitry Andric           ->setDebugLoc(DLoc);
4592344a3780SDimitry Andric       StateMachineIfCascadeCurrentBB = PRNextBB;
4593344a3780SDimitry Andric     }
4594344a3780SDimitry Andric 
4595344a3780SDimitry Andric     // At the end of the if-cascade we place the indirect function pointer call
4596344a3780SDimitry Andric     // in case we might need it, that is if there can be parallel regions we
4597344a3780SDimitry Andric     // have not handled in the if-cascade above.
4598344a3780SDimitry Andric     if (!ReachedUnknownParallelRegions.empty()) {
4599344a3780SDimitry Andric       StateMachineIfCascadeCurrentBB->setName(
4600344a3780SDimitry Andric           "worker_state_machine.parallel_region.fallback.execute");
4601b1c73532SDimitry Andric       CallInst::Create(ParallelRegionFnTy, WorkFn, {ZeroArg, GTid}, "",
4602344a3780SDimitry Andric                        StateMachineIfCascadeCurrentBB)
4603344a3780SDimitry Andric           ->setDebugLoc(DLoc);
4604344a3780SDimitry Andric     }
4605344a3780SDimitry Andric     BranchInst::Create(StateMachineEndParallelBB,
4606344a3780SDimitry Andric                        StateMachineIfCascadeCurrentBB)
4607344a3780SDimitry Andric         ->setDebugLoc(DLoc);
4608344a3780SDimitry Andric 
46096f8fc217SDimitry Andric     FunctionCallee EndParallelFn =
46106f8fc217SDimitry Andric         OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
46116f8fc217SDimitry Andric             M, OMPRTL___kmpc_kernel_end_parallel);
46126f8fc217SDimitry Andric     CallInst *EndParallel =
46136f8fc217SDimitry Andric         CallInst::Create(EndParallelFn, {}, "", StateMachineEndParallelBB);
46146f8fc217SDimitry Andric     OMPInfoCache.setCallingConvention(EndParallelFn, EndParallel);
46156f8fc217SDimitry Andric     EndParallel->setDebugLoc(DLoc);
4616344a3780SDimitry Andric     BranchInst::Create(StateMachineDoneBarrierBB, StateMachineEndParallelBB)
4617344a3780SDimitry Andric         ->setDebugLoc(DLoc);
4618344a3780SDimitry Andric 
4619344a3780SDimitry Andric     CallInst::Create(BarrierFn, {Ident, GTid}, "", StateMachineDoneBarrierBB)
4620344a3780SDimitry Andric         ->setDebugLoc(DLoc);
4621344a3780SDimitry Andric     BranchInst::Create(StateMachineBeginBB, StateMachineDoneBarrierBB)
4622344a3780SDimitry Andric         ->setDebugLoc(DLoc);
4623344a3780SDimitry Andric 
4624b1c73532SDimitry Andric     return true;
4625344a3780SDimitry Andric   }
4626344a3780SDimitry Andric 
4627344a3780SDimitry Andric   /// Fixpoint iteration update function. Will be called every time a dependence
4628344a3780SDimitry Andric   /// changed its state (and in the beginning).
updateImpl__anon7bbaa8dc0111::AAKernelInfoFunction4629344a3780SDimitry Andric   ChangeStatus updateImpl(Attributor &A) override {
4630344a3780SDimitry Andric     KernelInfoState StateBefore = getState();
4631344a3780SDimitry Andric 
4632b1c73532SDimitry Andric     // When we leave this function this RAII will make sure the member
4633b1c73532SDimitry Andric     // KernelEnvC is updated properly depending on the state. That member is
4634b1c73532SDimitry Andric     // used for simplification of values and needs to be up to date at all
4635b1c73532SDimitry Andric     // times.
4636b1c73532SDimitry Andric     struct UpdateKernelEnvCRAII {
4637b1c73532SDimitry Andric       AAKernelInfoFunction &AA;
4638b1c73532SDimitry Andric 
4639b1c73532SDimitry Andric       UpdateKernelEnvCRAII(AAKernelInfoFunction &AA) : AA(AA) {}
4640b1c73532SDimitry Andric 
4641b1c73532SDimitry Andric       ~UpdateKernelEnvCRAII() {
4642b1c73532SDimitry Andric         if (!AA.KernelEnvC)
4643b1c73532SDimitry Andric           return;
4644b1c73532SDimitry Andric 
4645b1c73532SDimitry Andric         ConstantStruct *ExistingKernelEnvC =
4646b1c73532SDimitry Andric             KernelInfo::getKernelEnvironementFromKernelInitCB(AA.KernelInitCB);
4647b1c73532SDimitry Andric 
4648b1c73532SDimitry Andric         if (!AA.isValidState()) {
4649b1c73532SDimitry Andric           AA.KernelEnvC = ExistingKernelEnvC;
4650b1c73532SDimitry Andric           return;
4651b1c73532SDimitry Andric         }
4652b1c73532SDimitry Andric 
4653b1c73532SDimitry Andric         if (!AA.ReachedKnownParallelRegions.isValidState())
4654b1c73532SDimitry Andric           AA.setUseGenericStateMachineOfKernelEnvironment(
4655b1c73532SDimitry Andric               KernelInfo::getUseGenericStateMachineFromKernelEnvironment(
4656b1c73532SDimitry Andric                   ExistingKernelEnvC));
4657b1c73532SDimitry Andric 
4658b1c73532SDimitry Andric         if (!AA.SPMDCompatibilityTracker.isValidState())
4659b1c73532SDimitry Andric           AA.setExecModeOfKernelEnvironment(
4660b1c73532SDimitry Andric               KernelInfo::getExecModeFromKernelEnvironment(ExistingKernelEnvC));
4661b1c73532SDimitry Andric 
4662b1c73532SDimitry Andric         ConstantInt *MayUseNestedParallelismC =
4663b1c73532SDimitry Andric             KernelInfo::getMayUseNestedParallelismFromKernelEnvironment(
4664b1c73532SDimitry Andric                 AA.KernelEnvC);
4665b1c73532SDimitry Andric         ConstantInt *NewMayUseNestedParallelismC = ConstantInt::get(
466699aabd70SDimitry Andric             MayUseNestedParallelismC->getIntegerType(), AA.NestedParallelism);
4667b1c73532SDimitry Andric         AA.setMayUseNestedParallelismOfKernelEnvironment(
4668b1c73532SDimitry Andric             NewMayUseNestedParallelismC);
4669b1c73532SDimitry Andric       }
4670b1c73532SDimitry Andric     } RAII(*this);
4671b1c73532SDimitry Andric 
4672344a3780SDimitry Andric     // Callback to check a read/write instruction.
4673344a3780SDimitry Andric     auto CheckRWInst = [&](Instruction &I) {
4674344a3780SDimitry Andric       // We handle calls later.
4675344a3780SDimitry Andric       if (isa<CallBase>(I))
4676344a3780SDimitry Andric         return true;
4677344a3780SDimitry Andric       // We only care about write effects.
4678344a3780SDimitry Andric       if (!I.mayWriteToMemory())
4679344a3780SDimitry Andric         return true;
4680344a3780SDimitry Andric       if (auto *SI = dyn_cast<StoreInst>(&I)) {
46817fa27ce4SDimitry Andric         const auto *UnderlyingObjsAA = A.getAAFor<AAUnderlyingObjects>(
4682e3b55780SDimitry Andric             *this, IRPosition::value(*SI->getPointerOperand()),
4683e3b55780SDimitry Andric             DepClassTy::OPTIONAL);
46847fa27ce4SDimitry Andric         auto *HS = A.getAAFor<AAHeapToStack>(
4685c0981da4SDimitry Andric             *this, IRPosition::function(*I.getFunction()),
4686c0981da4SDimitry Andric             DepClassTy::OPTIONAL);
46877fa27ce4SDimitry Andric         if (UnderlyingObjsAA &&
46887fa27ce4SDimitry Andric             UnderlyingObjsAA->forallUnderlyingObjects([&](Value &Obj) {
4689e3b55780SDimitry Andric               if (AA::isAssumedThreadLocalObject(A, Obj, *this))
4690c0981da4SDimitry Andric                 return true;
4691e3b55780SDimitry Andric               // Check for AAHeapToStack moved objects which must not be
4692e3b55780SDimitry Andric               // guarded.
4693e3b55780SDimitry Andric               auto *CB = dyn_cast<CallBase>(&Obj);
46947fa27ce4SDimitry Andric               return CB && HS && HS->isAssumedHeapToStack(*CB);
4695e3b55780SDimitry Andric             }))
4696e3b55780SDimitry Andric           return true;
4697c0981da4SDimitry Andric       }
4698c0981da4SDimitry Andric 
4699c0981da4SDimitry Andric       // Insert instruction that needs guarding.
4700344a3780SDimitry Andric       SPMDCompatibilityTracker.insert(&I);
4701344a3780SDimitry Andric       return true;
4702344a3780SDimitry Andric     };
4703344a3780SDimitry Andric 
4704344a3780SDimitry Andric     bool UsedAssumedInformationInCheckRWInst = false;
4705344a3780SDimitry Andric     if (!SPMDCompatibilityTracker.isAtFixpoint())
4706344a3780SDimitry Andric       if (!A.checkForAllReadWriteInstructions(
4707344a3780SDimitry Andric               CheckRWInst, *this, UsedAssumedInformationInCheckRWInst))
4708344a3780SDimitry Andric         SPMDCompatibilityTracker.indicatePessimisticFixpoint();
4709344a3780SDimitry Andric 
4710f65dcba8SDimitry Andric     bool UsedAssumedInformationFromReachingKernels = false;
4711344a3780SDimitry Andric     if (!IsKernelEntry) {
4712344a3780SDimitry Andric       updateParallelLevels(A);
4713c0981da4SDimitry Andric 
4714f65dcba8SDimitry Andric       bool AllReachingKernelsKnown = true;
4715f65dcba8SDimitry Andric       updateReachingKernelEntries(A, AllReachingKernelsKnown);
4716f65dcba8SDimitry Andric       UsedAssumedInformationFromReachingKernels = !AllReachingKernelsKnown;
4717f65dcba8SDimitry Andric 
4718e3b55780SDimitry Andric       if (!SPMDCompatibilityTracker.empty()) {
4719c0981da4SDimitry Andric         if (!ParallelLevels.isValidState())
4720c0981da4SDimitry Andric           SPMDCompatibilityTracker.indicatePessimisticFixpoint();
4721f65dcba8SDimitry Andric         else if (!ReachingKernelEntries.isValidState())
4722f65dcba8SDimitry Andric           SPMDCompatibilityTracker.indicatePessimisticFixpoint();
4723e3b55780SDimitry Andric         else {
          // Check if all reaching kernels agree on the mode as we can otherwise
          // not guard instructions. We might not be sure about the mode so we
          // cannot fix the internal spmd-zation state either.
4727f65dcba8SDimitry Andric           int SPMD = 0, Generic = 0;
4728f65dcba8SDimitry Andric           for (auto *Kernel : ReachingKernelEntries) {
47297fa27ce4SDimitry Andric             auto *CBAA = A.getAAFor<AAKernelInfo>(
4730f65dcba8SDimitry Andric                 *this, IRPosition::function(*Kernel), DepClassTy::OPTIONAL);
47317fa27ce4SDimitry Andric             if (CBAA && CBAA->SPMDCompatibilityTracker.isValidState() &&
47327fa27ce4SDimitry Andric                 CBAA->SPMDCompatibilityTracker.isAssumed())
4733f65dcba8SDimitry Andric               ++SPMD;
4734f65dcba8SDimitry Andric             else
4735f65dcba8SDimitry Andric               ++Generic;
47367fa27ce4SDimitry Andric             if (!CBAA || !CBAA->SPMDCompatibilityTracker.isAtFixpoint())
4737f65dcba8SDimitry Andric               UsedAssumedInformationFromReachingKernels = true;
4738f65dcba8SDimitry Andric           }
4739f65dcba8SDimitry Andric           if (SPMD != 0 && Generic != 0)
4740f65dcba8SDimitry Andric             SPMDCompatibilityTracker.indicatePessimisticFixpoint();
4741f65dcba8SDimitry Andric         }
4742344a3780SDimitry Andric       }
4743e3b55780SDimitry Andric     }
4744344a3780SDimitry Andric 
4745344a3780SDimitry Andric     // Callback to check a call instruction.
4746c0981da4SDimitry Andric     bool AllParallelRegionStatesWereFixed = true;
4747344a3780SDimitry Andric     bool AllSPMDStatesWereFixed = true;
4748344a3780SDimitry Andric     auto CheckCallInst = [&](Instruction &I) {
4749344a3780SDimitry Andric       auto &CB = cast<CallBase>(I);
47507fa27ce4SDimitry Andric       auto *CBAA = A.getAAFor<AAKernelInfo>(
4751344a3780SDimitry Andric           *this, IRPosition::callsite_function(CB), DepClassTy::OPTIONAL);
47527fa27ce4SDimitry Andric       if (!CBAA)
47537fa27ce4SDimitry Andric         return false;
47547fa27ce4SDimitry Andric       getState() ^= CBAA->getState();
47557fa27ce4SDimitry Andric       AllSPMDStatesWereFixed &= CBAA->SPMDCompatibilityTracker.isAtFixpoint();
4756c0981da4SDimitry Andric       AllParallelRegionStatesWereFixed &=
47577fa27ce4SDimitry Andric           CBAA->ReachedKnownParallelRegions.isAtFixpoint();
4758c0981da4SDimitry Andric       AllParallelRegionStatesWereFixed &=
47597fa27ce4SDimitry Andric           CBAA->ReachedUnknownParallelRegions.isAtFixpoint();
4760344a3780SDimitry Andric       return true;
4761344a3780SDimitry Andric     };
4762344a3780SDimitry Andric 
4763344a3780SDimitry Andric     bool UsedAssumedInformationInCheckCallInst = false;
4764344a3780SDimitry Andric     if (!A.checkForAllCallLikeInstructions(
4765c0981da4SDimitry Andric             CheckCallInst, *this, UsedAssumedInformationInCheckCallInst)) {
4766c0981da4SDimitry Andric       LLVM_DEBUG(dbgs() << TAG
4767c0981da4SDimitry Andric                         << "Failed to visit all call-like instructions!\n";);
4768344a3780SDimitry Andric       return indicatePessimisticFixpoint();
4769c0981da4SDimitry Andric     }
4770c0981da4SDimitry Andric 
4771c0981da4SDimitry Andric     // If we haven't used any assumed information for the reached parallel
4772c0981da4SDimitry Andric     // region states we can fix it.
4773c0981da4SDimitry Andric     if (!UsedAssumedInformationInCheckCallInst &&
4774c0981da4SDimitry Andric         AllParallelRegionStatesWereFixed) {
4775c0981da4SDimitry Andric       ReachedKnownParallelRegions.indicateOptimisticFixpoint();
4776c0981da4SDimitry Andric       ReachedUnknownParallelRegions.indicateOptimisticFixpoint();
4777c0981da4SDimitry Andric     }
4778c0981da4SDimitry Andric 
4779344a3780SDimitry Andric     // If we haven't used any assumed information for the SPMD state we can fix
4780344a3780SDimitry Andric     // it.
4781344a3780SDimitry Andric     if (!UsedAssumedInformationInCheckRWInst &&
4782f65dcba8SDimitry Andric         !UsedAssumedInformationInCheckCallInst &&
4783f65dcba8SDimitry Andric         !UsedAssumedInformationFromReachingKernels && AllSPMDStatesWereFixed)
4784344a3780SDimitry Andric       SPMDCompatibilityTracker.indicateOptimisticFixpoint();
4785344a3780SDimitry Andric 
4786344a3780SDimitry Andric     return StateBefore == getState() ? ChangeStatus::UNCHANGED
4787344a3780SDimitry Andric                                      : ChangeStatus::CHANGED;
4788344a3780SDimitry Andric   }
4789344a3780SDimitry Andric 
4790344a3780SDimitry Andric private:
4791344a3780SDimitry Andric   /// Update info regarding reaching kernels.
updateReachingKernelEntries__anon7bbaa8dc0111::AAKernelInfoFunction4792f65dcba8SDimitry Andric   void updateReachingKernelEntries(Attributor &A,
4793f65dcba8SDimitry Andric                                    bool &AllReachingKernelsKnown) {
4794344a3780SDimitry Andric     auto PredCallSite = [&](AbstractCallSite ACS) {
4795344a3780SDimitry Andric       Function *Caller = ACS.getInstruction()->getFunction();
4796344a3780SDimitry Andric 
4797344a3780SDimitry Andric       assert(Caller && "Caller is nullptr");
4798344a3780SDimitry Andric 
47997fa27ce4SDimitry Andric       auto *CAA = A.getOrCreateAAFor<AAKernelInfo>(
4800344a3780SDimitry Andric           IRPosition::function(*Caller), this, DepClassTy::REQUIRED);
48017fa27ce4SDimitry Andric       if (CAA && CAA->ReachingKernelEntries.isValidState()) {
48027fa27ce4SDimitry Andric         ReachingKernelEntries ^= CAA->ReachingKernelEntries;
4803344a3780SDimitry Andric         return true;
4804344a3780SDimitry Andric       }
4805344a3780SDimitry Andric 
4806344a3780SDimitry Andric       // We lost track of the caller of the associated function, any kernel
4807344a3780SDimitry Andric       // could reach now.
4808344a3780SDimitry Andric       ReachingKernelEntries.indicatePessimisticFixpoint();
4809344a3780SDimitry Andric 
4810344a3780SDimitry Andric       return true;
4811344a3780SDimitry Andric     };
4812344a3780SDimitry Andric 
4813344a3780SDimitry Andric     if (!A.checkForAllCallSites(PredCallSite, *this,
4814344a3780SDimitry Andric                                 true /* RequireAllCallSites */,
4815f65dcba8SDimitry Andric                                 AllReachingKernelsKnown))
4816344a3780SDimitry Andric       ReachingKernelEntries.indicatePessimisticFixpoint();
4817344a3780SDimitry Andric   }
4818344a3780SDimitry Andric 
4819344a3780SDimitry Andric   /// Update info regarding parallel levels.
updateParallelLevels__anon7bbaa8dc0111::AAKernelInfoFunction4820344a3780SDimitry Andric   void updateParallelLevels(Attributor &A) {
4821344a3780SDimitry Andric     auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
4822344a3780SDimitry Andric     OMPInformationCache::RuntimeFunctionInfo &Parallel51RFI =
4823344a3780SDimitry Andric         OMPInfoCache.RFIs[OMPRTL___kmpc_parallel_51];
4824344a3780SDimitry Andric 
4825344a3780SDimitry Andric     auto PredCallSite = [&](AbstractCallSite ACS) {
4826344a3780SDimitry Andric       Function *Caller = ACS.getInstruction()->getFunction();
4827344a3780SDimitry Andric 
4828344a3780SDimitry Andric       assert(Caller && "Caller is nullptr");
4829344a3780SDimitry Andric 
48307fa27ce4SDimitry Andric       auto *CAA =
4831344a3780SDimitry Andric           A.getOrCreateAAFor<AAKernelInfo>(IRPosition::function(*Caller));
48327fa27ce4SDimitry Andric       if (CAA && CAA->ParallelLevels.isValidState()) {
4833344a3780SDimitry Andric         // Any function that is called by `__kmpc_parallel_51` will not be
4834344a3780SDimitry Andric         // folded as the parallel level in the function is updated. In order to
4835344a3780SDimitry Andric         // get it right, all the analysis would depend on the implentation. That
4836344a3780SDimitry Andric         // said, if in the future any change to the implementation, the analysis
4837344a3780SDimitry Andric         // could be wrong. As a consequence, we are just conservative here.
4838344a3780SDimitry Andric         if (Caller == Parallel51RFI.Declaration) {
4839344a3780SDimitry Andric           ParallelLevels.indicatePessimisticFixpoint();
4840344a3780SDimitry Andric           return true;
4841344a3780SDimitry Andric         }
4842344a3780SDimitry Andric 
48437fa27ce4SDimitry Andric         ParallelLevels ^= CAA->ParallelLevels;
4844344a3780SDimitry Andric 
4845344a3780SDimitry Andric         return true;
4846344a3780SDimitry Andric       }
4847344a3780SDimitry Andric 
4848344a3780SDimitry Andric       // We lost track of the caller of the associated function, any kernel
4849344a3780SDimitry Andric       // could reach now.
4850344a3780SDimitry Andric       ParallelLevels.indicatePessimisticFixpoint();
4851344a3780SDimitry Andric 
4852344a3780SDimitry Andric       return true;
4853344a3780SDimitry Andric     };
4854344a3780SDimitry Andric 
4855344a3780SDimitry Andric     bool AllCallSitesKnown = true;
4856344a3780SDimitry Andric     if (!A.checkForAllCallSites(PredCallSite, *this,
4857344a3780SDimitry Andric                                 true /* RequireAllCallSites */,
4858344a3780SDimitry Andric                                 AllCallSitesKnown))
4859344a3780SDimitry Andric       ParallelLevels.indicatePessimisticFixpoint();
4860344a3780SDimitry Andric   }
4861344a3780SDimitry Andric };
4862344a3780SDimitry Andric 
4863344a3780SDimitry Andric /// The call site kernel info abstract attribute, basically, what can we say
4864344a3780SDimitry Andric /// about a call site with regards to the KernelInfoState. For now this simply
4865344a3780SDimitry Andric /// forwards the information from the callee.
4866344a3780SDimitry Andric struct AAKernelInfoCallSite : AAKernelInfo {
AAKernelInfoCallSite__anon7bbaa8dc0111::AAKernelInfoCallSite4867344a3780SDimitry Andric   AAKernelInfoCallSite(const IRPosition &IRP, Attributor &A)
4868344a3780SDimitry Andric       : AAKernelInfo(IRP, A) {}
4869344a3780SDimitry Andric 
4870344a3780SDimitry Andric   /// See AbstractAttribute::initialize(...).
initialize__anon7bbaa8dc0111::AAKernelInfoCallSite4871344a3780SDimitry Andric   void initialize(Attributor &A) override {
4872344a3780SDimitry Andric     AAKernelInfo::initialize(A);
4873344a3780SDimitry Andric 
4874344a3780SDimitry Andric     CallBase &CB = cast<CallBase>(getAssociatedValue());
48757fa27ce4SDimitry Andric     auto *AssumptionAA = A.getAAFor<AAAssumptionInfo>(
4876c0981da4SDimitry Andric         *this, IRPosition::callsite_function(CB), DepClassTy::OPTIONAL);
4877344a3780SDimitry Andric 
4878344a3780SDimitry Andric     // Check for SPMD-mode assumptions.
48797fa27ce4SDimitry Andric     if (AssumptionAA && AssumptionAA->hasAssumption("ompx_spmd_amenable")) {
4880c0981da4SDimitry Andric       indicateOptimisticFixpoint();
4881b1c73532SDimitry Andric       return;
4882c0981da4SDimitry Andric     }
4883344a3780SDimitry Andric 
4884344a3780SDimitry Andric     // First weed out calls we do not care about, that is readonly/readnone
4885344a3780SDimitry Andric     // calls, intrinsics, and "no_openmp" calls. Neither of these can reach a
4886344a3780SDimitry Andric     // parallel region or anything else we are looking for.
4887344a3780SDimitry Andric     if (!CB.mayWriteToMemory() || isa<IntrinsicInst>(CB)) {
4888344a3780SDimitry Andric       indicateOptimisticFixpoint();
4889344a3780SDimitry Andric       return;
4890344a3780SDimitry Andric     }
4891344a3780SDimitry Andric 
4892344a3780SDimitry Andric     // Next we check if we know the callee. If it is a known OpenMP function
4893344a3780SDimitry Andric     // we will handle them explicitly in the switch below. If it is not, we
4894344a3780SDimitry Andric     // will use an AAKernelInfo object on the callee to gather information and
4895344a3780SDimitry Andric     // merge that into the current state. The latter happens in the updateImpl.
4896b1c73532SDimitry Andric     auto CheckCallee = [&](Function *Callee, unsigned NumCallees) {
4897344a3780SDimitry Andric       auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
4898344a3780SDimitry Andric       const auto &It = OMPInfoCache.RuntimeFunctionIDMap.find(Callee);
4899344a3780SDimitry Andric       if (It == OMPInfoCache.RuntimeFunctionIDMap.end()) {
4900344a3780SDimitry Andric         // Unknown caller or declarations are not analyzable, we give up.
4901344a3780SDimitry Andric         if (!Callee || !A.isFunctionIPOAmendable(*Callee)) {
4902344a3780SDimitry Andric 
4903344a3780SDimitry Andric           // Unknown callees might contain parallel regions, except if they have
4904344a3780SDimitry Andric           // an appropriate assumption attached.
49057fa27ce4SDimitry Andric           if (!AssumptionAA ||
49067fa27ce4SDimitry Andric               !(AssumptionAA->hasAssumption("omp_no_openmp") ||
49077fa27ce4SDimitry Andric                 AssumptionAA->hasAssumption("omp_no_parallelism")))
4908344a3780SDimitry Andric             ReachedUnknownParallelRegions.insert(&CB);
4909344a3780SDimitry Andric 
4910344a3780SDimitry Andric           // If SPMDCompatibilityTracker is not fixed, we need to give up on the
4911344a3780SDimitry Andric           // idea we can run something unknown in SPMD-mode.
4912c0981da4SDimitry Andric           if (!SPMDCompatibilityTracker.isAtFixpoint()) {
4913c0981da4SDimitry Andric             SPMDCompatibilityTracker.indicatePessimisticFixpoint();
4914344a3780SDimitry Andric             SPMDCompatibilityTracker.insert(&CB);
4915c0981da4SDimitry Andric           }
4916344a3780SDimitry Andric 
4917b1c73532SDimitry Andric           // We have updated the state for this unknown call properly, there
4918b1c73532SDimitry Andric           // won't be any change so we indicate a fixpoint.
4919344a3780SDimitry Andric           indicateOptimisticFixpoint();
4920344a3780SDimitry Andric         }
4921b1c73532SDimitry Andric         // If the callee is known and can be used in IPO, we will update the
4922b1c73532SDimitry Andric         // state based on the callee state in updateImpl.
4923b1c73532SDimitry Andric         return;
4924b1c73532SDimitry Andric       }
4925b1c73532SDimitry Andric       if (NumCallees > 1) {
4926b1c73532SDimitry Andric         indicatePessimisticFixpoint();
4927344a3780SDimitry Andric         return;
4928344a3780SDimitry Andric       }
4929344a3780SDimitry Andric 
4930344a3780SDimitry Andric       RuntimeFunction RF = It->getSecond();
4931344a3780SDimitry Andric       switch (RF) {
4932344a3780SDimitry Andric       // All the functions we know are compatible with SPMD mode.
4933344a3780SDimitry Andric       case OMPRTL___kmpc_is_spmd_exec_mode:
4934c0981da4SDimitry Andric       case OMPRTL___kmpc_distribute_static_fini:
4935344a3780SDimitry Andric       case OMPRTL___kmpc_for_static_fini:
4936344a3780SDimitry Andric       case OMPRTL___kmpc_global_thread_num:
4937344a3780SDimitry Andric       case OMPRTL___kmpc_get_hardware_num_threads_in_block:
4938344a3780SDimitry Andric       case OMPRTL___kmpc_get_hardware_num_blocks:
4939344a3780SDimitry Andric       case OMPRTL___kmpc_single:
4940344a3780SDimitry Andric       case OMPRTL___kmpc_end_single:
4941344a3780SDimitry Andric       case OMPRTL___kmpc_master:
4942344a3780SDimitry Andric       case OMPRTL___kmpc_end_master:
4943344a3780SDimitry Andric       case OMPRTL___kmpc_barrier:
494477fc4c14SDimitry Andric       case OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2:
494577fc4c14SDimitry Andric       case OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2:
4946b1c73532SDimitry Andric       case OMPRTL___kmpc_error:
4947b1c73532SDimitry Andric       case OMPRTL___kmpc_flush:
4948b1c73532SDimitry Andric       case OMPRTL___kmpc_get_hardware_thread_id_in_block:
4949b1c73532SDimitry Andric       case OMPRTL___kmpc_get_warp_size:
4950b1c73532SDimitry Andric       case OMPRTL_omp_get_thread_num:
4951b1c73532SDimitry Andric       case OMPRTL_omp_get_num_threads:
4952b1c73532SDimitry Andric       case OMPRTL_omp_get_max_threads:
4953b1c73532SDimitry Andric       case OMPRTL_omp_in_parallel:
4954b1c73532SDimitry Andric       case OMPRTL_omp_get_dynamic:
4955b1c73532SDimitry Andric       case OMPRTL_omp_get_cancellation:
4956b1c73532SDimitry Andric       case OMPRTL_omp_get_nested:
4957b1c73532SDimitry Andric       case OMPRTL_omp_get_schedule:
4958b1c73532SDimitry Andric       case OMPRTL_omp_get_thread_limit:
4959b1c73532SDimitry Andric       case OMPRTL_omp_get_supported_active_levels:
4960b1c73532SDimitry Andric       case OMPRTL_omp_get_max_active_levels:
4961b1c73532SDimitry Andric       case OMPRTL_omp_get_level:
4962b1c73532SDimitry Andric       case OMPRTL_omp_get_ancestor_thread_num:
4963b1c73532SDimitry Andric       case OMPRTL_omp_get_team_size:
4964b1c73532SDimitry Andric       case OMPRTL_omp_get_active_level:
4965b1c73532SDimitry Andric       case OMPRTL_omp_in_final:
4966b1c73532SDimitry Andric       case OMPRTL_omp_get_proc_bind:
4967b1c73532SDimitry Andric       case OMPRTL_omp_get_num_places:
4968b1c73532SDimitry Andric       case OMPRTL_omp_get_num_procs:
4969b1c73532SDimitry Andric       case OMPRTL_omp_get_place_proc_ids:
4970b1c73532SDimitry Andric       case OMPRTL_omp_get_place_num:
4971b1c73532SDimitry Andric       case OMPRTL_omp_get_partition_num_places:
4972b1c73532SDimitry Andric       case OMPRTL_omp_get_partition_place_nums:
4973b1c73532SDimitry Andric       case OMPRTL_omp_get_wtime:
4974344a3780SDimitry Andric         break;
4975c0981da4SDimitry Andric       case OMPRTL___kmpc_distribute_static_init_4:
4976c0981da4SDimitry Andric       case OMPRTL___kmpc_distribute_static_init_4u:
4977c0981da4SDimitry Andric       case OMPRTL___kmpc_distribute_static_init_8:
4978c0981da4SDimitry Andric       case OMPRTL___kmpc_distribute_static_init_8u:
4979344a3780SDimitry Andric       case OMPRTL___kmpc_for_static_init_4:
4980344a3780SDimitry Andric       case OMPRTL___kmpc_for_static_init_4u:
4981344a3780SDimitry Andric       case OMPRTL___kmpc_for_static_init_8:
4982344a3780SDimitry Andric       case OMPRTL___kmpc_for_static_init_8u: {
4983344a3780SDimitry Andric         // Check the schedule and allow static schedule in SPMD mode.
4984344a3780SDimitry Andric         unsigned ScheduleArgOpNo = 2;
4985344a3780SDimitry Andric         auto *ScheduleTypeCI =
4986344a3780SDimitry Andric             dyn_cast<ConstantInt>(CB.getArgOperand(ScheduleArgOpNo));
4987344a3780SDimitry Andric         unsigned ScheduleTypeVal =
4988344a3780SDimitry Andric             ScheduleTypeCI ? ScheduleTypeCI->getZExtValue() : 0;
4989344a3780SDimitry Andric         switch (OMPScheduleType(ScheduleTypeVal)) {
4990145449b1SDimitry Andric         case OMPScheduleType::UnorderedStatic:
4991145449b1SDimitry Andric         case OMPScheduleType::UnorderedStaticChunked:
4992145449b1SDimitry Andric         case OMPScheduleType::OrderedDistribute:
4993145449b1SDimitry Andric         case OMPScheduleType::OrderedDistributeChunked:
4994344a3780SDimitry Andric           break;
4995344a3780SDimitry Andric         default:
4996c0981da4SDimitry Andric           SPMDCompatibilityTracker.indicatePessimisticFixpoint();
4997344a3780SDimitry Andric           SPMDCompatibilityTracker.insert(&CB);
4998344a3780SDimitry Andric           break;
4999344a3780SDimitry Andric         };
5000344a3780SDimitry Andric       } break;
5001344a3780SDimitry Andric       case OMPRTL___kmpc_target_init:
5002344a3780SDimitry Andric         KernelInitCB = &CB;
5003344a3780SDimitry Andric         break;
5004344a3780SDimitry Andric       case OMPRTL___kmpc_target_deinit:
5005344a3780SDimitry Andric         KernelDeinitCB = &CB;
5006344a3780SDimitry Andric         break;
5007344a3780SDimitry Andric       case OMPRTL___kmpc_parallel_51:
5008b1c73532SDimitry Andric         if (!handleParallel51(A, CB))
5009b1c73532SDimitry Andric           indicatePessimisticFixpoint();
5010b1c73532SDimitry Andric         return;
5011344a3780SDimitry Andric       case OMPRTL___kmpc_omp_task:
5012344a3780SDimitry Andric         // We do not look into tasks right now, just give up.
501377fc4c14SDimitry Andric         SPMDCompatibilityTracker.indicatePessimisticFixpoint();
5014344a3780SDimitry Andric         SPMDCompatibilityTracker.insert(&CB);
5015344a3780SDimitry Andric         ReachedUnknownParallelRegions.insert(&CB);
5016344a3780SDimitry Andric         break;
5017344a3780SDimitry Andric       case OMPRTL___kmpc_alloc_shared:
5018344a3780SDimitry Andric       case OMPRTL___kmpc_free_shared:
5019344a3780SDimitry Andric         // Return without setting a fixpoint, to be resolved in updateImpl.
5020344a3780SDimitry Andric         return;
5021344a3780SDimitry Andric       default:
5022344a3780SDimitry Andric         // Unknown OpenMP runtime calls cannot be executed in SPMD-mode,
5023c0981da4SDimitry Andric         // generally. However, they do not hide parallel regions.
502477fc4c14SDimitry Andric         SPMDCompatibilityTracker.indicatePessimisticFixpoint();
5025344a3780SDimitry Andric         SPMDCompatibilityTracker.insert(&CB);
5026344a3780SDimitry Andric         break;
5027344a3780SDimitry Andric       }
5028344a3780SDimitry Andric       // All other OpenMP runtime calls will not reach parallel regions so they
5029b1c73532SDimitry Andric       // can be safely ignored for now. Since it is a known OpenMP runtime call
5030b1c73532SDimitry Andric       // we have now modeled all effects and there is no need for any update.
5031344a3780SDimitry Andric       indicateOptimisticFixpoint();
5032b1c73532SDimitry Andric     };
5033b1c73532SDimitry Andric 
5034b1c73532SDimitry Andric     const auto *AACE =
5035b1c73532SDimitry Andric         A.getAAFor<AACallEdges>(*this, getIRPosition(), DepClassTy::OPTIONAL);
5036b1c73532SDimitry Andric     if (!AACE || !AACE->getState().isValidState() || AACE->hasUnknownCallee()) {
5037b1c73532SDimitry Andric       CheckCallee(getAssociatedFunction(), 1);
5038b1c73532SDimitry Andric       return;
5039b1c73532SDimitry Andric     }
5040b1c73532SDimitry Andric     const auto &OptimisticEdges = AACE->getOptimisticEdges();
5041b1c73532SDimitry Andric     for (auto *Callee : OptimisticEdges) {
5042b1c73532SDimitry Andric       CheckCallee(Callee, OptimisticEdges.size());
5043b1c73532SDimitry Andric       if (isAtFixpoint())
5044b1c73532SDimitry Andric         break;
5045b1c73532SDimitry Andric     }
5046344a3780SDimitry Andric   }
5047344a3780SDimitry Andric 
updateImpl__anon7bbaa8dc0111::AAKernelInfoCallSite5048344a3780SDimitry Andric   ChangeStatus updateImpl(Attributor &A) override {
5049344a3780SDimitry Andric     // TODO: Once we have call site specific value information we can provide
5050344a3780SDimitry Andric     //       call site specific liveness information and then it makes
5051344a3780SDimitry Andric     //       sense to specialize attributes for call sites arguments instead of
5052344a3780SDimitry Andric     //       redirecting requests to the callee argument.
5053344a3780SDimitry Andric     auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
5054b1c73532SDimitry Andric     KernelInfoState StateBefore = getState();
5055b1c73532SDimitry Andric 
5056b1c73532SDimitry Andric     auto CheckCallee = [&](Function *F, int NumCallees) {
5057344a3780SDimitry Andric       const auto &It = OMPInfoCache.RuntimeFunctionIDMap.find(F);
5058344a3780SDimitry Andric 
5059b1c73532SDimitry Andric       // If F is not a runtime function, propagate the AAKernelInfo of the
5060b1c73532SDimitry Andric       // callee.
5061344a3780SDimitry Andric       if (It == OMPInfoCache.RuntimeFunctionIDMap.end()) {
5062344a3780SDimitry Andric         const IRPosition &FnPos = IRPosition::function(*F);
5063b1c73532SDimitry Andric         auto *FnAA =
5064b1c73532SDimitry Andric             A.getAAFor<AAKernelInfo>(*this, FnPos, DepClassTy::REQUIRED);
50657fa27ce4SDimitry Andric         if (!FnAA)
50667fa27ce4SDimitry Andric           return indicatePessimisticFixpoint();
50677fa27ce4SDimitry Andric         if (getState() == FnAA->getState())
5068344a3780SDimitry Andric           return ChangeStatus::UNCHANGED;
50697fa27ce4SDimitry Andric         getState() = FnAA->getState();
5070344a3780SDimitry Andric         return ChangeStatus::CHANGED;
5071344a3780SDimitry Andric       }
5072b1c73532SDimitry Andric       if (NumCallees > 1)
5073b1c73532SDimitry Andric         return indicatePessimisticFixpoint();
5074b1c73532SDimitry Andric 
5075b1c73532SDimitry Andric       CallBase &CB = cast<CallBase>(getAssociatedValue());
5076b1c73532SDimitry Andric       if (It->getSecond() == OMPRTL___kmpc_parallel_51) {
5077b1c73532SDimitry Andric         if (!handleParallel51(A, CB))
5078b1c73532SDimitry Andric           return indicatePessimisticFixpoint();
5079b1c73532SDimitry Andric         return StateBefore == getState() ? ChangeStatus::UNCHANGED
5080b1c73532SDimitry Andric                                          : ChangeStatus::CHANGED;
5081b1c73532SDimitry Andric       }
5082344a3780SDimitry Andric 
5083344a3780SDimitry Andric       // F is a runtime function that allocates or frees memory, check
5084344a3780SDimitry Andric       // AAHeapToStack and AAHeapToShared.
5085b1c73532SDimitry Andric       assert(
5086b1c73532SDimitry Andric           (It->getSecond() == OMPRTL___kmpc_alloc_shared ||
5087344a3780SDimitry Andric            It->getSecond() == OMPRTL___kmpc_free_shared) &&
5088344a3780SDimitry Andric           "Expected a __kmpc_alloc_shared or __kmpc_free_shared runtime call");
5089344a3780SDimitry Andric 
50907fa27ce4SDimitry Andric       auto *HeapToStackAA = A.getAAFor<AAHeapToStack>(
5091344a3780SDimitry Andric           *this, IRPosition::function(*CB.getCaller()), DepClassTy::OPTIONAL);
50927fa27ce4SDimitry Andric       auto *HeapToSharedAA = A.getAAFor<AAHeapToShared>(
5093344a3780SDimitry Andric           *this, IRPosition::function(*CB.getCaller()), DepClassTy::OPTIONAL);
5094344a3780SDimitry Andric 
5095344a3780SDimitry Andric       RuntimeFunction RF = It->getSecond();
5096344a3780SDimitry Andric 
5097344a3780SDimitry Andric       switch (RF) {
5098344a3780SDimitry Andric       // If neither HeapToStack nor HeapToShared assume the call is removed,
5099344a3780SDimitry Andric       // assume SPMD incompatibility.
5100344a3780SDimitry Andric       case OMPRTL___kmpc_alloc_shared:
51017fa27ce4SDimitry Andric         if ((!HeapToStackAA || !HeapToStackAA->isAssumedHeapToStack(CB)) &&
51027fa27ce4SDimitry Andric             (!HeapToSharedAA || !HeapToSharedAA->isAssumedHeapToShared(CB)))
5103344a3780SDimitry Andric           SPMDCompatibilityTracker.insert(&CB);
5104344a3780SDimitry Andric         break;
5105344a3780SDimitry Andric       case OMPRTL___kmpc_free_shared:
51067fa27ce4SDimitry Andric         if ((!HeapToStackAA ||
51077fa27ce4SDimitry Andric              !HeapToStackAA->isAssumedHeapToStackRemovedFree(CB)) &&
51087fa27ce4SDimitry Andric             (!HeapToSharedAA ||
51097fa27ce4SDimitry Andric              !HeapToSharedAA->isAssumedHeapToSharedRemovedFree(CB)))
5110344a3780SDimitry Andric           SPMDCompatibilityTracker.insert(&CB);
5111344a3780SDimitry Andric         break;
5112344a3780SDimitry Andric       default:
511377fc4c14SDimitry Andric         SPMDCompatibilityTracker.indicatePessimisticFixpoint();
5114344a3780SDimitry Andric         SPMDCompatibilityTracker.insert(&CB);
5115344a3780SDimitry Andric       }
5116b1c73532SDimitry Andric       return ChangeStatus::CHANGED;
5117b1c73532SDimitry Andric     };
5118b1c73532SDimitry Andric 
5119b1c73532SDimitry Andric     const auto *AACE =
5120b1c73532SDimitry Andric         A.getAAFor<AACallEdges>(*this, getIRPosition(), DepClassTy::OPTIONAL);
5121b1c73532SDimitry Andric     if (!AACE || !AACE->getState().isValidState() || AACE->hasUnknownCallee()) {
5122b1c73532SDimitry Andric       if (Function *F = getAssociatedFunction())
5123b1c73532SDimitry Andric         CheckCallee(F, /*NumCallees=*/1);
5124b1c73532SDimitry Andric     } else {
5125b1c73532SDimitry Andric       const auto &OptimisticEdges = AACE->getOptimisticEdges();
5126b1c73532SDimitry Andric       for (auto *Callee : OptimisticEdges) {
5127b1c73532SDimitry Andric         CheckCallee(Callee, OptimisticEdges.size());
5128b1c73532SDimitry Andric         if (isAtFixpoint())
5129b1c73532SDimitry Andric           break;
5130b1c73532SDimitry Andric       }
5131b1c73532SDimitry Andric     }
5132344a3780SDimitry Andric 
5133344a3780SDimitry Andric     return StateBefore == getState() ? ChangeStatus::UNCHANGED
5134344a3780SDimitry Andric                                      : ChangeStatus::CHANGED;
5135344a3780SDimitry Andric   }
5136b1c73532SDimitry Andric 
5137b1c73532SDimitry Andric   /// Deal with a __kmpc_parallel_51 call (\p CB). Returns true if the call was
5138b1c73532SDimitry Andric   /// handled, if a problem occurred, false is returned.
handleParallel51__anon7bbaa8dc0111::AAKernelInfoCallSite5139b1c73532SDimitry Andric   bool handleParallel51(Attributor &A, CallBase &CB) {
5140b1c73532SDimitry Andric     const unsigned int NonWrapperFunctionArgNo = 5;
5141b1c73532SDimitry Andric     const unsigned int WrapperFunctionArgNo = 6;
5142b1c73532SDimitry Andric     auto ParallelRegionOpArgNo = SPMDCompatibilityTracker.isAssumed()
5143b1c73532SDimitry Andric                                      ? NonWrapperFunctionArgNo
5144b1c73532SDimitry Andric                                      : WrapperFunctionArgNo;
5145b1c73532SDimitry Andric 
5146b1c73532SDimitry Andric     auto *ParallelRegion = dyn_cast<Function>(
5147b1c73532SDimitry Andric         CB.getArgOperand(ParallelRegionOpArgNo)->stripPointerCasts());
5148b1c73532SDimitry Andric     if (!ParallelRegion)
5149b1c73532SDimitry Andric       return false;
5150b1c73532SDimitry Andric 
5151b1c73532SDimitry Andric     ReachedKnownParallelRegions.insert(&CB);
5152b1c73532SDimitry Andric     /// Check nested parallelism
5153b1c73532SDimitry Andric     auto *FnAA = A.getAAFor<AAKernelInfo>(
5154b1c73532SDimitry Andric         *this, IRPosition::function(*ParallelRegion), DepClassTy::OPTIONAL);
5155b1c73532SDimitry Andric     NestedParallelism |= !FnAA || !FnAA->getState().isValidState() ||
5156b1c73532SDimitry Andric                          !FnAA->ReachedKnownParallelRegions.empty() ||
5157b1c73532SDimitry Andric                          !FnAA->ReachedKnownParallelRegions.isValidState() ||
5158b1c73532SDimitry Andric                          !FnAA->ReachedUnknownParallelRegions.isValidState() ||
5159b1c73532SDimitry Andric                          !FnAA->ReachedUnknownParallelRegions.empty();
5160b1c73532SDimitry Andric     return true;
5161b1c73532SDimitry Andric   }
5162344a3780SDimitry Andric };
5163344a3780SDimitry Andric 
5164344a3780SDimitry Andric struct AAFoldRuntimeCall
5165344a3780SDimitry Andric     : public StateWrapper<BooleanState, AbstractAttribute> {
5166344a3780SDimitry Andric   using Base = StateWrapper<BooleanState, AbstractAttribute>;
5167344a3780SDimitry Andric 
AAFoldRuntimeCall__anon7bbaa8dc0111::AAFoldRuntimeCall5168344a3780SDimitry Andric   AAFoldRuntimeCall(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
5169344a3780SDimitry Andric 
5170344a3780SDimitry Andric   /// Statistics are tracked as part of manifest for now.
trackStatistics__anon7bbaa8dc0111::AAFoldRuntimeCall5171344a3780SDimitry Andric   void trackStatistics() const override {}
5172344a3780SDimitry Andric 
5173344a3780SDimitry Andric   /// Create an abstract attribute biew for the position \p IRP.
5174344a3780SDimitry Andric   static AAFoldRuntimeCall &createForPosition(const IRPosition &IRP,
5175344a3780SDimitry Andric                                               Attributor &A);
5176344a3780SDimitry Andric 
5177344a3780SDimitry Andric   /// See AbstractAttribute::getName()
getName__anon7bbaa8dc0111::AAFoldRuntimeCall5178344a3780SDimitry Andric   const std::string getName() const override { return "AAFoldRuntimeCall"; }
5179344a3780SDimitry Andric 
5180344a3780SDimitry Andric   /// See AbstractAttribute::getIdAddr()
getIdAddr__anon7bbaa8dc0111::AAFoldRuntimeCall5181344a3780SDimitry Andric   const char *getIdAddr() const override { return &ID; }
5182344a3780SDimitry Andric 
5183344a3780SDimitry Andric   /// This function should return true if the type of the \p AA is
5184344a3780SDimitry Andric   /// AAFoldRuntimeCall
classof__anon7bbaa8dc0111::AAFoldRuntimeCall5185344a3780SDimitry Andric   static bool classof(const AbstractAttribute *AA) {
5186344a3780SDimitry Andric     return (AA->getIdAddr() == &ID);
5187344a3780SDimitry Andric   }
5188344a3780SDimitry Andric 
5189344a3780SDimitry Andric   static const char ID;
5190344a3780SDimitry Andric };
5191344a3780SDimitry Andric 
5192344a3780SDimitry Andric struct AAFoldRuntimeCallCallSiteReturned : AAFoldRuntimeCall {
AAFoldRuntimeCallCallSiteReturned__anon7bbaa8dc0111::AAFoldRuntimeCallCallSiteReturned5193344a3780SDimitry Andric   AAFoldRuntimeCallCallSiteReturned(const IRPosition &IRP, Attributor &A)
5194344a3780SDimitry Andric       : AAFoldRuntimeCall(IRP, A) {}
5195344a3780SDimitry Andric 
5196344a3780SDimitry Andric   /// See AbstractAttribute::getAsStr()
getAsStr__anon7bbaa8dc0111::AAFoldRuntimeCallCallSiteReturned51977fa27ce4SDimitry Andric   const std::string getAsStr(Attributor *) const override {
5198344a3780SDimitry Andric     if (!isValidState())
5199344a3780SDimitry Andric       return "<invalid>";
5200344a3780SDimitry Andric 
5201344a3780SDimitry Andric     std::string Str("simplified value: ");
5202344a3780SDimitry Andric 
5203145449b1SDimitry Andric     if (!SimplifiedValue)
5204344a3780SDimitry Andric       return Str + std::string("none");
5205344a3780SDimitry Andric 
5206e3b55780SDimitry Andric     if (!*SimplifiedValue)
5207344a3780SDimitry Andric       return Str + std::string("nullptr");
5208344a3780SDimitry Andric 
5209e3b55780SDimitry Andric     if (ConstantInt *CI = dyn_cast<ConstantInt>(*SimplifiedValue))
5210344a3780SDimitry Andric       return Str + std::to_string(CI->getSExtValue());
5211344a3780SDimitry Andric 
5212344a3780SDimitry Andric     return Str + std::string("unknown");
5213344a3780SDimitry Andric   }
5214344a3780SDimitry Andric 
initialize__anon7bbaa8dc0111::AAFoldRuntimeCallCallSiteReturned5215344a3780SDimitry Andric   void initialize(Attributor &A) override {
5216c0981da4SDimitry Andric     if (DisableOpenMPOptFolding)
5217c0981da4SDimitry Andric       indicatePessimisticFixpoint();
5218c0981da4SDimitry Andric 
5219344a3780SDimitry Andric     Function *Callee = getAssociatedFunction();
5220344a3780SDimitry Andric 
5221344a3780SDimitry Andric     auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
5222344a3780SDimitry Andric     const auto &It = OMPInfoCache.RuntimeFunctionIDMap.find(Callee);
5223344a3780SDimitry Andric     assert(It != OMPInfoCache.RuntimeFunctionIDMap.end() &&
5224344a3780SDimitry Andric            "Expected a known OpenMP runtime function");
5225344a3780SDimitry Andric 
5226344a3780SDimitry Andric     RFKind = It->getSecond();
5227344a3780SDimitry Andric 
5228344a3780SDimitry Andric     CallBase &CB = cast<CallBase>(getAssociatedValue());
5229344a3780SDimitry Andric     A.registerSimplificationCallback(
5230344a3780SDimitry Andric         IRPosition::callsite_returned(CB),
5231344a3780SDimitry Andric         [&](const IRPosition &IRP, const AbstractAttribute *AA,
5232e3b55780SDimitry Andric             bool &UsedAssumedInformation) -> std::optional<Value *> {
5233145449b1SDimitry Andric           assert((isValidState() ||
5234e3b55780SDimitry Andric                   (SimplifiedValue && *SimplifiedValue == nullptr)) &&
5235344a3780SDimitry Andric                  "Unexpected invalid state!");
5236344a3780SDimitry Andric 
5237344a3780SDimitry Andric           if (!isAtFixpoint()) {
5238344a3780SDimitry Andric             UsedAssumedInformation = true;
5239344a3780SDimitry Andric             if (AA)
5240344a3780SDimitry Andric               A.recordDependence(*this, *AA, DepClassTy::OPTIONAL);
5241344a3780SDimitry Andric           }
5242344a3780SDimitry Andric           return SimplifiedValue;
5243344a3780SDimitry Andric         });
5244344a3780SDimitry Andric   }
5245344a3780SDimitry Andric 
updateImpl__anon7bbaa8dc0111::AAFoldRuntimeCallCallSiteReturned5246344a3780SDimitry Andric   ChangeStatus updateImpl(Attributor &A) override {
5247344a3780SDimitry Andric     ChangeStatus Changed = ChangeStatus::UNCHANGED;
5248344a3780SDimitry Andric     switch (RFKind) {
5249344a3780SDimitry Andric     case OMPRTL___kmpc_is_spmd_exec_mode:
5250344a3780SDimitry Andric       Changed |= foldIsSPMDExecMode(A);
5251344a3780SDimitry Andric       break;
5252344a3780SDimitry Andric     case OMPRTL___kmpc_parallel_level:
5253344a3780SDimitry Andric       Changed |= foldParallelLevel(A);
5254344a3780SDimitry Andric       break;
5255344a3780SDimitry Andric     case OMPRTL___kmpc_get_hardware_num_threads_in_block:
5256344a3780SDimitry Andric       Changed = Changed | foldKernelFnAttribute(A, "omp_target_thread_limit");
5257344a3780SDimitry Andric       break;
5258344a3780SDimitry Andric     case OMPRTL___kmpc_get_hardware_num_blocks:
5259344a3780SDimitry Andric       Changed = Changed | foldKernelFnAttribute(A, "omp_target_num_teams");
5260344a3780SDimitry Andric       break;
5261344a3780SDimitry Andric     default:
5262344a3780SDimitry Andric       llvm_unreachable("Unhandled OpenMP runtime function!");
5263344a3780SDimitry Andric     }
5264344a3780SDimitry Andric 
5265344a3780SDimitry Andric     return Changed;
5266344a3780SDimitry Andric   }
5267344a3780SDimitry Andric 
manifest__anon7bbaa8dc0111::AAFoldRuntimeCallCallSiteReturned5268344a3780SDimitry Andric   ChangeStatus manifest(Attributor &A) override {
5269344a3780SDimitry Andric     ChangeStatus Changed = ChangeStatus::UNCHANGED;
5270344a3780SDimitry Andric 
5271145449b1SDimitry Andric     if (SimplifiedValue && *SimplifiedValue) {
5272c0981da4SDimitry Andric       Instruction &I = *getCtxI();
5273145449b1SDimitry Andric       A.changeAfterManifest(IRPosition::inst(I), **SimplifiedValue);
5274c0981da4SDimitry Andric       A.deleteAfterManifest(I);
5275344a3780SDimitry Andric 
5276c0981da4SDimitry Andric       CallBase *CB = dyn_cast<CallBase>(&I);
5277c0981da4SDimitry Andric       auto Remark = [&](OptimizationRemark OR) {
5278c0981da4SDimitry Andric         if (auto *C = dyn_cast<ConstantInt>(*SimplifiedValue))
5279c0981da4SDimitry Andric           return OR << "Replacing OpenMP runtime call "
5280c0981da4SDimitry Andric                     << CB->getCalledFunction()->getName() << " with "
5281c0981da4SDimitry Andric                     << ore::NV("FoldedValue", C->getZExtValue()) << ".";
5282c0981da4SDimitry Andric         return OR << "Replacing OpenMP runtime call "
5283c0981da4SDimitry Andric                   << CB->getCalledFunction()->getName() << ".";
5284c0981da4SDimitry Andric       };
5285c0981da4SDimitry Andric 
5286c0981da4SDimitry Andric       if (CB && EnableVerboseRemarks)
5287c0981da4SDimitry Andric         A.emitRemark<OptimizationRemark>(CB, "OMP180", Remark);
5288c0981da4SDimitry Andric 
5289c0981da4SDimitry Andric       LLVM_DEBUG(dbgs() << TAG << "Replacing runtime call: " << I << " with "
5290344a3780SDimitry Andric                         << **SimplifiedValue << "\n");
5291344a3780SDimitry Andric 
5292344a3780SDimitry Andric       Changed = ChangeStatus::CHANGED;
5293344a3780SDimitry Andric     }
5294344a3780SDimitry Andric 
5295344a3780SDimitry Andric     return Changed;
5296344a3780SDimitry Andric   }
5297344a3780SDimitry Andric 
indicatePessimisticFixpoint__anon7bbaa8dc0111::AAFoldRuntimeCallCallSiteReturned5298344a3780SDimitry Andric   ChangeStatus indicatePessimisticFixpoint() override {
5299344a3780SDimitry Andric     SimplifiedValue = nullptr;
5300344a3780SDimitry Andric     return AAFoldRuntimeCall::indicatePessimisticFixpoint();
5301344a3780SDimitry Andric   }
5302344a3780SDimitry Andric 
5303344a3780SDimitry Andric private:
5304344a3780SDimitry Andric   /// Fold __kmpc_is_spmd_exec_mode into a constant if possible.
foldIsSPMDExecMode__anon7bbaa8dc0111::AAFoldRuntimeCallCallSiteReturned5305344a3780SDimitry Andric   ChangeStatus foldIsSPMDExecMode(Attributor &A) {
5306e3b55780SDimitry Andric     std::optional<Value *> SimplifiedValueBefore = SimplifiedValue;
5307344a3780SDimitry Andric 
5308344a3780SDimitry Andric     unsigned AssumedSPMDCount = 0, KnownSPMDCount = 0;
5309344a3780SDimitry Andric     unsigned AssumedNonSPMDCount = 0, KnownNonSPMDCount = 0;
53107fa27ce4SDimitry Andric     auto *CallerKernelInfoAA = A.getAAFor<AAKernelInfo>(
5311344a3780SDimitry Andric         *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
5312344a3780SDimitry Andric 
53137fa27ce4SDimitry Andric     if (!CallerKernelInfoAA ||
53147fa27ce4SDimitry Andric         !CallerKernelInfoAA->ReachingKernelEntries.isValidState())
5315344a3780SDimitry Andric       return indicatePessimisticFixpoint();
5316344a3780SDimitry Andric 
53177fa27ce4SDimitry Andric     for (Kernel K : CallerKernelInfoAA->ReachingKernelEntries) {
53187fa27ce4SDimitry Andric       auto *AA = A.getAAFor<AAKernelInfo>(*this, IRPosition::function(*K),
5319344a3780SDimitry Andric                                           DepClassTy::REQUIRED);
5320344a3780SDimitry Andric 
53217fa27ce4SDimitry Andric       if (!AA || !AA->isValidState()) {
5322344a3780SDimitry Andric         SimplifiedValue = nullptr;
5323344a3780SDimitry Andric         return indicatePessimisticFixpoint();
5324344a3780SDimitry Andric       }
5325344a3780SDimitry Andric 
53267fa27ce4SDimitry Andric       if (AA->SPMDCompatibilityTracker.isAssumed()) {
53277fa27ce4SDimitry Andric         if (AA->SPMDCompatibilityTracker.isAtFixpoint())
5328344a3780SDimitry Andric           ++KnownSPMDCount;
5329344a3780SDimitry Andric         else
5330344a3780SDimitry Andric           ++AssumedSPMDCount;
5331344a3780SDimitry Andric       } else {
53327fa27ce4SDimitry Andric         if (AA->SPMDCompatibilityTracker.isAtFixpoint())
5333344a3780SDimitry Andric           ++KnownNonSPMDCount;
5334344a3780SDimitry Andric         else
5335344a3780SDimitry Andric           ++AssumedNonSPMDCount;
5336344a3780SDimitry Andric       }
5337344a3780SDimitry Andric     }
5338344a3780SDimitry Andric 
5339344a3780SDimitry Andric     if ((AssumedSPMDCount + KnownSPMDCount) &&
5340344a3780SDimitry Andric         (AssumedNonSPMDCount + KnownNonSPMDCount))
5341344a3780SDimitry Andric       return indicatePessimisticFixpoint();
5342344a3780SDimitry Andric 
5343344a3780SDimitry Andric     auto &Ctx = getAnchorValue().getContext();
5344344a3780SDimitry Andric     if (KnownSPMDCount || AssumedSPMDCount) {
5345344a3780SDimitry Andric       assert(KnownNonSPMDCount == 0 && AssumedNonSPMDCount == 0 &&
5346344a3780SDimitry Andric              "Expected only SPMD kernels!");
5347344a3780SDimitry Andric       // All reaching kernels are in SPMD mode. Update all function calls to
5348344a3780SDimitry Andric       // __kmpc_is_spmd_exec_mode to 1.
5349344a3780SDimitry Andric       SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), true);
5350344a3780SDimitry Andric     } else if (KnownNonSPMDCount || AssumedNonSPMDCount) {
5351344a3780SDimitry Andric       assert(KnownSPMDCount == 0 && AssumedSPMDCount == 0 &&
5352344a3780SDimitry Andric              "Expected only non-SPMD kernels!");
5353344a3780SDimitry Andric       // All reaching kernels are in non-SPMD mode. Update all function
5354344a3780SDimitry Andric       // calls to __kmpc_is_spmd_exec_mode to 0.
5355344a3780SDimitry Andric       SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), false);
5356344a3780SDimitry Andric     } else {
5357344a3780SDimitry Andric       // We have empty reaching kernels, therefore we cannot tell if the
5358344a3780SDimitry Andric       // associated call site can be folded. At this moment, SimplifiedValue
5359344a3780SDimitry Andric       // must be none.
5360145449b1SDimitry Andric       assert(!SimplifiedValue && "SimplifiedValue should be none");
5361344a3780SDimitry Andric     }
5362344a3780SDimitry Andric 
5363344a3780SDimitry Andric     return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED
5364344a3780SDimitry Andric                                                     : ChangeStatus::CHANGED;
5365344a3780SDimitry Andric   }
5366344a3780SDimitry Andric 
5367344a3780SDimitry Andric   /// Fold __kmpc_parallel_level into a constant if possible.
foldParallelLevel__anon7bbaa8dc0111::AAFoldRuntimeCallCallSiteReturned5368344a3780SDimitry Andric   ChangeStatus foldParallelLevel(Attributor &A) {
5369e3b55780SDimitry Andric     std::optional<Value *> SimplifiedValueBefore = SimplifiedValue;
5370344a3780SDimitry Andric 
53717fa27ce4SDimitry Andric     auto *CallerKernelInfoAA = A.getAAFor<AAKernelInfo>(
5372344a3780SDimitry Andric         *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
5373344a3780SDimitry Andric 
53747fa27ce4SDimitry Andric     if (!CallerKernelInfoAA ||
53757fa27ce4SDimitry Andric         !CallerKernelInfoAA->ParallelLevels.isValidState())
5376344a3780SDimitry Andric       return indicatePessimisticFixpoint();
5377344a3780SDimitry Andric 
53787fa27ce4SDimitry Andric     if (!CallerKernelInfoAA->ReachingKernelEntries.isValidState())
5379344a3780SDimitry Andric       return indicatePessimisticFixpoint();
5380344a3780SDimitry Andric 
53817fa27ce4SDimitry Andric     if (CallerKernelInfoAA->ReachingKernelEntries.empty()) {
5382145449b1SDimitry Andric       assert(!SimplifiedValue &&
5383344a3780SDimitry Andric              "SimplifiedValue should keep none at this point");
5384344a3780SDimitry Andric       return ChangeStatus::UNCHANGED;
5385344a3780SDimitry Andric     }
5386344a3780SDimitry Andric 
5387344a3780SDimitry Andric     unsigned AssumedSPMDCount = 0, KnownSPMDCount = 0;
5388344a3780SDimitry Andric     unsigned AssumedNonSPMDCount = 0, KnownNonSPMDCount = 0;
53897fa27ce4SDimitry Andric     for (Kernel K : CallerKernelInfoAA->ReachingKernelEntries) {
53907fa27ce4SDimitry Andric       auto *AA = A.getAAFor<AAKernelInfo>(*this, IRPosition::function(*K),
5391344a3780SDimitry Andric                                           DepClassTy::REQUIRED);
53927fa27ce4SDimitry Andric       if (!AA || !AA->SPMDCompatibilityTracker.isValidState())
5393344a3780SDimitry Andric         return indicatePessimisticFixpoint();
5394344a3780SDimitry Andric 
53957fa27ce4SDimitry Andric       if (AA->SPMDCompatibilityTracker.isAssumed()) {
53967fa27ce4SDimitry Andric         if (AA->SPMDCompatibilityTracker.isAtFixpoint())
5397344a3780SDimitry Andric           ++KnownSPMDCount;
5398344a3780SDimitry Andric         else
5399344a3780SDimitry Andric           ++AssumedSPMDCount;
5400344a3780SDimitry Andric       } else {
54017fa27ce4SDimitry Andric         if (AA->SPMDCompatibilityTracker.isAtFixpoint())
5402344a3780SDimitry Andric           ++KnownNonSPMDCount;
5403344a3780SDimitry Andric         else
5404344a3780SDimitry Andric           ++AssumedNonSPMDCount;
5405344a3780SDimitry Andric       }
5406344a3780SDimitry Andric     }
5407344a3780SDimitry Andric 
5408344a3780SDimitry Andric     if ((AssumedSPMDCount + KnownSPMDCount) &&
5409344a3780SDimitry Andric         (AssumedNonSPMDCount + KnownNonSPMDCount))
5410344a3780SDimitry Andric       return indicatePessimisticFixpoint();
5411344a3780SDimitry Andric 
5412344a3780SDimitry Andric     auto &Ctx = getAnchorValue().getContext();
5413344a3780SDimitry Andric     // If the caller can only be reached by SPMD kernel entries, the parallel
5414344a3780SDimitry Andric     // level is 1. Similarly, if the caller can only be reached by non-SPMD
5415344a3780SDimitry Andric     // kernel entries, it is 0.
5416344a3780SDimitry Andric     if (AssumedSPMDCount || KnownSPMDCount) {
5417344a3780SDimitry Andric       assert(KnownNonSPMDCount == 0 && AssumedNonSPMDCount == 0 &&
5418344a3780SDimitry Andric              "Expected only SPMD kernels!");
5419344a3780SDimitry Andric       SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), 1);
5420344a3780SDimitry Andric     } else {
5421344a3780SDimitry Andric       assert(KnownSPMDCount == 0 && AssumedSPMDCount == 0 &&
5422344a3780SDimitry Andric              "Expected only non-SPMD kernels!");
5423344a3780SDimitry Andric       SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), 0);
5424344a3780SDimitry Andric     }
5425344a3780SDimitry Andric     return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED
5426344a3780SDimitry Andric                                                     : ChangeStatus::CHANGED;
5427344a3780SDimitry Andric   }
5428344a3780SDimitry Andric 
foldKernelFnAttribute__anon7bbaa8dc0111::AAFoldRuntimeCallCallSiteReturned5429344a3780SDimitry Andric   ChangeStatus foldKernelFnAttribute(Attributor &A, llvm::StringRef Attr) {
5430344a3780SDimitry Andric     // Specialize only if all the calls agree with the attribute constant value
5431344a3780SDimitry Andric     int32_t CurrentAttrValue = -1;
5432e3b55780SDimitry Andric     std::optional<Value *> SimplifiedValueBefore = SimplifiedValue;
5433344a3780SDimitry Andric 
54347fa27ce4SDimitry Andric     auto *CallerKernelInfoAA = A.getAAFor<AAKernelInfo>(
5435344a3780SDimitry Andric         *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
5436344a3780SDimitry Andric 
54377fa27ce4SDimitry Andric     if (!CallerKernelInfoAA ||
54387fa27ce4SDimitry Andric         !CallerKernelInfoAA->ReachingKernelEntries.isValidState())
5439344a3780SDimitry Andric       return indicatePessimisticFixpoint();
5440344a3780SDimitry Andric 
5441344a3780SDimitry Andric     // Iterate over the kernels that reach this function
54427fa27ce4SDimitry Andric     for (Kernel K : CallerKernelInfoAA->ReachingKernelEntries) {
5443e3b55780SDimitry Andric       int32_t NextAttrVal = K->getFnAttributeAsParsedInteger(Attr, -1);
5444344a3780SDimitry Andric 
5445344a3780SDimitry Andric       if (NextAttrVal == -1 ||
5446344a3780SDimitry Andric           (CurrentAttrValue != -1 && CurrentAttrValue != NextAttrVal))
5447344a3780SDimitry Andric         return indicatePessimisticFixpoint();
5448344a3780SDimitry Andric       CurrentAttrValue = NextAttrVal;
5449344a3780SDimitry Andric     }
5450344a3780SDimitry Andric 
5451344a3780SDimitry Andric     if (CurrentAttrValue != -1) {
5452344a3780SDimitry Andric       auto &Ctx = getAnchorValue().getContext();
5453344a3780SDimitry Andric       SimplifiedValue =
5454344a3780SDimitry Andric           ConstantInt::get(Type::getInt32Ty(Ctx), CurrentAttrValue);
5455344a3780SDimitry Andric     }
5456344a3780SDimitry Andric     return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED
5457344a3780SDimitry Andric                                                     : ChangeStatus::CHANGED;
5458344a3780SDimitry Andric   }
5459344a3780SDimitry Andric 
5460344a3780SDimitry Andric   /// An optional value the associated value is assumed to fold to. That is, we
5461344a3780SDimitry Andric   /// assume the associated value (which is a call) can be replaced by this
5462344a3780SDimitry Andric   /// simplified value.
5463e3b55780SDimitry Andric   std::optional<Value *> SimplifiedValue;
5464344a3780SDimitry Andric 
5465344a3780SDimitry Andric   /// The runtime function kind of the callee of the associated call site.
5466344a3780SDimitry Andric   RuntimeFunction RFKind;
5467344a3780SDimitry Andric };
5468344a3780SDimitry Andric 
5469cfca06d7SDimitry Andric } // namespace
5470cfca06d7SDimitry Andric 
5471344a3780SDimitry Andric /// Register folding callsite
registerFoldRuntimeCall(RuntimeFunction RF)5472344a3780SDimitry Andric void OpenMPOpt::registerFoldRuntimeCall(RuntimeFunction RF) {
5473344a3780SDimitry Andric   auto &RFI = OMPInfoCache.RFIs[RF];
5474344a3780SDimitry Andric   RFI.foreachUse(SCC, [&](Use &U, Function &F) {
5475344a3780SDimitry Andric     CallInst *CI = OpenMPOpt::getCallIfRegularCall(U, &RFI);
5476344a3780SDimitry Andric     if (!CI)
5477344a3780SDimitry Andric       return false;
5478344a3780SDimitry Andric     A.getOrCreateAAFor<AAFoldRuntimeCall>(
5479344a3780SDimitry Andric         IRPosition::callsite_returned(*CI), /* QueryingAA */ nullptr,
5480344a3780SDimitry Andric         DepClassTy::NONE, /* ForceUpdate */ false,
5481344a3780SDimitry Andric         /* UpdateAfterInit */ false);
5482344a3780SDimitry Andric     return false;
5483344a3780SDimitry Andric   });
5484344a3780SDimitry Andric }
5485344a3780SDimitry Andric 
registerAAs(bool IsModulePass)5486344a3780SDimitry Andric void OpenMPOpt::registerAAs(bool IsModulePass) {
5487344a3780SDimitry Andric   if (SCC.empty())
5488344a3780SDimitry Andric     return;
5489145449b1SDimitry Andric 
5490344a3780SDimitry Andric   if (IsModulePass) {
5491344a3780SDimitry Andric     // Ensure we create the AAKernelInfo AAs first and without triggering an
5492344a3780SDimitry Andric     // update. This will make sure we register all value simplification
5493344a3780SDimitry Andric     // callbacks before any other AA has the chance to create an AAValueSimplify
5494344a3780SDimitry Andric     // or similar.
5495145449b1SDimitry Andric     auto CreateKernelInfoCB = [&](Use &, Function &Kernel) {
5496344a3780SDimitry Andric       A.getOrCreateAAFor<AAKernelInfo>(
5497145449b1SDimitry Andric           IRPosition::function(Kernel), /* QueryingAA */ nullptr,
5498344a3780SDimitry Andric           DepClassTy::NONE, /* ForceUpdate */ false,
5499344a3780SDimitry Andric           /* UpdateAfterInit */ false);
5500145449b1SDimitry Andric       return false;
5501145449b1SDimitry Andric     };
5502145449b1SDimitry Andric     OMPInformationCache::RuntimeFunctionInfo &InitRFI =
5503145449b1SDimitry Andric         OMPInfoCache.RFIs[OMPRTL___kmpc_target_init];
5504145449b1SDimitry Andric     InitRFI.foreachUse(SCC, CreateKernelInfoCB);
5505344a3780SDimitry Andric 
5506344a3780SDimitry Andric     registerFoldRuntimeCall(OMPRTL___kmpc_is_spmd_exec_mode);
5507344a3780SDimitry Andric     registerFoldRuntimeCall(OMPRTL___kmpc_parallel_level);
5508344a3780SDimitry Andric     registerFoldRuntimeCall(OMPRTL___kmpc_get_hardware_num_threads_in_block);
5509344a3780SDimitry Andric     registerFoldRuntimeCall(OMPRTL___kmpc_get_hardware_num_blocks);
5510344a3780SDimitry Andric   }
5511344a3780SDimitry Andric 
5512344a3780SDimitry Andric   // Create CallSite AA for all Getters.
5513e3b55780SDimitry Andric   if (DeduceICVValues) {
5514344a3780SDimitry Andric     for (int Idx = 0; Idx < OMPInfoCache.ICVs.size() - 1; ++Idx) {
5515344a3780SDimitry Andric       auto ICVInfo = OMPInfoCache.ICVs[static_cast<InternalControlVar>(Idx)];
5516344a3780SDimitry Andric 
5517344a3780SDimitry Andric       auto &GetterRFI = OMPInfoCache.RFIs[ICVInfo.Getter];
5518344a3780SDimitry Andric 
5519344a3780SDimitry Andric       auto CreateAA = [&](Use &U, Function &Caller) {
5520344a3780SDimitry Andric         CallInst *CI = OpenMPOpt::getCallIfRegularCall(U, &GetterRFI);
5521344a3780SDimitry Andric         if (!CI)
5522344a3780SDimitry Andric           return false;
5523344a3780SDimitry Andric 
5524344a3780SDimitry Andric         auto &CB = cast<CallBase>(*CI);
5525344a3780SDimitry Andric 
5526344a3780SDimitry Andric         IRPosition CBPos = IRPosition::callsite_function(CB);
5527344a3780SDimitry Andric         A.getOrCreateAAFor<AAICVTracker>(CBPos);
5528344a3780SDimitry Andric         return false;
5529344a3780SDimitry Andric       };
5530344a3780SDimitry Andric 
5531344a3780SDimitry Andric       GetterRFI.foreachUse(SCC, CreateAA);
5532344a3780SDimitry Andric     }
5533e3b55780SDimitry Andric   }
5534344a3780SDimitry Andric 
5535344a3780SDimitry Andric   // Create an ExecutionDomain AA for every function and a HeapToStack AA for
5536344a3780SDimitry Andric   // every function if there is a device kernel.
5537344a3780SDimitry Andric   if (!isOpenMPDevice(M))
5538344a3780SDimitry Andric     return;
5539344a3780SDimitry Andric 
5540344a3780SDimitry Andric   for (auto *F : SCC) {
5541344a3780SDimitry Andric     if (F->isDeclaration())
5542344a3780SDimitry Andric       continue;
5543344a3780SDimitry Andric 
5544e3b55780SDimitry Andric     // We look at internal functions only on-demand but if any use is not a
5545e3b55780SDimitry Andric     // direct call or outside the current set of analyzed functions, we have
5546e3b55780SDimitry Andric     // to do it eagerly.
5547e3b55780SDimitry Andric     if (F->hasLocalLinkage()) {
5548e3b55780SDimitry Andric       if (llvm::all_of(F->uses(), [this](const Use &U) {
5549e3b55780SDimitry Andric             const auto *CB = dyn_cast<CallBase>(U.getUser());
5550e3b55780SDimitry Andric             return CB && CB->isCallee(&U) &&
5551e3b55780SDimitry Andric                    A.isRunOn(const_cast<Function *>(CB->getCaller()));
5552e3b55780SDimitry Andric           }))
5553e3b55780SDimitry Andric         continue;
5554e3b55780SDimitry Andric     }
5555e3b55780SDimitry Andric     registerAAsForFunction(A, *F);
5556e3b55780SDimitry Andric   }
5557e3b55780SDimitry Andric }
5558344a3780SDimitry Andric 
registerAAsForFunction(Attributor & A,const Function & F)5559e3b55780SDimitry Andric void OpenMPOpt::registerAAsForFunction(Attributor &A, const Function &F) {
5560e3b55780SDimitry Andric   if (!DisableOpenMPOptDeglobalization)
5561e3b55780SDimitry Andric     A.getOrCreateAAFor<AAHeapToShared>(IRPosition::function(F));
5562e3b55780SDimitry Andric   A.getOrCreateAAFor<AAExecutionDomain>(IRPosition::function(F));
5563e3b55780SDimitry Andric   if (!DisableOpenMPOptDeglobalization)
5564e3b55780SDimitry Andric     A.getOrCreateAAFor<AAHeapToStack>(IRPosition::function(F));
55657fa27ce4SDimitry Andric   if (F.hasFnAttribute(Attribute::Convergent))
55667fa27ce4SDimitry Andric     A.getOrCreateAAFor<AANonConvergent>(IRPosition::function(F));
5567e3b55780SDimitry Andric 
5568e3b55780SDimitry Andric   for (auto &I : instructions(F)) {
5569344a3780SDimitry Andric     if (auto *LI = dyn_cast<LoadInst>(&I)) {
5570344a3780SDimitry Andric       bool UsedAssumedInformation = false;
5571344a3780SDimitry Andric       A.getAssumedSimplified(IRPosition::value(*LI), /* AA */ nullptr,
55724b4fe385SDimitry Andric                              UsedAssumedInformation, AA::Interprocedural);
5573e3b55780SDimitry Andric       continue;
5574e3b55780SDimitry Andric     }
5575b1c73532SDimitry Andric     if (auto *CI = dyn_cast<CallBase>(&I)) {
5576b1c73532SDimitry Andric       if (CI->isIndirectCall())
5577b1c73532SDimitry Andric         A.getOrCreateAAFor<AAIndirectCallInfo>(
5578b1c73532SDimitry Andric             IRPosition::callsite_function(*CI));
5579b1c73532SDimitry Andric     }
5580e3b55780SDimitry Andric     if (auto *SI = dyn_cast<StoreInst>(&I)) {
55816f8fc217SDimitry Andric       A.getOrCreateAAFor<AAIsDead>(IRPosition::value(*SI));
5582e3b55780SDimitry Andric       continue;
5583e3b55780SDimitry Andric     }
55847fa27ce4SDimitry Andric     if (auto *FI = dyn_cast<FenceInst>(&I)) {
55857fa27ce4SDimitry Andric       A.getOrCreateAAFor<AAIsDead>(IRPosition::value(*FI));
55867fa27ce4SDimitry Andric       continue;
55877fa27ce4SDimitry Andric     }
5588e3b55780SDimitry Andric     if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
5589e3b55780SDimitry Andric       if (II->getIntrinsicID() == Intrinsic::assume) {
5590e3b55780SDimitry Andric         A.getOrCreateAAFor<AAPotentialValues>(
5591e3b55780SDimitry Andric             IRPosition::value(*II->getArgOperand(0)));
5592e3b55780SDimitry Andric         continue;
5593344a3780SDimitry Andric       }
5594344a3780SDimitry Andric     }
5595344a3780SDimitry Andric   }
5596344a3780SDimitry Andric }
5597344a3780SDimitry Andric 
5598cfca06d7SDimitry Andric const char AAICVTracker::ID = 0;
5599344a3780SDimitry Andric const char AAKernelInfo::ID = 0;
5600344a3780SDimitry Andric const char AAExecutionDomain::ID = 0;
5601344a3780SDimitry Andric const char AAHeapToShared::ID = 0;
5602344a3780SDimitry Andric const char AAFoldRuntimeCall::ID = 0;
5603cfca06d7SDimitry Andric 
createForPosition(const IRPosition & IRP,Attributor & A)5604cfca06d7SDimitry Andric AAICVTracker &AAICVTracker::createForPosition(const IRPosition &IRP,
5605cfca06d7SDimitry Andric                                               Attributor &A) {
5606cfca06d7SDimitry Andric   AAICVTracker *AA = nullptr;
5607cfca06d7SDimitry Andric   switch (IRP.getPositionKind()) {
5608cfca06d7SDimitry Andric   case IRPosition::IRP_INVALID:
5609cfca06d7SDimitry Andric   case IRPosition::IRP_FLOAT:
5610cfca06d7SDimitry Andric   case IRPosition::IRP_ARGUMENT:
5611cfca06d7SDimitry Andric   case IRPosition::IRP_CALL_SITE_ARGUMENT:
5612cfca06d7SDimitry Andric     llvm_unreachable("ICVTracker can only be created for function position!");
5613b60736ecSDimitry Andric   case IRPosition::IRP_RETURNED:
5614b60736ecSDimitry Andric     AA = new (A.Allocator) AAICVTrackerFunctionReturned(IRP, A);
5615b60736ecSDimitry Andric     break;
5616b60736ecSDimitry Andric   case IRPosition::IRP_CALL_SITE_RETURNED:
5617b60736ecSDimitry Andric     AA = new (A.Allocator) AAICVTrackerCallSiteReturned(IRP, A);
5618b60736ecSDimitry Andric     break;
5619b60736ecSDimitry Andric   case IRPosition::IRP_CALL_SITE:
5620b60736ecSDimitry Andric     AA = new (A.Allocator) AAICVTrackerCallSite(IRP, A);
5621b60736ecSDimitry Andric     break;
5622cfca06d7SDimitry Andric   case IRPosition::IRP_FUNCTION:
5623cfca06d7SDimitry Andric     AA = new (A.Allocator) AAICVTrackerFunction(IRP, A);
5624cfca06d7SDimitry Andric     break;
5625cfca06d7SDimitry Andric   }
5626cfca06d7SDimitry Andric 
5627cfca06d7SDimitry Andric   return *AA;
5628cfca06d7SDimitry Andric }
5629cfca06d7SDimitry Andric 
createForPosition(const IRPosition & IRP,Attributor & A)5630344a3780SDimitry Andric AAExecutionDomain &AAExecutionDomain::createForPosition(const IRPosition &IRP,
5631344a3780SDimitry Andric                                                         Attributor &A) {
5632344a3780SDimitry Andric   AAExecutionDomainFunction *AA = nullptr;
5633344a3780SDimitry Andric   switch (IRP.getPositionKind()) {
5634344a3780SDimitry Andric   case IRPosition::IRP_INVALID:
5635344a3780SDimitry Andric   case IRPosition::IRP_FLOAT:
5636344a3780SDimitry Andric   case IRPosition::IRP_ARGUMENT:
5637344a3780SDimitry Andric   case IRPosition::IRP_CALL_SITE_ARGUMENT:
5638344a3780SDimitry Andric   case IRPosition::IRP_RETURNED:
5639344a3780SDimitry Andric   case IRPosition::IRP_CALL_SITE_RETURNED:
5640344a3780SDimitry Andric   case IRPosition::IRP_CALL_SITE:
5641344a3780SDimitry Andric     llvm_unreachable(
5642344a3780SDimitry Andric         "AAExecutionDomain can only be created for function position!");
5643344a3780SDimitry Andric   case IRPosition::IRP_FUNCTION:
5644344a3780SDimitry Andric     AA = new (A.Allocator) AAExecutionDomainFunction(IRP, A);
5645344a3780SDimitry Andric     break;
5646344a3780SDimitry Andric   }
5647344a3780SDimitry Andric 
5648344a3780SDimitry Andric   return *AA;
5649344a3780SDimitry Andric }
5650344a3780SDimitry Andric 
createForPosition(const IRPosition & IRP,Attributor & A)5651344a3780SDimitry Andric AAHeapToShared &AAHeapToShared::createForPosition(const IRPosition &IRP,
5652344a3780SDimitry Andric                                                   Attributor &A) {
5653344a3780SDimitry Andric   AAHeapToSharedFunction *AA = nullptr;
5654344a3780SDimitry Andric   switch (IRP.getPositionKind()) {
5655344a3780SDimitry Andric   case IRPosition::IRP_INVALID:
5656344a3780SDimitry Andric   case IRPosition::IRP_FLOAT:
5657344a3780SDimitry Andric   case IRPosition::IRP_ARGUMENT:
5658344a3780SDimitry Andric   case IRPosition::IRP_CALL_SITE_ARGUMENT:
5659344a3780SDimitry Andric   case IRPosition::IRP_RETURNED:
5660344a3780SDimitry Andric   case IRPosition::IRP_CALL_SITE_RETURNED:
5661344a3780SDimitry Andric   case IRPosition::IRP_CALL_SITE:
5662344a3780SDimitry Andric     llvm_unreachable(
5663344a3780SDimitry Andric         "AAHeapToShared can only be created for function position!");
5664344a3780SDimitry Andric   case IRPosition::IRP_FUNCTION:
5665344a3780SDimitry Andric     AA = new (A.Allocator) AAHeapToSharedFunction(IRP, A);
5666344a3780SDimitry Andric     break;
5667344a3780SDimitry Andric   }
5668344a3780SDimitry Andric 
5669344a3780SDimitry Andric   return *AA;
5670344a3780SDimitry Andric }
5671344a3780SDimitry Andric 
createForPosition(const IRPosition & IRP,Attributor & A)5672344a3780SDimitry Andric AAKernelInfo &AAKernelInfo::createForPosition(const IRPosition &IRP,
5673344a3780SDimitry Andric                                               Attributor &A) {
5674344a3780SDimitry Andric   AAKernelInfo *AA = nullptr;
5675344a3780SDimitry Andric   switch (IRP.getPositionKind()) {
5676344a3780SDimitry Andric   case IRPosition::IRP_INVALID:
5677344a3780SDimitry Andric   case IRPosition::IRP_FLOAT:
5678344a3780SDimitry Andric   case IRPosition::IRP_ARGUMENT:
5679344a3780SDimitry Andric   case IRPosition::IRP_RETURNED:
5680344a3780SDimitry Andric   case IRPosition::IRP_CALL_SITE_RETURNED:
5681344a3780SDimitry Andric   case IRPosition::IRP_CALL_SITE_ARGUMENT:
5682344a3780SDimitry Andric     llvm_unreachable("KernelInfo can only be created for function position!");
5683344a3780SDimitry Andric   case IRPosition::IRP_CALL_SITE:
5684344a3780SDimitry Andric     AA = new (A.Allocator) AAKernelInfoCallSite(IRP, A);
5685344a3780SDimitry Andric     break;
5686344a3780SDimitry Andric   case IRPosition::IRP_FUNCTION:
5687344a3780SDimitry Andric     AA = new (A.Allocator) AAKernelInfoFunction(IRP, A);
5688344a3780SDimitry Andric     break;
5689344a3780SDimitry Andric   }
5690344a3780SDimitry Andric 
5691344a3780SDimitry Andric   return *AA;
5692344a3780SDimitry Andric }
5693344a3780SDimitry Andric 
createForPosition(const IRPosition & IRP,Attributor & A)5694344a3780SDimitry Andric AAFoldRuntimeCall &AAFoldRuntimeCall::createForPosition(const IRPosition &IRP,
5695344a3780SDimitry Andric                                                         Attributor &A) {
5696344a3780SDimitry Andric   AAFoldRuntimeCall *AA = nullptr;
5697344a3780SDimitry Andric   switch (IRP.getPositionKind()) {
5698344a3780SDimitry Andric   case IRPosition::IRP_INVALID:
5699344a3780SDimitry Andric   case IRPosition::IRP_FLOAT:
5700344a3780SDimitry Andric   case IRPosition::IRP_ARGUMENT:
5701344a3780SDimitry Andric   case IRPosition::IRP_RETURNED:
5702344a3780SDimitry Andric   case IRPosition::IRP_FUNCTION:
5703344a3780SDimitry Andric   case IRPosition::IRP_CALL_SITE:
5704344a3780SDimitry Andric   case IRPosition::IRP_CALL_SITE_ARGUMENT:
5705344a3780SDimitry Andric     llvm_unreachable("KernelInfo can only be created for call site position!");
5706344a3780SDimitry Andric   case IRPosition::IRP_CALL_SITE_RETURNED:
5707344a3780SDimitry Andric     AA = new (A.Allocator) AAFoldRuntimeCallCallSiteReturned(IRP, A);
5708344a3780SDimitry Andric     break;
5709344a3780SDimitry Andric   }
5710344a3780SDimitry Andric 
5711344a3780SDimitry Andric   return *AA;
5712344a3780SDimitry Andric }
5713344a3780SDimitry Andric 
run(Module & M,ModuleAnalysisManager & AM)5714344a3780SDimitry Andric PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
5715344a3780SDimitry Andric   if (!containsOpenMP(M))
5716344a3780SDimitry Andric     return PreservedAnalyses::all();
5717344a3780SDimitry Andric   if (DisableOpenMPOptimizations)
5718cfca06d7SDimitry Andric     return PreservedAnalyses::all();
5719cfca06d7SDimitry Andric 
5720344a3780SDimitry Andric   FunctionAnalysisManager &FAM =
5721344a3780SDimitry Andric       AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
5722344a3780SDimitry Andric   KernelSet Kernels = getDeviceKernels(M);
5723344a3780SDimitry Andric 
5724145449b1SDimitry Andric   if (PrintModuleBeforeOptimizations)
5725145449b1SDimitry Andric     LLVM_DEBUG(dbgs() << TAG << "Module before OpenMPOpt Module Pass:\n" << M);
5726145449b1SDimitry Andric 
5727344a3780SDimitry Andric   auto IsCalled = [&](Function &F) {
5728344a3780SDimitry Andric     if (Kernels.contains(&F))
5729344a3780SDimitry Andric       return true;
5730344a3780SDimitry Andric     for (const User *U : F.users())
5731344a3780SDimitry Andric       if (!isa<BlockAddress>(U))
5732344a3780SDimitry Andric         return true;
5733344a3780SDimitry Andric     return false;
5734344a3780SDimitry Andric   };
5735344a3780SDimitry Andric 
5736344a3780SDimitry Andric   auto EmitRemark = [&](Function &F) {
5737344a3780SDimitry Andric     auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
5738344a3780SDimitry Andric     ORE.emit([&]() {
5739344a3780SDimitry Andric       OptimizationRemarkAnalysis ORA(DEBUG_TYPE, "OMP140", &F);
5740344a3780SDimitry Andric       return ORA << "Could not internalize function. "
5741c0981da4SDimitry Andric                  << "Some optimizations may not be possible. [OMP140]";
5742344a3780SDimitry Andric     });
5743344a3780SDimitry Andric   };
5744344a3780SDimitry Andric 
57457fa27ce4SDimitry Andric   bool Changed = false;
57467fa27ce4SDimitry Andric 
5747344a3780SDimitry Andric   // Create internal copies of each function if this is a kernel Module. This
5748344a3780SDimitry Andric   // allows iterprocedural passes to see every call edge.
5749c0981da4SDimitry Andric   DenseMap<Function *, Function *> InternalizedMap;
5750c0981da4SDimitry Andric   if (isOpenMPDevice(M)) {
5751c0981da4SDimitry Andric     SmallPtrSet<Function *, 16> InternalizeFns;
5752344a3780SDimitry Andric     for (Function &F : M)
5753344a3780SDimitry Andric       if (!F.isDeclaration() && !Kernels.contains(&F) && IsCalled(F) &&
5754344a3780SDimitry Andric           !DisableInternalization) {
5755c0981da4SDimitry Andric         if (Attributor::isInternalizable(F)) {
5756c0981da4SDimitry Andric           InternalizeFns.insert(&F);
5757344a3780SDimitry Andric         } else if (!F.hasLocalLinkage() && !F.hasFnAttribute(Attribute::Cold)) {
5758344a3780SDimitry Andric           EmitRemark(F);
5759344a3780SDimitry Andric         }
5760344a3780SDimitry Andric       }
5761344a3780SDimitry Andric 
57627fa27ce4SDimitry Andric     Changed |=
5763c0981da4SDimitry Andric         Attributor::internalizeFunctions(InternalizeFns, InternalizedMap);
5764c0981da4SDimitry Andric   }
5765c0981da4SDimitry Andric 
5766344a3780SDimitry Andric   // Look at every function in the Module unless it was internalized.
5767e3b55780SDimitry Andric   SetVector<Function *> Functions;
5768344a3780SDimitry Andric   SmallVector<Function *, 16> SCC;
5769344a3780SDimitry Andric   for (Function &F : M)
5770e3b55780SDimitry Andric     if (!F.isDeclaration() && !InternalizedMap.lookup(&F)) {
5771344a3780SDimitry Andric       SCC.push_back(&F);
5772e3b55780SDimitry Andric       Functions.insert(&F);
5773e3b55780SDimitry Andric     }
5774344a3780SDimitry Andric 
5775344a3780SDimitry Andric   if (SCC.empty())
57767fa27ce4SDimitry Andric     return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
5777344a3780SDimitry Andric 
5778344a3780SDimitry Andric   AnalysisGetter AG(FAM);
5779344a3780SDimitry Andric 
5780344a3780SDimitry Andric   auto OREGetter = [&FAM](Function *F) -> OptimizationRemarkEmitter & {
5781344a3780SDimitry Andric     return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F);
5782344a3780SDimitry Andric   };
5783344a3780SDimitry Andric 
5784344a3780SDimitry Andric   BumpPtrAllocator Allocator;
5785344a3780SDimitry Andric   CallGraphUpdater CGUpdater;
5786344a3780SDimitry Andric 
57877fa27ce4SDimitry Andric   bool PostLink = LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink ||
57887fa27ce4SDimitry Andric                   LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink;
57897fa27ce4SDimitry Andric   OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ nullptr, PostLink);
5790344a3780SDimitry Andric 
5791c0981da4SDimitry Andric   unsigned MaxFixpointIterations =
5792c0981da4SDimitry Andric       (isOpenMPDevice(M)) ? SetFixpointIterations : 32;
5793145449b1SDimitry Andric 
5794145449b1SDimitry Andric   AttributorConfig AC(CGUpdater);
5795145449b1SDimitry Andric   AC.DefaultInitializeLiveInternals = false;
5796e3b55780SDimitry Andric   AC.IsModulePass = true;
5797145449b1SDimitry Andric   AC.RewriteSignatures = false;
5798145449b1SDimitry Andric   AC.MaxFixpointIterations = MaxFixpointIterations;
5799145449b1SDimitry Andric   AC.OREGetter = OREGetter;
5800145449b1SDimitry Andric   AC.PassName = DEBUG_TYPE;
5801e3b55780SDimitry Andric   AC.InitializationCallback = OpenMPOpt::registerAAsForFunction;
58027fa27ce4SDimitry Andric   AC.IPOAmendableCB = [](const Function &F) {
58037fa27ce4SDimitry Andric     return F.hasFnAttribute("kernel");
58047fa27ce4SDimitry Andric   };
5805145449b1SDimitry Andric 
5806145449b1SDimitry Andric   Attributor A(Functions, InfoCache, AC);
5807344a3780SDimitry Andric 
5808344a3780SDimitry Andric   OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
58097fa27ce4SDimitry Andric   Changed |= OMPOpt.run(true);
5810c0981da4SDimitry Andric 
5811c0981da4SDimitry Andric   // Optionally inline device functions for potentially better performance.
5812c0981da4SDimitry Andric   if (AlwaysInlineDeviceFunctions && isOpenMPDevice(M))
5813c0981da4SDimitry Andric     for (Function &F : M)
5814c0981da4SDimitry Andric       if (!F.isDeclaration() && !Kernels.contains(&F) &&
5815c0981da4SDimitry Andric           !F.hasFnAttribute(Attribute::NoInline))
5816c0981da4SDimitry Andric         F.addFnAttr(Attribute::AlwaysInline);
5817c0981da4SDimitry Andric 
5818c0981da4SDimitry Andric   if (PrintModuleAfterOptimizations)
5819c0981da4SDimitry Andric     LLVM_DEBUG(dbgs() << TAG << "Module after OpenMPOpt Module Pass:\n" << M);
5820c0981da4SDimitry Andric 
5821344a3780SDimitry Andric   if (Changed)
5822344a3780SDimitry Andric     return PreservedAnalyses::none();
5823344a3780SDimitry Andric 
5824344a3780SDimitry Andric   return PreservedAnalyses::all();
5825344a3780SDimitry Andric }
5826344a3780SDimitry Andric 
run(LazyCallGraph::SCC & C,CGSCCAnalysisManager & AM,LazyCallGraph & CG,CGSCCUpdateResult & UR)5827344a3780SDimitry Andric PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C,
5828344a3780SDimitry Andric                                           CGSCCAnalysisManager &AM,
5829344a3780SDimitry Andric                                           LazyCallGraph &CG,
5830344a3780SDimitry Andric                                           CGSCCUpdateResult &UR) {
5831344a3780SDimitry Andric   if (!containsOpenMP(*C.begin()->getFunction().getParent()))
5832344a3780SDimitry Andric     return PreservedAnalyses::all();
5833cfca06d7SDimitry Andric   if (DisableOpenMPOptimizations)
5834cfca06d7SDimitry Andric     return PreservedAnalyses::all();
5835cfca06d7SDimitry Andric 
5836cfca06d7SDimitry Andric   SmallVector<Function *, 16> SCC;
5837b60736ecSDimitry Andric   // If there are kernels in the module, we have to run on all SCC's.
5838b60736ecSDimitry Andric   for (LazyCallGraph::Node &N : C) {
5839b60736ecSDimitry Andric     Function *Fn = &N.getFunction();
5840b60736ecSDimitry Andric     SCC.push_back(Fn);
5841b60736ecSDimitry Andric   }
5842b60736ecSDimitry Andric 
5843344a3780SDimitry Andric   if (SCC.empty())
5844cfca06d7SDimitry Andric     return PreservedAnalyses::all();
5845cfca06d7SDimitry Andric 
5846344a3780SDimitry Andric   Module &M = *C.begin()->getFunction().getParent();
5847344a3780SDimitry Andric 
5848145449b1SDimitry Andric   if (PrintModuleBeforeOptimizations)
5849145449b1SDimitry Andric     LLVM_DEBUG(dbgs() << TAG << "Module before OpenMPOpt CGSCC Pass:\n" << M);
5850145449b1SDimitry Andric 
5851344a3780SDimitry Andric   KernelSet Kernels = getDeviceKernels(M);
5852344a3780SDimitry Andric 
5853cfca06d7SDimitry Andric   FunctionAnalysisManager &FAM =
5854cfca06d7SDimitry Andric       AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
5855cfca06d7SDimitry Andric 
5856cfca06d7SDimitry Andric   AnalysisGetter AG(FAM);
5857cfca06d7SDimitry Andric 
5858cfca06d7SDimitry Andric   auto OREGetter = [&FAM](Function *F) -> OptimizationRemarkEmitter & {
5859cfca06d7SDimitry Andric     return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F);
5860cfca06d7SDimitry Andric   };
5861cfca06d7SDimitry Andric 
5862344a3780SDimitry Andric   BumpPtrAllocator Allocator;
5863cfca06d7SDimitry Andric   CallGraphUpdater CGUpdater;
5864cfca06d7SDimitry Andric   CGUpdater.initialize(CG, C, AM, UR);
5865cfca06d7SDimitry Andric 
58667fa27ce4SDimitry Andric   bool PostLink = LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink ||
58677fa27ce4SDimitry Andric                   LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink;
5868cfca06d7SDimitry Andric   SetVector<Function *> Functions(SCC.begin(), SCC.end());
5869cfca06d7SDimitry Andric   OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator,
58707fa27ce4SDimitry Andric                                 /*CGSCC*/ &Functions, PostLink);
5871cfca06d7SDimitry Andric 
5872c0981da4SDimitry Andric   unsigned MaxFixpointIterations =
5873c0981da4SDimitry Andric       (isOpenMPDevice(M)) ? SetFixpointIterations : 32;
5874145449b1SDimitry Andric 
5875145449b1SDimitry Andric   AttributorConfig AC(CGUpdater);
5876145449b1SDimitry Andric   AC.DefaultInitializeLiveInternals = false;
5877145449b1SDimitry Andric   AC.IsModulePass = false;
5878145449b1SDimitry Andric   AC.RewriteSignatures = false;
5879145449b1SDimitry Andric   AC.MaxFixpointIterations = MaxFixpointIterations;
5880145449b1SDimitry Andric   AC.OREGetter = OREGetter;
5881145449b1SDimitry Andric   AC.PassName = DEBUG_TYPE;
5882e3b55780SDimitry Andric   AC.InitializationCallback = OpenMPOpt::registerAAsForFunction;
5883145449b1SDimitry Andric 
5884145449b1SDimitry Andric   Attributor A(Functions, InfoCache, AC);
5885cfca06d7SDimitry Andric 
5886cfca06d7SDimitry Andric   OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
5887344a3780SDimitry Andric   bool Changed = OMPOpt.run(false);
5888c0981da4SDimitry Andric 
5889c0981da4SDimitry Andric   if (PrintModuleAfterOptimizations)
5890c0981da4SDimitry Andric     LLVM_DEBUG(dbgs() << TAG << "Module after OpenMPOpt CGSCC Pass:\n" << M);
5891c0981da4SDimitry Andric 
5892cfca06d7SDimitry Andric   if (Changed)
5893cfca06d7SDimitry Andric     return PreservedAnalyses::none();
5894cfca06d7SDimitry Andric 
5895cfca06d7SDimitry Andric   return PreservedAnalyses::all();
5896cfca06d7SDimitry Andric }
5897cfca06d7SDimitry Andric 
isOpenMPKernel(Function & Fn)5898b1c73532SDimitry Andric bool llvm::omp::isOpenMPKernel(Function &Fn) {
5899b1c73532SDimitry Andric   return Fn.hasFnAttribute("kernel");
5900b1c73532SDimitry Andric }
59017fa27ce4SDimitry Andric 
getDeviceKernels(Module & M)5902344a3780SDimitry Andric KernelSet llvm::omp::getDeviceKernels(Module &M) {
5903344a3780SDimitry Andric   // TODO: Create a more cross-platform way of determining device kernels.
5904e3b55780SDimitry Andric   NamedMDNode *MD = M.getNamedMetadata("nvvm.annotations");
5905344a3780SDimitry Andric   KernelSet Kernels;
5906344a3780SDimitry Andric 
5907cfca06d7SDimitry Andric   if (!MD)
5908344a3780SDimitry Andric     return Kernels;
5909cfca06d7SDimitry Andric 
5910cfca06d7SDimitry Andric   for (auto *Op : MD->operands()) {
5911cfca06d7SDimitry Andric     if (Op->getNumOperands() < 2)
5912cfca06d7SDimitry Andric       continue;
5913cfca06d7SDimitry Andric     MDString *KindID = dyn_cast<MDString>(Op->getOperand(1));
5914cfca06d7SDimitry Andric     if (!KindID || KindID->getString() != "kernel")
5915cfca06d7SDimitry Andric       continue;
5916cfca06d7SDimitry Andric 
5917cfca06d7SDimitry Andric     Function *KernelFn =
5918cfca06d7SDimitry Andric         mdconst::dyn_extract_or_null<Function>(Op->getOperand(0));
5919cfca06d7SDimitry Andric     if (!KernelFn)
5920cfca06d7SDimitry Andric       continue;
5921cfca06d7SDimitry Andric 
5922b1c73532SDimitry Andric     // We are only interested in OpenMP target regions. Others, such as kernels
5923b1c73532SDimitry Andric     // generated by CUDA but linked together, are not interesting to this pass.
5924b1c73532SDimitry Andric     if (isOpenMPKernel(*KernelFn)) {
5925cfca06d7SDimitry Andric       ++NumOpenMPTargetRegionKernels;
5926cfca06d7SDimitry Andric       Kernels.insert(KernelFn);
5927b1c73532SDimitry Andric     } else
5928b1c73532SDimitry Andric       ++NumNonOpenMPTargetRegionKernels;
5929cfca06d7SDimitry Andric   }
5930344a3780SDimitry Andric 
5931344a3780SDimitry Andric   return Kernels;
5932cfca06d7SDimitry Andric }
5933cfca06d7SDimitry Andric 
containsOpenMP(Module & M)5934344a3780SDimitry Andric bool llvm::omp::containsOpenMP(Module &M) {
5935344a3780SDimitry Andric   Metadata *MD = M.getModuleFlag("openmp");
5936344a3780SDimitry Andric   if (!MD)
5937344a3780SDimitry Andric     return false;
5938cfca06d7SDimitry Andric 
5939cfca06d7SDimitry Andric   return true;
5940cfca06d7SDimitry Andric }
5941cfca06d7SDimitry Andric 
isOpenMPDevice(Module & M)5942344a3780SDimitry Andric bool llvm::omp::isOpenMPDevice(Module &M) {
5943344a3780SDimitry Andric   Metadata *MD = M.getModuleFlag("openmp-device");
5944344a3780SDimitry Andric   if (!MD)
5945344a3780SDimitry Andric     return false;
5946344a3780SDimitry Andric 
5947344a3780SDimitry Andric   return true;
5948cfca06d7SDimitry Andric }
5949