1cfca06d7SDimitry Andric //===-- IPO/OpenMPOpt.cpp - Collection of OpenMP specific optimizations ---===//
2cfca06d7SDimitry Andric //
3cfca06d7SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4cfca06d7SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5cfca06d7SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6cfca06d7SDimitry Andric //
7cfca06d7SDimitry Andric //===----------------------------------------------------------------------===//
8cfca06d7SDimitry Andric //
9cfca06d7SDimitry Andric // OpenMP specific optimizations:
10cfca06d7SDimitry Andric //
11cfca06d7SDimitry Andric // - Deduplication of runtime calls, e.g., omp_get_thread_num.
12344a3780SDimitry Andric // - Replacing globalized device memory with stack memory.
13344a3780SDimitry Andric // - Replacing globalized device memory with shared memory.
14344a3780SDimitry Andric // - Parallel region merging.
15344a3780SDimitry Andric // - Transforming generic-mode device kernels to SPMD mode.
16344a3780SDimitry Andric // - Specializing the state machine for generic-mode device kernels.
17cfca06d7SDimitry Andric //
18cfca06d7SDimitry Andric //===----------------------------------------------------------------------===//
19cfca06d7SDimitry Andric
20cfca06d7SDimitry Andric #include "llvm/Transforms/IPO/OpenMPOpt.h"
21cfca06d7SDimitry Andric
22cfca06d7SDimitry Andric #include "llvm/ADT/EnumeratedArray.h"
23344a3780SDimitry Andric #include "llvm/ADT/PostOrderIterator.h"
246f8fc217SDimitry Andric #include "llvm/ADT/SetVector.h"
257fa27ce4SDimitry Andric #include "llvm/ADT/SmallPtrSet.h"
26e3b55780SDimitry Andric #include "llvm/ADT/SmallVector.h"
27cfca06d7SDimitry Andric #include "llvm/ADT/Statistic.h"
287fa27ce4SDimitry Andric #include "llvm/ADT/StringExtras.h"
29c0981da4SDimitry Andric #include "llvm/ADT/StringRef.h"
30cfca06d7SDimitry Andric #include "llvm/Analysis/CallGraph.h"
31cfca06d7SDimitry Andric #include "llvm/Analysis/CallGraphSCCPass.h"
32ecbca9f5SDimitry Andric #include "llvm/Analysis/MemoryLocation.h"
33cfca06d7SDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h"
34b60736ecSDimitry Andric #include "llvm/Analysis/ValueTracking.h"
35cfca06d7SDimitry Andric #include "llvm/Frontend/OpenMP/OMPConstants.h"
36b1c73532SDimitry Andric #include "llvm/Frontend/OpenMP/OMPDeviceConstants.h"
37cfca06d7SDimitry Andric #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
38344a3780SDimitry Andric #include "llvm/IR/Assumptions.h"
39e3b55780SDimitry Andric #include "llvm/IR/BasicBlock.h"
40ecbca9f5SDimitry Andric #include "llvm/IR/Constants.h"
41344a3780SDimitry Andric #include "llvm/IR/DiagnosticInfo.h"
427fa27ce4SDimitry Andric #include "llvm/IR/Dominators.h"
437fa27ce4SDimitry Andric #include "llvm/IR/Function.h"
44344a3780SDimitry Andric #include "llvm/IR/GlobalValue.h"
45ecbca9f5SDimitry Andric #include "llvm/IR/GlobalVariable.h"
46b1c73532SDimitry Andric #include "llvm/IR/InstrTypes.h"
47344a3780SDimitry Andric #include "llvm/IR/Instruction.h"
48ecbca9f5SDimitry Andric #include "llvm/IR/Instructions.h"
49344a3780SDimitry Andric #include "llvm/IR/IntrinsicInst.h"
50c0981da4SDimitry Andric #include "llvm/IR/IntrinsicsAMDGPU.h"
51c0981da4SDimitry Andric #include "llvm/IR/IntrinsicsNVPTX.h"
52ecbca9f5SDimitry Andric #include "llvm/IR/LLVMContext.h"
537fa27ce4SDimitry Andric #include "llvm/Support/Casting.h"
54cfca06d7SDimitry Andric #include "llvm/Support/CommandLine.h"
55ecbca9f5SDimitry Andric #include "llvm/Support/Debug.h"
56cfca06d7SDimitry Andric #include "llvm/Transforms/IPO/Attributor.h"
57b60736ecSDimitry Andric #include "llvm/Transforms/Utils/BasicBlockUtils.h"
58cfca06d7SDimitry Andric #include "llvm/Transforms/Utils/CallGraphUpdater.h"
59cfca06d7SDimitry Andric
60c0981da4SDimitry Andric #include <algorithm>
61e3b55780SDimitry Andric #include <optional>
62e3b55780SDimitry Andric #include <string>
63c0981da4SDimitry Andric
64cfca06d7SDimitry Andric using namespace llvm;
65cfca06d7SDimitry Andric using namespace omp;
66cfca06d7SDimitry Andric
67cfca06d7SDimitry Andric #define DEBUG_TYPE "openmp-opt"
68cfca06d7SDimitry Andric
//===----------------------------------------------------------------------===//
// Command line options that enable, disable, or tune the individual OpenMP
// optimizations implemented in this file.
//===----------------------------------------------------------------------===//

// Master switch: turn off every OpenMP-specific optimization.
static cl::opt<bool> DisableOpenMPOptimizations(
    "openmp-opt-disable", cl::desc("Disable OpenMP specific optimizations."),
    cl::Hidden, cl::init(false));

// Parallel region merging is opt-in (off by default).
static cl::opt<bool> EnableParallelRegionMerging(
    "openmp-opt-enable-merging",
    cl::desc("Enable the OpenMP region merging optimization."), cl::Hidden,
    cl::init(false));

static cl::opt<bool>
    DisableInternalization("openmp-opt-disable-internalization",
                           cl::desc("Disable function internalization."),
                           cl::Hidden, cl::init(false));

// Debugging aids: deduce/print ICV values and print identified GPU kernels.
static cl::opt<bool> DeduceICVValues("openmp-deduce-icv-values",
                                     cl::init(false), cl::Hidden);
static cl::opt<bool> PrintICVValues("openmp-print-icv-values", cl::init(false),
                                    cl::Hidden);
static cl::opt<bool> PrintOpenMPKernels("openmp-print-gpu-kernels",
                                        cl::init(false), cl::Hidden);

static cl::opt<bool> HideMemoryTransferLatency(
    "openmp-hide-memory-transfer-latency",
    cl::desc("[WIP] Tries to hide the latency of host to device memory"
             " transfers"),
    cl::Hidden, cl::init(false));

// Fine-grained kill switches for the individual device optimizations.
static cl::opt<bool> DisableOpenMPOptDeglobalization(
    "openmp-opt-disable-deglobalization",
    cl::desc("Disable OpenMP optimizations involving deglobalization."),
    cl::Hidden, cl::init(false));

static cl::opt<bool> DisableOpenMPOptSPMDization(
    "openmp-opt-disable-spmdization",
    cl::desc("Disable OpenMP optimizations involving SPMD-ization."),
    cl::Hidden, cl::init(false));

static cl::opt<bool> DisableOpenMPOptFolding(
    "openmp-opt-disable-folding",
    cl::desc("Disable OpenMP optimizations involving folding."), cl::Hidden,
    cl::init(false));

static cl::opt<bool> DisableOpenMPOptStateMachineRewrite(
    "openmp-opt-disable-state-machine-rewrite",
    cl::desc("Disable OpenMP optimizations that replace the state machine."),
    cl::Hidden, cl::init(false));

static cl::opt<bool> DisableOpenMPOptBarrierElimination(
    "openmp-opt-disable-barrier-elimination",
    cl::desc("Disable OpenMP optimizations that eliminate barriers."),
    cl::Hidden, cl::init(false));

// Dump the whole module around the pass for debugging.
static cl::opt<bool> PrintModuleAfterOptimizations(
    "openmp-opt-print-module-after",
    cl::desc("Print the current module after OpenMP optimizations."),
    cl::Hidden, cl::init(false));

static cl::opt<bool> PrintModuleBeforeOptimizations(
    "openmp-opt-print-module-before",
    cl::desc("Print the current module before OpenMP optimizations."),
    cl::Hidden, cl::init(false));

static cl::opt<bool> AlwaysInlineDeviceFunctions(
    "openmp-opt-inline-device",
    cl::desc("Inline all applicible functions on the device."), cl::Hidden,
    cl::init(false));

static cl::opt<bool>
    EnableVerboseRemarks("openmp-opt-verbose-remarks",
                         cl::desc("Enables more verbose remarks."), cl::Hidden,
                         cl::init(false));

// Tuning knobs for the Attributor fixpoint iteration and shared-memory use.
static cl::opt<unsigned>
    SetFixpointIterations("openmp-opt-max-iterations", cl::Hidden,
                          cl::desc("Maximal number of attributor iterations."),
                          cl::init(256));

static cl::opt<unsigned>
    SharedMemoryLimit("openmp-opt-shared-limit", cl::Hidden,
                      cl::desc("Maximum amount of shared memory to use."),
                      cl::init(std::numeric_limits<unsigned>::max()));

//===----------------------------------------------------------------------===//
// Statistics gathered by the individual OpenMP optimizations.
//===----------------------------------------------------------------------===//

STATISTIC(NumOpenMPRuntimeCallsDeduplicated,
          "Number of OpenMP runtime calls deduplicated");
STATISTIC(NumOpenMPParallelRegionsDeleted,
          "Number of OpenMP parallel regions deleted");
STATISTIC(NumOpenMPRuntimeFunctionsIdentified,
          "Number of OpenMP runtime functions identified");
STATISTIC(NumOpenMPRuntimeFunctionUsesIdentified,
          "Number of OpenMP runtime function uses identified");
STATISTIC(NumOpenMPTargetRegionKernels,
          "Number of OpenMP target region entry points (=kernels) identified");
STATISTIC(NumNonOpenMPTargetRegionKernels,
          "Number of non-OpenMP target region kernels identified");
STATISTIC(NumOpenMPTargetRegionKernelsSPMD,
          "Number of OpenMP target region entry points (=kernels) executed in "
          "SPMD-mode instead of generic-mode");
STATISTIC(NumOpenMPTargetRegionKernelsWithoutStateMachine,
          "Number of OpenMP target region entry points (=kernels) executed in "
          "generic-mode without a state machines");
STATISTIC(NumOpenMPTargetRegionKernelsCustomStateMachineWithFallback,
          "Number of OpenMP target region entry points (=kernels) executed in "
          "generic-mode with customized state machines with fallback");
STATISTIC(NumOpenMPTargetRegionKernelsCustomStateMachineWithoutFallback,
          "Number of OpenMP target region entry points (=kernels) executed in "
          "generic-mode with customized state machines without fallback");
STATISTIC(
    NumOpenMPParallelRegionsReplacedInGPUStateMachine,
    "Number of OpenMP parallel regions replaced with ID in GPU state machines");
STATISTIC(NumOpenMPParallelRegionsMerged,
          "Number of OpenMP parallel regions merged");
STATISTIC(NumBytesMovedToSharedMemory,
          "Amount of memory pushed to shared memory");
STATISTIC(NumBarriersEliminated, "Number of redundant barriers eliminated");

#if !defined(NDEBUG)
// Prefix for debug output; only needed in asserts builds.
static constexpr auto TAG = "[" DEBUG_TYPE "]";
#endif
187cfca06d7SDimitry Andric
/// Helpers to inspect the constant "kernel environment" of an OpenMP device
/// kernel. The environment is a global variable whose constant initializer is
/// laid out like the structs sketched in the comments below; the generated
/// getters extract individual members from that initializer.
namespace KernelInfo {

// struct ConfigurationEnvironmentTy {
//   uint8_t UseGenericStateMachine;
//   uint8_t MayUseNestedParallelism;
//   llvm::omp::OMPTgtExecModeFlags ExecMode;
//   int32_t MinThreads;
//   int32_t MaxThreads;
//   int32_t MinTeams;
//   int32_t MaxTeams;
// };

// struct DynamicEnvironmentTy {
//   uint16_t DebugIndentionLevel;
// };

// struct KernelEnvironmentTy {
//   ConfigurationEnvironmentTy Configuration;
//   IdentTy *Ident;
//   DynamicEnvironmentTy *DynamicEnv;
// };

// Defines MEMBERIdx, the aggregate-element index of MEMBER inside
// KernelEnvironmentTy.
#define KERNEL_ENVIRONMENT_IDX(MEMBER, IDX)                                    \
  constexpr const unsigned MEMBER##Idx = IDX;

KERNEL_ENVIRONMENT_IDX(Configuration, 0)
KERNEL_ENVIRONMENT_IDX(Ident, 1)

#undef KERNEL_ENVIRONMENT_IDX

// Defines MEMBERIdx, the aggregate-element index of MEMBER inside
// ConfigurationEnvironmentTy.
#define KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MEMBER, IDX)                      \
  constexpr const unsigned MEMBER##Idx = IDX;

KERNEL_ENVIRONMENT_CONFIGURATION_IDX(UseGenericStateMachine, 0)
KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MayUseNestedParallelism, 1)
KERNEL_ENVIRONMENT_CONFIGURATION_IDX(ExecMode, 2)
KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MinThreads, 3)
KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MaxThreads, 4)
KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MinTeams, 5)
KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MaxTeams, 6)

#undef KERNEL_ENVIRONMENT_CONFIGURATION_IDX

// Emits get<MEMBER>FromKernelEnvironment(KernelEnvC); the cast<> asserts that
// the member really is a RETURNTYPE.
#define KERNEL_ENVIRONMENT_GETTER(MEMBER, RETURNTYPE)                          \
  RETURNTYPE *get##MEMBER##FromKernelEnvironment(ConstantStruct *KernelEnvC) { \
    return cast<RETURNTYPE>(KernelEnvC->getAggregateElement(MEMBER##Idx));     \
  }

KERNEL_ENVIRONMENT_GETTER(Ident, Constant)
KERNEL_ENVIRONMENT_GETTER(Configuration, ConstantStruct)

#undef KERNEL_ENVIRONMENT_GETTER

// Emits get<MEMBER>FromKernelEnvironment(KernelEnvC) for configuration
// members. Note the dyn_cast<>: these getters return nullptr when the member
// is not a ConstantInt, so callers must be prepared for that.
#define KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(MEMBER)                        \
  ConstantInt *get##MEMBER##FromKernelEnvironment(                             \
      ConstantStruct *KernelEnvC) {                                            \
    ConstantStruct *ConfigC =                                                  \
        getConfigurationFromKernelEnvironment(KernelEnvC);                     \
    return dyn_cast<ConstantInt>(ConfigC->getAggregateElement(MEMBER##Idx));   \
  }

KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(UseGenericStateMachine)
KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(MayUseNestedParallelism)
KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(ExecMode)
KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(MinThreads)
KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(MaxThreads)
KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(MinTeams)
KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(MaxTeams)

#undef KERNEL_ENVIRONMENT_CONFIGURATION_GETTER

/// Return the kernel environment global variable, i.e. argument 0 of the
/// kernel initialization call \p KernelInitCB, with pointer casts stripped.
GlobalVariable *
getKernelEnvironementGVFromKernelInitCB(CallBase *KernelInitCB) {
  constexpr const int InitKernelEnvironmentArgNo = 0;
  return cast<GlobalVariable>(
      KernelInitCB->getArgOperand(InitKernelEnvironmentArgNo)
          ->stripPointerCasts());
}

/// Return the constant initializer of the kernel environment global
/// referenced by the kernel initialization call \p KernelInitCB.
ConstantStruct *getKernelEnvironementFromKernelInitCB(CallBase *KernelInitCB) {
  GlobalVariable *KernelEnvGV =
      getKernelEnvironementGVFromKernelInitCB(KernelInitCB);
  return cast<ConstantStruct>(KernelEnvGV->getInitializer());
}
} // namespace KernelInfo
273b1c73532SDimitry Andric
274cfca06d7SDimitry Andric namespace {
275cfca06d7SDimitry Andric
276344a3780SDimitry Andric struct AAHeapToShared;
277344a3780SDimitry Andric
278cfca06d7SDimitry Andric struct AAICVTracker;
279cfca06d7SDimitry Andric
280cfca06d7SDimitry Andric /// OpenMP specific information. For now, stores RFIs and ICVs also needed for
281cfca06d7SDimitry Andric /// Attributor runs.
282cfca06d7SDimitry Andric struct OMPInformationCache : public InformationCache {
  /// Construct the cache: configure the OpenMP-IR-Builder for the current
  /// module (host vs. device compilation), then populate the runtime function
  /// (RFIs) and internal control variable (ICVs) tables.
  ///
  /// \p CGSCC, if non-null and non-empty, restricts use collection to the
  /// functions it contains; \p OpenMPPostLink records whether the device
  /// runtime has already been linked in.
  OMPInformationCache(Module &M, AnalysisGetter &AG,
                      BumpPtrAllocator &Allocator, SetVector<Function *> *CGSCC,
                      bool OpenMPPostLink)
      : InformationCache(M, AG, Allocator, CGSCC), OMPBuilder(M),
        OpenMPPostLink(OpenMPPostLink) {

    OMPBuilder.Config.IsTargetDevice = isOpenMPDevice(OMPBuilder.M);
    OMPBuilder.initialize();
    initializeRuntimeFunctions(M);
    initializeInternalControlVars();
  }
294cfca06d7SDimitry Andric
  /// Generic information that describes an internal control variable.
  ///
  /// Instances live in the ICVs table and are populated from OMPKinds.def by
  /// initializeInternalControlVars().
  struct InternalControlVarInfo {
    /// The kind, as described by InternalControlVar enum.
    InternalControlVar Kind;

    /// The name of the ICV.
    StringRef Name;

    /// Environment variable associated with this ICV.
    StringRef EnvVarName;

    /// Initial value kind.
    ICVInitValue InitKind;

    /// Initial value (nullptr for implementation-defined initial values).
    ConstantInt *InitValue;

    /// Setter RTL function associated with this ICV.
    RuntimeFunction Setter;

    /// Getter RTL function associated with this ICV.
    RuntimeFunction Getter;

    /// RTL Function corresponding to the override clause of this ICV.
    RuntimeFunction Clause;
  };
321cfca06d7SDimitry Andric
322cfca06d7SDimitry Andric /// Generic information that describes a runtime function
323cfca06d7SDimitry Andric struct RuntimeFunctionInfo {
324cfca06d7SDimitry Andric
325cfca06d7SDimitry Andric /// The kind, as described by the RuntimeFunction enum.
326cfca06d7SDimitry Andric RuntimeFunction Kind;
327cfca06d7SDimitry Andric
328cfca06d7SDimitry Andric /// The name of the function.
329cfca06d7SDimitry Andric StringRef Name;
330cfca06d7SDimitry Andric
331cfca06d7SDimitry Andric /// Flag to indicate a variadic function.
332cfca06d7SDimitry Andric bool IsVarArg;
333cfca06d7SDimitry Andric
334cfca06d7SDimitry Andric /// The return type of the function.
335cfca06d7SDimitry Andric Type *ReturnType;
336cfca06d7SDimitry Andric
337cfca06d7SDimitry Andric /// The argument types of the function.
338cfca06d7SDimitry Andric SmallVector<Type *, 8> ArgumentTypes;
339cfca06d7SDimitry Andric
340cfca06d7SDimitry Andric /// The declaration if available.
341cfca06d7SDimitry Andric Function *Declaration = nullptr;
342cfca06d7SDimitry Andric
343cfca06d7SDimitry Andric /// Uses of this runtime function per function containing the use.
344cfca06d7SDimitry Andric using UseVector = SmallVector<Use *, 16>;
345cfca06d7SDimitry Andric
346cfca06d7SDimitry Andric /// Clear UsesMap for runtime function.
clearUsesMap__anon7bbaa8dc0111::OMPInformationCache::RuntimeFunctionInfo347cfca06d7SDimitry Andric void clearUsesMap() { UsesMap.clear(); }
348cfca06d7SDimitry Andric
349cfca06d7SDimitry Andric /// Boolean conversion that is true if the runtime function was found.
operator bool__anon7bbaa8dc0111::OMPInformationCache::RuntimeFunctionInfo350cfca06d7SDimitry Andric operator bool() const { return Declaration; }
351cfca06d7SDimitry Andric
352cfca06d7SDimitry Andric /// Return the vector of uses in function \p F.
getOrCreateUseVector__anon7bbaa8dc0111::OMPInformationCache::RuntimeFunctionInfo353cfca06d7SDimitry Andric UseVector &getOrCreateUseVector(Function *F) {
354cfca06d7SDimitry Andric std::shared_ptr<UseVector> &UV = UsesMap[F];
355cfca06d7SDimitry Andric if (!UV)
356cfca06d7SDimitry Andric UV = std::make_shared<UseVector>();
357cfca06d7SDimitry Andric return *UV;
358cfca06d7SDimitry Andric }
359cfca06d7SDimitry Andric
360cfca06d7SDimitry Andric /// Return the vector of uses in function \p F or `nullptr` if there are
361cfca06d7SDimitry Andric /// none.
getUseVector__anon7bbaa8dc0111::OMPInformationCache::RuntimeFunctionInfo362cfca06d7SDimitry Andric const UseVector *getUseVector(Function &F) const {
363cfca06d7SDimitry Andric auto I = UsesMap.find(&F);
364cfca06d7SDimitry Andric if (I != UsesMap.end())
365cfca06d7SDimitry Andric return I->second.get();
366cfca06d7SDimitry Andric return nullptr;
367cfca06d7SDimitry Andric }
368cfca06d7SDimitry Andric
369cfca06d7SDimitry Andric /// Return how many functions contain uses of this runtime function.
getNumFunctionsWithUses__anon7bbaa8dc0111::OMPInformationCache::RuntimeFunctionInfo370cfca06d7SDimitry Andric size_t getNumFunctionsWithUses() const { return UsesMap.size(); }
371cfca06d7SDimitry Andric
372cfca06d7SDimitry Andric /// Return the number of arguments (or the minimal number for variadic
373cfca06d7SDimitry Andric /// functions).
getNumArgs__anon7bbaa8dc0111::OMPInformationCache::RuntimeFunctionInfo374cfca06d7SDimitry Andric size_t getNumArgs() const { return ArgumentTypes.size(); }
375cfca06d7SDimitry Andric
376cfca06d7SDimitry Andric /// Run the callback \p CB on each use and forget the use if the result is
377cfca06d7SDimitry Andric /// true. The callback will be fed the function in which the use was
378cfca06d7SDimitry Andric /// encountered as second argument.
foreachUse__anon7bbaa8dc0111::OMPInformationCache::RuntimeFunctionInfo379cfca06d7SDimitry Andric void foreachUse(SmallVectorImpl<Function *> &SCC,
380cfca06d7SDimitry Andric function_ref<bool(Use &, Function &)> CB) {
381cfca06d7SDimitry Andric for (Function *F : SCC)
382cfca06d7SDimitry Andric foreachUse(CB, F);
383cfca06d7SDimitry Andric }
384cfca06d7SDimitry Andric
385cfca06d7SDimitry Andric /// Run the callback \p CB on each use within the function \p F and forget
386cfca06d7SDimitry Andric /// the use if the result is true.
foreachUse__anon7bbaa8dc0111::OMPInformationCache::RuntimeFunctionInfo387cfca06d7SDimitry Andric void foreachUse(function_ref<bool(Use &, Function &)> CB, Function *F) {
388cfca06d7SDimitry Andric SmallVector<unsigned, 8> ToBeDeleted;
389cfca06d7SDimitry Andric ToBeDeleted.clear();
390cfca06d7SDimitry Andric
391cfca06d7SDimitry Andric unsigned Idx = 0;
392cfca06d7SDimitry Andric UseVector &UV = getOrCreateUseVector(F);
393cfca06d7SDimitry Andric
394cfca06d7SDimitry Andric for (Use *U : UV) {
395cfca06d7SDimitry Andric if (CB(*U, *F))
396cfca06d7SDimitry Andric ToBeDeleted.push_back(Idx);
397cfca06d7SDimitry Andric ++Idx;
398cfca06d7SDimitry Andric }
399cfca06d7SDimitry Andric
400cfca06d7SDimitry Andric // Remove the to-be-deleted indices in reverse order as prior
401cfca06d7SDimitry Andric // modifications will not modify the smaller indices.
402cfca06d7SDimitry Andric while (!ToBeDeleted.empty()) {
403cfca06d7SDimitry Andric unsigned Idx = ToBeDeleted.pop_back_val();
404cfca06d7SDimitry Andric UV[Idx] = UV.back();
405cfca06d7SDimitry Andric UV.pop_back();
406cfca06d7SDimitry Andric }
407cfca06d7SDimitry Andric }
408cfca06d7SDimitry Andric
409cfca06d7SDimitry Andric private:
410cfca06d7SDimitry Andric /// Map from functions to all uses of this runtime function contained in
411cfca06d7SDimitry Andric /// them.
412cfca06d7SDimitry Andric DenseMap<Function *, std::shared_ptr<UseVector>> UsesMap;
413344a3780SDimitry Andric
414344a3780SDimitry Andric public:
415344a3780SDimitry Andric /// Iterators for the uses of this runtime function.
begin__anon7bbaa8dc0111::OMPInformationCache::RuntimeFunctionInfo416344a3780SDimitry Andric decltype(UsesMap)::iterator begin() { return UsesMap.begin(); }
end__anon7bbaa8dc0111::OMPInformationCache::RuntimeFunctionInfo417344a3780SDimitry Andric decltype(UsesMap)::iterator end() { return UsesMap.end(); }
418cfca06d7SDimitry Andric };
419cfca06d7SDimitry Andric
420cfca06d7SDimitry Andric /// An OpenMP-IR-Builder instance
421cfca06d7SDimitry Andric OpenMPIRBuilder OMPBuilder;
422cfca06d7SDimitry Andric
423cfca06d7SDimitry Andric /// Map from runtime function kind to the runtime function description.
424cfca06d7SDimitry Andric EnumeratedArray<RuntimeFunctionInfo, RuntimeFunction,
425cfca06d7SDimitry Andric RuntimeFunction::OMPRTL___last>
426cfca06d7SDimitry Andric RFIs;
427cfca06d7SDimitry Andric
428344a3780SDimitry Andric /// Map from function declarations/definitions to their runtime enum type.
429344a3780SDimitry Andric DenseMap<Function *, RuntimeFunction> RuntimeFunctionIDMap;
430344a3780SDimitry Andric
431cfca06d7SDimitry Andric /// Map from ICV kind to the ICV description.
432cfca06d7SDimitry Andric EnumeratedArray<InternalControlVarInfo, InternalControlVar,
433cfca06d7SDimitry Andric InternalControlVar::ICV___last>
434cfca06d7SDimitry Andric ICVs;
435cfca06d7SDimitry Andric
  /// Helper to initialize all internal control variable information for those
  /// defined in OMPKinds.def.
  ///
  /// Each ICV_* macro below is expanded (possibly repeatedly) by the
  /// OMPKinds.def include at the end; every expansion fills in one field of
  /// the corresponding entry in the ICVs table.
  void initializeInternalControlVars() {
// Record the setter RTL function of an ICV.
#define ICV_RT_SET(_Name, RTL)                                                 \
  {                                                                            \
    auto &ICV = ICVs[_Name];                                                   \
    ICV.Setter = RTL;                                                          \
  }
// Record the getter RTL function of an ICV.
#define ICV_RT_GET(Name, RTL)                                                  \
  {                                                                            \
    auto &ICV = ICVs[Name];                                                    \
    ICV.Getter = RTL;                                                          \
  }
// Record name, kind, environment variable, and initial value of an ICV. The
// initial value is materialized as a constant for ICV_ZERO/ICV_FALSE and left
// as nullptr when it is implementation defined.
#define ICV_DATA_ENV(Enum, _Name, _EnvVarName, Init)                           \
  {                                                                            \
    auto &ICV = ICVs[Enum];                                                    \
    ICV.Name = _Name;                                                          \
    ICV.Kind = Enum;                                                           \
    ICV.InitKind = Init;                                                       \
    ICV.EnvVarName = _EnvVarName;                                              \
    switch (ICV.InitKind) {                                                    \
    case ICV_IMPLEMENTATION_DEFINED:                                           \
      ICV.InitValue = nullptr;                                                 \
      break;                                                                   \
    case ICV_ZERO:                                                             \
      ICV.InitValue = ConstantInt::get(                                        \
          Type::getInt32Ty(OMPBuilder.Int32->getContext()), 0);                \
      break;                                                                   \
    case ICV_FALSE:                                                            \
      ICV.InitValue = ConstantInt::getFalse(OMPBuilder.Int1->getContext());    \
      break;                                                                   \
    case ICV_LAST:                                                             \
      break;                                                                   \
    }                                                                          \
  }
#include "llvm/Frontend/OpenMP/OMPKinds.def"
  }
473cfca06d7SDimitry Andric
474cfca06d7SDimitry Andric /// Returns true if the function declaration \p F matches the runtime
475cfca06d7SDimitry Andric /// function types, that is, return type \p RTFRetType, and argument types
476cfca06d7SDimitry Andric /// \p RTFArgTypes.
declMatchesRTFTypes__anon7bbaa8dc0111::OMPInformationCache477cfca06d7SDimitry Andric static bool declMatchesRTFTypes(Function *F, Type *RTFRetType,
478cfca06d7SDimitry Andric SmallVector<Type *, 8> &RTFArgTypes) {
479cfca06d7SDimitry Andric // TODO: We should output information to the user (under debug output
480cfca06d7SDimitry Andric // and via remarks).
481cfca06d7SDimitry Andric
482cfca06d7SDimitry Andric if (!F)
483cfca06d7SDimitry Andric return false;
484cfca06d7SDimitry Andric if (F->getReturnType() != RTFRetType)
485cfca06d7SDimitry Andric return false;
486cfca06d7SDimitry Andric if (F->arg_size() != RTFArgTypes.size())
487cfca06d7SDimitry Andric return false;
488cfca06d7SDimitry Andric
489c0981da4SDimitry Andric auto *RTFTyIt = RTFArgTypes.begin();
490cfca06d7SDimitry Andric for (Argument &Arg : F->args()) {
491cfca06d7SDimitry Andric if (Arg.getType() != *RTFTyIt)
492cfca06d7SDimitry Andric return false;
493cfca06d7SDimitry Andric
494cfca06d7SDimitry Andric ++RTFTyIt;
495cfca06d7SDimitry Andric }
496cfca06d7SDimitry Andric
497cfca06d7SDimitry Andric return true;
498cfca06d7SDimitry Andric }
499cfca06d7SDimitry Andric
500cfca06d7SDimitry Andric // Helper to collect all uses of the declaration in the UsesMap.
collectUses__anon7bbaa8dc0111::OMPInformationCache501cfca06d7SDimitry Andric unsigned collectUses(RuntimeFunctionInfo &RFI, bool CollectStats = true) {
502cfca06d7SDimitry Andric unsigned NumUses = 0;
503cfca06d7SDimitry Andric if (!RFI.Declaration)
504cfca06d7SDimitry Andric return NumUses;
505cfca06d7SDimitry Andric OMPBuilder.addAttributes(RFI.Kind, *RFI.Declaration);
506cfca06d7SDimitry Andric
507cfca06d7SDimitry Andric if (CollectStats) {
508cfca06d7SDimitry Andric NumOpenMPRuntimeFunctionsIdentified += 1;
509cfca06d7SDimitry Andric NumOpenMPRuntimeFunctionUsesIdentified += RFI.Declaration->getNumUses();
510cfca06d7SDimitry Andric }
511cfca06d7SDimitry Andric
512cfca06d7SDimitry Andric // TODO: We directly convert uses into proper calls and unknown uses.
513cfca06d7SDimitry Andric for (Use &U : RFI.Declaration->uses()) {
514cfca06d7SDimitry Andric if (Instruction *UserI = dyn_cast<Instruction>(U.getUser())) {
5157fa27ce4SDimitry Andric if (!CGSCC || CGSCC->empty() || CGSCC->contains(UserI->getFunction())) {
516cfca06d7SDimitry Andric RFI.getOrCreateUseVector(UserI->getFunction()).push_back(&U);
517cfca06d7SDimitry Andric ++NumUses;
518cfca06d7SDimitry Andric }
519cfca06d7SDimitry Andric } else {
520cfca06d7SDimitry Andric RFI.getOrCreateUseVector(nullptr).push_back(&U);
521cfca06d7SDimitry Andric ++NumUses;
522cfca06d7SDimitry Andric }
523cfca06d7SDimitry Andric }
524cfca06d7SDimitry Andric return NumUses;
525cfca06d7SDimitry Andric }
526cfca06d7SDimitry Andric
527b60736ecSDimitry Andric // Helper function to recollect uses of a runtime function.
recollectUsesForFunction__anon7bbaa8dc0111::OMPInformationCache528b60736ecSDimitry Andric void recollectUsesForFunction(RuntimeFunction RTF) {
529b60736ecSDimitry Andric auto &RFI = RFIs[RTF];
530cfca06d7SDimitry Andric RFI.clearUsesMap();
531cfca06d7SDimitry Andric collectUses(RFI, /*CollectStats*/ false);
532cfca06d7SDimitry Andric }
533b60736ecSDimitry Andric
534b60736ecSDimitry Andric // Helper function to recollect uses of all runtime functions.
recollectUses__anon7bbaa8dc0111::OMPInformationCache535b60736ecSDimitry Andric void recollectUses() {
536b60736ecSDimitry Andric for (int Idx = 0; Idx < RFIs.size(); ++Idx)
537b60736ecSDimitry Andric recollectUsesForFunction(static_cast<RuntimeFunction>(Idx));
538cfca06d7SDimitry Andric }
539cfca06d7SDimitry Andric
5406f8fc217SDimitry Andric // Helper function to inherit the calling convention of the function callee.
setCallingConvention__anon7bbaa8dc0111::OMPInformationCache5416f8fc217SDimitry Andric void setCallingConvention(FunctionCallee Callee, CallInst *CI) {
5426f8fc217SDimitry Andric if (Function *Fn = dyn_cast<Function>(Callee.getCallee()))
5436f8fc217SDimitry Andric CI->setCallingConv(Fn->getCallingConv());
5446f8fc217SDimitry Andric }
5456f8fc217SDimitry Andric
5467fa27ce4SDimitry Andric // Helper function to determine if it's legal to create a call to the runtime
5477fa27ce4SDimitry Andric // functions.
runtimeFnsAvailable__anon7bbaa8dc0111::OMPInformationCache5487fa27ce4SDimitry Andric bool runtimeFnsAvailable(ArrayRef<RuntimeFunction> Fns) {
5497fa27ce4SDimitry Andric // We can always emit calls if we haven't yet linked in the runtime.
5507fa27ce4SDimitry Andric if (!OpenMPPostLink)
5517fa27ce4SDimitry Andric return true;
5527fa27ce4SDimitry Andric
5537fa27ce4SDimitry Andric // Once the runtime has been already been linked in we cannot emit calls to
5547fa27ce4SDimitry Andric // any undefined functions.
5557fa27ce4SDimitry Andric for (RuntimeFunction Fn : Fns) {
5567fa27ce4SDimitry Andric RuntimeFunctionInfo &RFI = RFIs[Fn];
5577fa27ce4SDimitry Andric
5587fa27ce4SDimitry Andric if (RFI.Declaration && RFI.Declaration->isDeclaration())
5597fa27ce4SDimitry Andric return false;
5607fa27ce4SDimitry Andric }
5617fa27ce4SDimitry Andric return true;
5627fa27ce4SDimitry Andric }
5637fa27ce4SDimitry Andric
/// Helper to initialize all runtime function information for those defined
/// in OpenMPKinds.def.
void initializeRuntimeFunctions(Module &M) {

  // Helper macros for handling __VA_ARGS__ in OMP_RTL
#define OMP_TYPE(VarName, ...)                                                 \
  Type *VarName = OMPBuilder.VarName;                                          \
  (void)VarName;

#define OMP_ARRAY_TYPE(VarName, ...)                                           \
  ArrayType *VarName##Ty = OMPBuilder.VarName##Ty;                             \
  (void)VarName##Ty;                                                           \
  PointerType *VarName##PtrTy = OMPBuilder.VarName##PtrTy;                     \
  (void)VarName##PtrTy;

#define OMP_FUNCTION_TYPE(VarName, ...)                                        \
  FunctionType *VarName = OMPBuilder.VarName;                                  \
  (void)VarName;                                                               \
  PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr;                         \
  (void)VarName##Ptr;

#define OMP_STRUCT_TYPE(VarName, ...)                                          \
  StructType *VarName = OMPBuilder.VarName;                                    \
  (void)VarName;                                                               \
  PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr;                         \
  (void)VarName##Ptr;

  // For every runtime function listed in OMPKinds.def: if the module contains
  // a function with the expected name whose type matches, record it in the
  // RFIs table and collect all of its uses via collectUses().
#define OMP_RTL(_Enum, _Name, _IsVarArg, _ReturnType, ...)                     \
  {                                                                            \
    SmallVector<Type *, 8> ArgsTypes({__VA_ARGS__});                           \
    Function *F = M.getFunction(_Name);                                        \
    RTLFunctions.insert(F);                                                    \
    if (declMatchesRTFTypes(F, OMPBuilder._ReturnType, ArgsTypes)) {           \
      RuntimeFunctionIDMap[F] = _Enum;                                         \
      auto &RFI = RFIs[_Enum];                                                 \
      RFI.Kind = _Enum;                                                        \
      RFI.Name = _Name;                                                        \
      RFI.IsVarArg = _IsVarArg;                                                \
      RFI.ReturnType = OMPBuilder._ReturnType;                                 \
      RFI.ArgumentTypes = std::move(ArgsTypes);                                \
      RFI.Declaration = F;                                                     \
      unsigned NumUses = collectUses(RFI);                                     \
      (void)NumUses;                                                           \
      LLVM_DEBUG({                                                             \
        dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not")           \
               << " found\n";                                                  \
        if (RFI.Declaration)                                                   \
          dbgs() << TAG << "-> got " << NumUses << " uses in "                 \
                 << RFI.getNumFunctionsWithUses()                              \
                 << " different functions.\n";                                 \
      });                                                                      \
    }                                                                          \
  }
#include "llvm/Frontend/OpenMP/OMPKinds.def"

  // Remove the `noinline` attribute from `__kmpc`, `ompx::` and `omp_`
  // functions, except if `optnone` is present.
  if (isOpenMPDevice(M)) {
    for (Function &F : M) {
      for (StringRef Prefix : {"__kmpc", "_ZN4ompx", "omp_"})
        if (F.hasFnAttribute(Attribute::NoInline) &&
            F.getName().starts_with(Prefix) &&
            !F.hasFnAttribute(Attribute::OptimizeNone))
          F.removeFnAttr(Attribute::NoInline);
    }
  }

  // TODO: We should attach the attributes defined in OMPKinds.def.
}
633cfca06d7SDimitry Andric
/// Collection of known OpenMP runtime functions.
DenseSet<const Function *> RTLFunctions;

/// Indicates if we have already linked in the OpenMP device library.
bool OpenMPPostLink = false;
639344a3780SDimitry Andric };
640344a3780SDimitry Andric
/// A BooleanState augmented with an insertion-ordered set of elements.
/// If \p InsertInvalidates is true, any insertion pessimizes the boolean
/// state (i.e., the state becomes invalid as soon as an element is seen).
template <typename Ty, bool InsertInvalidates = true>
struct BooleanStateWithSetVector : public BooleanState {
  /// Return true if \p Elem was inserted before.
  bool contains(const Ty &Elem) const { return Set.contains(Elem); }
  /// Insert \p Elem, pessimizing the boolean state if \p InsertInvalidates.
  /// Returns true if the element was newly inserted.
  bool insert(const Ty &Elem) {
    if (InsertInvalidates)
      BooleanState::indicatePessimisticFixpoint();
    return Set.insert(Elem);
  }

  /// Access the \p Idx-th element in insertion order.
  const Ty &operator[](int Idx) const { return Set[Idx]; }
  /// Equality requires both the boolean state and the sets to match.
  bool operator==(const BooleanStateWithSetVector &RHS) const {
    return BooleanState::operator==(RHS) && Set == RHS.Set;
  }
  bool operator!=(const BooleanStateWithSetVector &RHS) const {
    return !(*this == RHS);
  }

  bool empty() const { return Set.empty(); }
  size_t size() const { return Set.size(); }

  /// "Clamp" this state with \p RHS: merge the boolean states and take the
  /// union of the element sets.
  BooleanStateWithSetVector &operator^=(const BooleanStateWithSetVector &RHS) {
    BooleanState::operator^=(RHS);
    Set.insert(RHS.Set.begin(), RHS.Set.end());
    return *this;
  }

private:
  /// A set to keep track of elements.
  SetVector<Ty> Set;

public:
  /// Iterators over the underlying set, in insertion order.
  typename decltype(Set)::iterator begin() { return Set.begin(); }
  typename decltype(Set)::iterator end() { return Set.end(); }
  typename decltype(Set)::const_iterator begin() const { return Set.begin(); }
  typename decltype(Set)::const_iterator end() const { return Set.end(); }
};
678344a3780SDimitry Andric
/// Convenience alias: a BooleanStateWithSetVector over pointers to \p Ty.
template <typename Ty, bool InsertInvalidates = true>
using BooleanStateWithPtrSetVector =
    BooleanStateWithSetVector<Ty *, InsertInvalidates>;
682344a3780SDimitry Andric
struct KernelInfoState : AbstractState {
  /// Flag to track if we reached a fixpoint.
  bool IsAtFixpoint = false;

  /// The parallel regions (identified by the outlined parallel functions) that
  /// can be reached from the associated function.
  BooleanStateWithPtrSetVector<CallBase, /* InsertInvalidates */ false>
      ReachedKnownParallelRegions;

  /// State to track what parallel region we might reach.
  BooleanStateWithPtrSetVector<CallBase> ReachedUnknownParallelRegions;

  /// State to track if we are in SPMD-mode, assumed or known, and why we
  /// decided we cannot be. If it is assumed, then RequiresFullRuntime should
  /// also be false.
  BooleanStateWithPtrSetVector<Instruction, false> SPMDCompatibilityTracker;

  /// The __kmpc_target_init call in this kernel, if any. If we find more than
  /// one we abort as the kernel is malformed.
  CallBase *KernelInitCB = nullptr;

  /// The constant kernel environment as taken from and passed to
  /// __kmpc_target_init.
  ConstantStruct *KernelEnvC = nullptr;

  /// The __kmpc_target_deinit call in this kernel, if any. If we find more than
  /// one we abort as the kernel is malformed.
  CallBase *KernelDeinitCB = nullptr;

  /// Flag to indicate if the associated function is a kernel entry.
  bool IsKernelEntry = false;

  /// State to track what kernel entries can reach the associated function.
  BooleanStateWithPtrSetVector<Function, false> ReachingKernelEntries;

  /// State to indicate if we can track the parallel level of the associated
  /// function. We will give up tracking if we encounter an unknown caller or
  /// the caller is __kmpc_parallel_51.
  BooleanStateWithSetVector<uint8_t> ParallelLevels;

  /// Flag that indicates if the kernel has nested parallelism.
  bool NestedParallelism = false;

  /// Abstract State interface
  ///{

  KernelInfoState() = default;
  /// Construct the best (\p BestState == true) or the worst possible state.
  KernelInfoState(bool BestState) {
    if (!BestState)
      indicatePessimisticFixpoint();
  }

  /// See AbstractState::isValidState(...)
  bool isValidState() const override { return true; }

  /// See AbstractState::isAtFixpoint(...)
  bool isAtFixpoint() const override { return IsAtFixpoint; }

  /// See AbstractState::indicatePessimisticFixpoint(...)
  ChangeStatus indicatePessimisticFixpoint() override {
    // Pessimize every sub-state and conservatively assume nested parallelism.
    IsAtFixpoint = true;
    ParallelLevels.indicatePessimisticFixpoint();
    ReachingKernelEntries.indicatePessimisticFixpoint();
    SPMDCompatibilityTracker.indicatePessimisticFixpoint();
    ReachedKnownParallelRegions.indicatePessimisticFixpoint();
    ReachedUnknownParallelRegions.indicatePessimisticFixpoint();
    NestedParallelism = true;
    return ChangeStatus::CHANGED;
  }

  /// See AbstractState::indicateOptimisticFixpoint(...)
  ChangeStatus indicateOptimisticFixpoint() override {
    IsAtFixpoint = true;
    ParallelLevels.indicateOptimisticFixpoint();
    ReachingKernelEntries.indicateOptimisticFixpoint();
    SPMDCompatibilityTracker.indicateOptimisticFixpoint();
    ReachedKnownParallelRegions.indicateOptimisticFixpoint();
    ReachedUnknownParallelRegions.indicateOptimisticFixpoint();
    return ChangeStatus::UNCHANGED;
  }

  /// Return the assumed state
  KernelInfoState &getAssumed() { return *this; }
  const KernelInfoState &getAssumed() const { return *this; }

  // NOTE(review): KernelInitCB, KernelDeinitCB, KernelEnvC and IsKernelEntry
  // are deliberately not compared here -- presumably they are fixed per
  // associated function; confirm against the Attributor update logic.
  bool operator==(const KernelInfoState &RHS) const {
    if (SPMDCompatibilityTracker != RHS.SPMDCompatibilityTracker)
      return false;
    if (ReachedKnownParallelRegions != RHS.ReachedKnownParallelRegions)
      return false;
    if (ReachedUnknownParallelRegions != RHS.ReachedUnknownParallelRegions)
      return false;
    if (ReachingKernelEntries != RHS.ReachingKernelEntries)
      return false;
    if (ParallelLevels != RHS.ParallelLevels)
      return false;
    if (NestedParallelism != RHS.NestedParallelism)
      return false;
    return true;
  }

  /// Returns true if this kernel contains any OpenMP parallel regions.
  bool mayContainParallelRegion() {
    return !ReachedKnownParallelRegions.empty() ||
           !ReachedUnknownParallelRegions.empty();
  }

  /// Return empty set as the best state of potential values.
  static KernelInfoState getBestState() { return KernelInfoState(true); }

  static KernelInfoState getBestState(KernelInfoState &KIS) {
    return getBestState();
  }

  /// Return full set as the worst state of potential values.
  static KernelInfoState getWorstState() { return KernelInfoState(false); }

  /// "Clamp" this state with \p KIS.
  KernelInfoState operator^=(const KernelInfoState &KIS) {
    // Do not merge two different _init and _deinit call sites.
    if (KIS.KernelInitCB) {
      if (KernelInitCB && KernelInitCB != KIS.KernelInitCB)
        llvm_unreachable("Kernel that calls another kernel violates OpenMP-Opt "
                         "assumptions.");
      KernelInitCB = KIS.KernelInitCB;
    }
    if (KIS.KernelDeinitCB) {
      if (KernelDeinitCB && KernelDeinitCB != KIS.KernelDeinitCB)
        llvm_unreachable("Kernel that calls another kernel violates OpenMP-Opt "
                         "assumptions.");
      KernelDeinitCB = KIS.KernelDeinitCB;
    }
    if (KIS.KernelEnvC) {
      if (KernelEnvC && KernelEnvC != KIS.KernelEnvC)
        llvm_unreachable("Kernel that calls another kernel violates OpenMP-Opt "
                         "assumptions.");
      KernelEnvC = KIS.KernelEnvC;
    }
    // Merge the mergeable sub-states.
    SPMDCompatibilityTracker ^= KIS.SPMDCompatibilityTracker;
    ReachedKnownParallelRegions ^= KIS.ReachedKnownParallelRegions;
    ReachedUnknownParallelRegions ^= KIS.ReachedUnknownParallelRegions;
    NestedParallelism |= KIS.NestedParallelism;
    return *this;
  }

  KernelInfoState operator&=(const KernelInfoState &KIS) {
    return (*this ^= KIS);
  }

  ///}
};
834cfca06d7SDimitry Andric
/// Used to map the values physically (in the IR) stored in an offload
/// array, to a vector in memory.
struct OffloadArray {
  /// Physical array (in the IR).
  AllocaInst *Array = nullptr;
  /// Mapped values.
  SmallVector<Value *, 8> StoredValues;
  /// Last stores made in the offload array.
  SmallVector<StoreInst *, 8> LastAccesses;

  OffloadArray() = default;

  /// Initializes the OffloadArray with the values stored in \p Array before
  /// instruction \p Before is reached. Returns false if the initialization
  /// fails.
  /// This MUST be used immediately after the construction of the object.
  bool initialize(AllocaInst &Array, Instruction &Before) {
    // Only fixed-size array allocas can be mapped element by element.
    if (!Array.getAllocatedType()->isArrayTy())
      return false;

    if (!getValues(Array, Before))
      return false;

    this->Array = &Array;
    return true;
  }

  // Argument positions of the device-id / base-pointers / pointers / sizes
  // operands in the offloading runtime calls this helper is used with.
  static const unsigned DeviceIDArgNum = 1;
  static const unsigned BasePtrsArgNum = 3;
  static const unsigned PtrsArgNum = 4;
  static const unsigned SizesArgNum = 5;

private:
  /// Traverses the BasicBlock where \p Array is, collecting the stores made to
  /// \p Array, leaving StoredValues with the values stored before the
  /// instruction \p Before is reached.
  bool getValues(AllocaInst &Array, Instruction &Before) {
    // Initialize container.
    const uint64_t NumValues = Array.getAllocatedType()->getArrayNumElements();
    StoredValues.assign(NumValues, nullptr);
    LastAccesses.assign(NumValues, nullptr);

    // TODO: This assumes the instruction \p Before is in the same
    // BasicBlock as Array. Make it general, for any control flow graph.
    BasicBlock *BB = Array.getParent();
    if (BB != Before.getParent())
      return false;

    const DataLayout &DL = Array.getDataLayout();
    const unsigned int PointerSize = DL.getPointerSize();

    // Scan the block up to (not including) \p Before, recording the last
    // store to each array slot.
    for (Instruction &I : *BB) {
      if (&I == &Before)
        break;

      if (!isa<StoreInst>(&I))
        continue;

      auto *S = cast<StoreInst>(&I);
      int64_t Offset = -1;
      auto *Dst =
          GetPointerBaseWithConstantOffset(S->getPointerOperand(), Offset, DL);
      if (Dst == &Array) {
        // NOTE(review): assumes stores into the array are pointer-sized and
        // in bounds, so Offset / PointerSize yields a valid element index --
        // confirm against the lowering that emits these offload arrays.
        int64_t Idx = Offset / PointerSize;
        StoredValues[Idx] = getUnderlyingObject(S->getValueOperand());
        LastAccesses[Idx] = S;
      }
    }

    return isFilled();
  }

  /// Returns true if all values in StoredValues and
  /// LastAccesses are not nullptrs.
  bool isFilled() {
    const unsigned NumValues = StoredValues.size();
    for (unsigned I = 0; I < NumValues; ++I) {
      if (!StoredValues[I] || !LastAccesses[I])
        return false;
    }

    return true;
  }
};
919b60736ecSDimitry Andric
920cfca06d7SDimitry Andric struct OpenMPOpt {
921cfca06d7SDimitry Andric
/// Callback type that yields the remark emitter to use for a given function.
using OptimizationRemarkGetter =
    function_ref<OptimizationRemarkEmitter &(Function *)>;
924cfca06d7SDimitry Andric
/// Construct an OpenMPOpt instance over the (non-empty) set of functions in
/// \p SCC; the module is derived from the first function in the SCC.
OpenMPOpt(SmallVectorImpl<Function *> &SCC, CallGraphUpdater &CGUpdater,
          OptimizationRemarkGetter OREGetter,
          OMPInformationCache &OMPInfoCache, Attributor &A)
    : M(*(*SCC.begin())->getParent()), SCC(SCC), CGUpdater(CGUpdater),
      OREGetter(OREGetter), OMPInfoCache(OMPInfoCache), A(A) {}
930cfca06d7SDimitry Andric
931b60736ecSDimitry Andric /// Check if any remarks are enabled for openmp-opt
remarksEnabled__anon7bbaa8dc0111::OpenMPOpt932b60736ecSDimitry Andric bool remarksEnabled() {
933b60736ecSDimitry Andric auto &Ctx = M.getContext();
934b60736ecSDimitry Andric return Ctx.getDiagHandlerPtr()->isAnyRemarkEnabled(DEBUG_TYPE);
935b60736ecSDimitry Andric }
936b60736ecSDimitry Andric
/// Run all OpenMP optimizations on the underlying SCC.
bool run(bool IsModulePass) {
  if (SCC.empty())
    return false;

  bool Changed = false;

  LLVM_DEBUG(dbgs() << TAG << "Run on SCC with " << SCC.size()
                    << " functions\n");

  if (IsModulePass) {
    // Module pass pipeline: Attributor first, then device-code rewrites.
    Changed |= runAttributor(IsModulePass);

    // Recollect uses, in case Attributor deleted any.
    OMPInfoCache.recollectUses();

    // TODO: This should be folded into buildCustomStateMachine.
    Changed |= rewriteDeviceCodeStateMachine();

    if (remarksEnabled())
      analysisGlobalization();
  } else {
    // CGSCC pass pipeline: optional debug printing, then the per-SCC
    // transformations.
    if (PrintICVValues)
      printICVs();
    if (PrintOpenMPKernels)
      printKernels();

    Changed |= runAttributor(IsModulePass);

    // Recollect uses, in case Attributor deleted any.
    OMPInfoCache.recollectUses();

    Changed |= deleteParallelRegions();

    if (HideMemoryTransferLatency)
      Changed |= hideMemTransfersLatency();
    Changed |= deduplicateRuntimeCalls();
    if (EnableParallelRegionMerging) {
      if (mergeParallelRegions()) {
        // Merging can expose new deduplication opportunities.
        deduplicateRuntimeCalls();
        Changed = true;
      }
    }
  }

  // NOTE(review): only applies once the device runtime has been linked in;
  // removeRuntimeSymbols is defined elsewhere in this file.
  if (OMPInfoCache.OpenMPPostLink)
    Changed |= removeRuntimeSymbols();

  return Changed;
}
987cfca06d7SDimitry Andric
988cfca06d7SDimitry Andric /// Print initial ICV values for testing.
989cfca06d7SDimitry Andric /// FIXME: This should be done from the Attributor once it is added.
printICVs__anon7bbaa8dc0111::OpenMPOpt990cfca06d7SDimitry Andric void printICVs() const {
991b60736ecSDimitry Andric InternalControlVar ICVs[] = {ICV_nthreads, ICV_active_levels, ICV_cancel,
992b60736ecSDimitry Andric ICV_proc_bind};
993cfca06d7SDimitry Andric
994e3b55780SDimitry Andric for (Function *F : SCC) {
995cfca06d7SDimitry Andric for (auto ICV : ICVs) {
996cfca06d7SDimitry Andric auto ICVInfo = OMPInfoCache.ICVs[ICV];
997344a3780SDimitry Andric auto Remark = [&](OptimizationRemarkAnalysis ORA) {
998344a3780SDimitry Andric return ORA << "OpenMP ICV " << ore::NV("OpenMPICV", ICVInfo.Name)
999cfca06d7SDimitry Andric << " Value: "
1000cfca06d7SDimitry Andric << (ICVInfo.InitValue
1001344a3780SDimitry Andric ? toString(ICVInfo.InitValue->getValue(), 10, true)
1002cfca06d7SDimitry Andric : "IMPLEMENTATION_DEFINED");
1003cfca06d7SDimitry Andric };
1004cfca06d7SDimitry Andric
1005344a3780SDimitry Andric emitRemark<OptimizationRemarkAnalysis>(F, "OpenMPICVTracker", Remark);
1006cfca06d7SDimitry Andric }
1007cfca06d7SDimitry Andric }
1008cfca06d7SDimitry Andric }
1009cfca06d7SDimitry Andric
1010cfca06d7SDimitry Andric /// Print OpenMP GPU kernels for testing.
printKernels__anon7bbaa8dc0111::OpenMPOpt1011cfca06d7SDimitry Andric void printKernels() const {
1012cfca06d7SDimitry Andric for (Function *F : SCC) {
1013b1c73532SDimitry Andric if (!omp::isOpenMPKernel(*F))
1014cfca06d7SDimitry Andric continue;
1015cfca06d7SDimitry Andric
1016344a3780SDimitry Andric auto Remark = [&](OptimizationRemarkAnalysis ORA) {
1017344a3780SDimitry Andric return ORA << "OpenMP GPU kernel "
1018cfca06d7SDimitry Andric << ore::NV("OpenMPGPUKernel", F->getName()) << "\n";
1019cfca06d7SDimitry Andric };
1020cfca06d7SDimitry Andric
1021344a3780SDimitry Andric emitRemark<OptimizationRemarkAnalysis>(F, "OpenMPGPU", Remark);
1022cfca06d7SDimitry Andric }
1023cfca06d7SDimitry Andric }
1024cfca06d7SDimitry Andric
1025cfca06d7SDimitry Andric /// Return the call if \p U is a callee use in a regular call. If \p RFI is
1026cfca06d7SDimitry Andric /// given it has to be the callee or a nullptr is returned.
getCallIfRegularCall__anon7bbaa8dc0111::OpenMPOpt1027cfca06d7SDimitry Andric static CallInst *getCallIfRegularCall(
1028cfca06d7SDimitry Andric Use &U, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) {
1029cfca06d7SDimitry Andric CallInst *CI = dyn_cast<CallInst>(U.getUser());
1030cfca06d7SDimitry Andric if (CI && CI->isCallee(&U) && !CI->hasOperandBundles() &&
1031344a3780SDimitry Andric (!RFI ||
1032344a3780SDimitry Andric (RFI->Declaration && CI->getCalledFunction() == RFI->Declaration)))
1033cfca06d7SDimitry Andric return CI;
1034cfca06d7SDimitry Andric return nullptr;
1035cfca06d7SDimitry Andric }
1036cfca06d7SDimitry Andric
1037cfca06d7SDimitry Andric /// Return the call if \p V is a regular call. If \p RFI is given it has to be
1038cfca06d7SDimitry Andric /// the callee or a nullptr is returned.
getCallIfRegularCall__anon7bbaa8dc0111::OpenMPOpt1039cfca06d7SDimitry Andric static CallInst *getCallIfRegularCall(
1040cfca06d7SDimitry Andric Value &V, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) {
1041cfca06d7SDimitry Andric CallInst *CI = dyn_cast<CallInst>(&V);
1042cfca06d7SDimitry Andric if (CI && !CI->hasOperandBundles() &&
1043344a3780SDimitry Andric (!RFI ||
1044344a3780SDimitry Andric (RFI->Declaration && CI->getCalledFunction() == RFI->Declaration)))
1045cfca06d7SDimitry Andric return CI;
1046cfca06d7SDimitry Andric return nullptr;
1047cfca06d7SDimitry Andric }
1048cfca06d7SDimitry Andric
1049cfca06d7SDimitry Andric private:
  /// Merge parallel regions when it is safe.
  ///
  /// Adjacent `__kmpc_fork_call` sites inside a single basic block are fused
  /// into one parallel region (re-emitted via the OpenMPIRBuilder), with any
  /// in-between sequential code wrapped in a master construct plus a barrier.
  /// Returns true if any region was merged.
  bool mergeParallelRegions() {
    // Operand layout of __kmpc_fork_call: operand 2 is the outlined callback,
    // operands 3+ are the payload arguments forwarded to that callback.
    const unsigned CallbackCalleeOperand = 2;
    const unsigned CallbackFirstArgOperand = 3;
    using InsertPointTy = OpenMPIRBuilder::InsertPointTy;

    // Check if there are any __kmpc_fork_call calls to merge.
    OMPInformationCache::RuntimeFunctionInfo &RFI =
        OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call];

    if (!RFI.Declaration)
      return false;

    // Unmergable calls that prevent merging a parallel region: they change
    // execution parameters (affinity / thread count) of the region following
    // them.
    OMPInformationCache::RuntimeFunctionInfo UnmergableCallsInfo[] = {
        OMPInfoCache.RFIs[OMPRTL___kmpc_push_proc_bind],
        OMPInfoCache.RFIs[OMPRTL___kmpc_push_num_threads],
    };

    bool Changed = false;
    // Analyses are intentionally not required here; SplitBlock tolerates
    // null LoopInfo/DominatorTree and simply skips updating them.
    LoopInfo *LI = nullptr;
    DominatorTree *DT = nullptr;

    // Maps a basic block to the set of __kmpc_fork_call calls it contains.
    SmallDenseMap<BasicBlock *, SmallPtrSet<Instruction *, 4>> BB2PRMap;

    // [StartBB, EndBB] delimit the pre-existing region that the body
    // generation callback below splices into the merged parallel region;
    // they are (re)assigned by the Merge helper before createParallel runs.
    BasicBlock *StartBB = nullptr, *EndBB = nullptr;
    auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
      BasicBlock *CGStartBB = CodeGenIP.getBlock();
      BasicBlock *CGEndBB =
          SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI);
      assert(StartBB != nullptr && "StartBB should not be null");
      CGStartBB->getTerminator()->setSuccessor(0, StartBB);
      assert(EndBB != nullptr && "EndBB should not be null");
      EndBB->getTerminator()->setSuccessor(0, CGEndBB);
    };

    // Privatization callback: values are used as-is, no copies are made.
    auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &,
                      Value &Inner, Value *&ReplacementValue) -> InsertPointTy {
      ReplacementValue = &Inner;
      return CodeGenIP;
    };

    // No finalization actions are needed for the merged region.
    auto FiniCB = [&](InsertPointTy CodeGenIP) {};

    /// Create a sequential execution region within a merged parallel region,
    /// encapsulated in a master construct with a barrier for synchronization.
    /// \p OuterPredBB is currently unused.
    auto CreateSequentialRegion = [&](Function *OuterFn,
                                      BasicBlock *OuterPredBB,
                                      Instruction *SeqStartI,
                                      Instruction *SeqEndI) {
      // Isolate the instructions of the sequential region to a separate
      // block.
      BasicBlock *ParentBB = SeqStartI->getParent();
      BasicBlock *SeqEndBB =
          SplitBlock(ParentBB, SeqEndI->getNextNode(), DT, LI);
      BasicBlock *SeqAfterBB =
          SplitBlock(SeqEndBB, &*SeqEndBB->getFirstInsertionPt(), DT, LI);
      BasicBlock *SeqStartBB =
          SplitBlock(ParentBB, SeqStartI, DT, LI, nullptr, "seq.par.merged");

      assert(ParentBB->getUniqueSuccessor() == SeqStartBB &&
             "Expected a different CFG");
      const DebugLoc DL = ParentBB->getTerminator()->getDebugLoc();
      // The branch is re-created by createMaster below.
      ParentBB->getTerminator()->eraseFromParent();

      // Splice the isolated [SeqStartBB, SeqEndBB] region into the master
      // construct's body (shadows the outer BodyGenCB on purpose).
      auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
        BasicBlock *CGStartBB = CodeGenIP.getBlock();
        BasicBlock *CGEndBB =
            SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI);
        assert(SeqStartBB != nullptr && "SeqStartBB should not be null");
        CGStartBB->getTerminator()->setSuccessor(0, SeqStartBB);
        assert(SeqEndBB != nullptr && "SeqEndBB should not be null");
        SeqEndBB->getTerminator()->setSuccessor(0, CGEndBB);
      };
      auto FiniCB = [&](InsertPointTy CodeGenIP) {};

      // Find outputs from the sequential region to outside users and
      // broadcast their values to them (via an alloca written by the master
      // thread and read by everyone after the barrier).
      for (Instruction &I : *SeqStartBB) {
        SmallPtrSet<Instruction *, 4> OutsideUsers;
        for (User *Usr : I.users()) {
          Instruction &UsrI = *cast<Instruction>(Usr);
          // Ignore outputs to LT intrinsics, code extraction for the merged
          // parallel region will fix them.
          if (UsrI.isLifetimeStartOrEnd())
            continue;

          if (UsrI.getParent() != SeqStartBB)
            OutsideUsers.insert(&UsrI);
        }

        if (OutsideUsers.empty())
          continue;

        // Emit an alloca in the outer region to store the broadcasted
        // value.
        const DataLayout &DL = M.getDataLayout();
        AllocaInst *AllocaI = new AllocaInst(
            I.getType(), DL.getAllocaAddrSpace(), nullptr,
            I.getName() + ".seq.output.alloc", OuterFn->front().begin());

        // Emit a store instruction in the sequential BB to update the
        // value.
        new StoreInst(&I, AllocaI, SeqStartBB->getTerminator()->getIterator());

        // Emit a load instruction and replace the use of the output value
        // with it.
        for (Instruction *UsrI : OutsideUsers) {
          LoadInst *LoadI = new LoadInst(I.getType(), AllocaI,
                                         I.getName() + ".seq.output.load",
                                         UsrI->getIterator());
          UsrI->replaceUsesOfWith(&I, LoadI);
        }
      }

      OpenMPIRBuilder::LocationDescription Loc(
          InsertPointTy(ParentBB, ParentBB->end()), DL);
      InsertPointTy SeqAfterIP =
          OMPInfoCache.OMPBuilder.createMaster(Loc, BodyGenCB, FiniCB);

      // Synchronize all threads after the master-only sequential code.
      OMPInfoCache.OMPBuilder.createBarrier(SeqAfterIP, OMPD_parallel);

      BranchInst::Create(SeqAfterBB, SeqAfterIP.getBlock());

      LLVM_DEBUG(dbgs() << TAG << "After sequential inlining " << *OuterFn
                        << "\n");
    };

    // Helper to merge the __kmpc_fork_call calls in MergableCIs. They are all
    // contained in BB and only separated by instructions that can be
    // redundantly executed in parallel. The block BB is split before the first
    // call (in MergableCIs) and after the last so the entire region we merge
    // into a single parallel region is contained in a single basic block
    // without any other instructions. We use the OpenMPIRBuilder to outline
    // that block and call the resulting function via __kmpc_fork_call.
    auto Merge = [&](const SmallVectorImpl<CallInst *> &MergableCIs,
                     BasicBlock *BB) {
      // TODO: Change the interface to allow single CIs expanded, e.g, to
      // include an outer loop.
      assert(MergableCIs.size() > 1 && "Assumed multiple mergable CIs");

      auto Remark = [&](OptimizationRemark OR) {
        OR << "Parallel region merged with parallel region"
           << (MergableCIs.size() > 2 ? "s" : "") << " at ";
        for (auto *CI : llvm::drop_begin(MergableCIs)) {
          OR << ore::NV("OpenMPParallelMerge", CI->getDebugLoc());
          if (CI != MergableCIs.back())
            OR << ", ";
        }
        return OR << ".";
      };

      emitRemark<OptimizationRemark>(MergableCIs.front(), "OMP150", Remark);

      Function *OriginalFn = BB->getParent();
      LLVM_DEBUG(dbgs() << TAG << "Merge " << MergableCIs.size()
                        << " parallel regions in " << OriginalFn->getName()
                        << "\n");

      // Isolate the calls to merge in a separate block; these assignments
      // feed the outer BodyGenCB captured by reference above.
      EndBB = SplitBlock(BB, MergableCIs.back()->getNextNode(), DT, LI);
      BasicBlock *AfterBB =
          SplitBlock(EndBB, &*EndBB->getFirstInsertionPt(), DT, LI);
      StartBB = SplitBlock(BB, MergableCIs.front(), DT, LI, nullptr,
                           "omp.par.merged");

      assert(BB->getUniqueSuccessor() == StartBB && "Expected a different CFG");
      const DebugLoc DL = BB->getTerminator()->getDebugLoc();
      BB->getTerminator()->eraseFromParent();

      // Create sequential regions for sequential instructions that are
      // in-between mergable parallel regions.
      for (auto *It = MergableCIs.begin(), *End = MergableCIs.end() - 1;
           It != End; ++It) {
        Instruction *ForkCI = *It;
        Instruction *NextForkCI = *(It + 1);

        // Continue if there are not in-between instructions.
        if (ForkCI->getNextNode() == NextForkCI)
          continue;

        CreateSequentialRegion(OriginalFn, BB, ForkCI->getNextNode(),
                               NextForkCI->getPrevNode());
      }

      OpenMPIRBuilder::LocationDescription Loc(InsertPointTy(BB, BB->end()),
                                               DL);
      IRBuilder<>::InsertPoint AllocaIP(
          &OriginalFn->getEntryBlock(),
          OriginalFn->getEntryBlock().getFirstInsertionPt());
      // Create the merged parallel region with default proc binding, to
      // avoid overriding binding settings, and without explicit cancellation.
      InsertPointTy AfterIP = OMPInfoCache.OMPBuilder.createParallel(
          Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr, nullptr,
          OMP_PROC_BIND_default, /* IsCancellable */ false);
      BranchInst::Create(AfterBB, AfterIP.getBlock());

      // Perform the actual outlining.
      OMPInfoCache.OMPBuilder.finalize(OriginalFn);

      Function *OutlinedFn = MergableCIs.front()->getCaller();

      // Replace the __kmpc_fork_call calls with direct calls to the outlined
      // callbacks.
      SmallVector<Value *, 8> Args;
      for (auto *CI : MergableCIs) {
        Value *Callee = CI->getArgOperand(CallbackCalleeOperand);
        FunctionType *FT = OMPInfoCache.OMPBuilder.ParallelTask;
        Args.clear();
        // The first two callback arguments are the (global_tid, bound_tid)
        // pointers the outlined function received.
        Args.push_back(OutlinedFn->getArg(0));
        Args.push_back(OutlinedFn->getArg(1));
        for (unsigned U = CallbackFirstArgOperand, E = CI->arg_size(); U < E;
             ++U)
          Args.push_back(CI->getArgOperand(U));

        CallInst *NewCI =
            CallInst::Create(FT, Callee, Args, "", CI->getIterator());
        if (CI->getDebugLoc())
          NewCI->setDebugLoc(CI->getDebugLoc());

        // Forward parameter attributes from the callback to the callee.
        for (unsigned U = CallbackFirstArgOperand, E = CI->arg_size(); U < E;
             ++U)
          for (const Attribute &A : CI->getAttributes().getParamAttrs(U))
            NewCI->addParamAttr(
                U - (CallbackFirstArgOperand - CallbackCalleeOperand), A);

        // Emit an explicit barrier to replace the implicit fork-join barrier.
        if (CI != MergableCIs.back()) {
          // TODO: Remove barrier if the merged parallel region includes the
          // 'nowait' clause.
          OMPInfoCache.OMPBuilder.createBarrier(
              InsertPointTy(NewCI->getParent(),
                            NewCI->getNextNode()->getIterator()),
              OMPD_parallel);
        }

        CI->eraseFromParent();
      }

      assert(OutlinedFn != OriginalFn && "Outlining failed");
      CGUpdater.registerOutlinedFunction(*OriginalFn, *OutlinedFn);
      CGUpdater.reanalyzeFunction(*OriginalFn);

      NumOpenMPParallelRegionsMerged += MergableCIs.size();

      return true;
    };

    // Helper function that identifes sequences of
    // __kmpc_fork_call uses in a basic block.
    // NOTE(review): CI is dereferenced without a null check — assumes every
    // use foreachUse visits here is a regular callee use; confirm.
    auto DetectPRsCB = [&](Use &U, Function &F) {
      CallInst *CI = getCallIfRegularCall(U, &RFI);
      BB2PRMap[CI->getParent()].insert(CI);

      return false;
    };

    BB2PRMap.clear();
    RFI.foreachUse(SCC, DetectPRsCB);
    SmallVector<SmallVector<CallInst *, 4>, 4> MergableCIsVector;
    // Find mergable parallel regions within a basic block that are
    // safe to merge, that is any in-between instructions can safely
    // execute in parallel after merging.
    // TODO: support merging across basic-blocks.
    for (auto &It : BB2PRMap) {
      auto &CIs = It.getSecond();
      if (CIs.size() < 2)
        continue;

      BasicBlock *BB = It.getFirst();
      SmallVector<CallInst *, 4> MergableCIs;

      /// Returns true if the instruction is mergable, false otherwise.
      /// A terminator instruction is unmergable by definition since merging
      /// works within a BB. Instructions before the mergable region are
      /// mergable if they are not calls to OpenMP runtime functions that may
      /// set different execution parameters for subsequent parallel regions.
      /// Instructions in-between parallel regions are mergable if they are not
      /// calls to any non-intrinsic function since that may call a non-mergable
      /// OpenMP runtime function.
      auto IsMergable = [&](Instruction &I, bool IsBeforeMergableRegion) {
        // We do not merge across BBs, hence return false (unmergable) if the
        // instruction is a terminator.
        if (I.isTerminator())
          return false;

        if (!isa<CallInst>(&I))
          return true;

        CallInst *CI = cast<CallInst>(&I);
        if (IsBeforeMergableRegion) {
          Function *CalledFunction = CI->getCalledFunction();
          if (!CalledFunction)
            return false;
          // Return false (unmergable) if the call before the parallel
          // region calls an explicit affinity (proc_bind) or number of
          // threads (num_threads) compiler-generated function. Those settings
          // may be incompatible with following parallel regions.
          // TODO: ICV tracking to detect compatibility.
          for (const auto &RFI : UnmergableCallsInfo) {
            if (CalledFunction == RFI.Declaration)
              return false;
          }
        } else {
          // Return false (unmergable) if there is a call instruction
          // in-between parallel regions when it is not an intrinsic. It
          // may call an unmergable OpenMP runtime function in its callpath.
          // TODO: Keep track of possible OpenMP calls in the callpath.
          if (!isa<IntrinsicInst>(CI))
            return false;
        }

        return true;
      };
      // Find maximal number of parallel region CIs that are safe to merge.
      // The terminator is never mergable, so any pending run of mergable CIs
      // is flushed at the latest when it is reached.
      for (auto It = BB->begin(), End = BB->end(); It != End;) {
        Instruction &I = *It;
        ++It;

        if (CIs.count(&I)) {
          MergableCIs.push_back(cast<CallInst>(&I));
          continue;
        }

        // Continue expanding if the instruction is mergable.
        if (IsMergable(I, MergableCIs.empty()))
          continue;

        // Forward the instruction iterator to skip the next parallel region
        // since there is an unmergable instruction which can affect it.
        for (; It != End; ++It) {
          Instruction &SkipI = *It;
          if (CIs.count(&SkipI)) {
            LLVM_DEBUG(dbgs() << TAG << "Skip parallel region " << SkipI
                              << " due to " << I << "\n");
            ++It;
            break;
          }
        }

        // Store mergable regions found.
        if (MergableCIs.size() > 1) {
          MergableCIsVector.push_back(MergableCIs);
          LLVM_DEBUG(dbgs() << TAG << "Found " << MergableCIs.size()
                            << " parallel regions in block " << BB->getName()
                            << " of function " << BB->getParent()->getName()
                            << "\n";);
        }

        MergableCIs.clear();
      }

      if (!MergableCIsVector.empty()) {
        Changed = true;

        for (auto &MergableCIs : MergableCIsVector)
          Merge(MergableCIs, BB);
        MergableCIsVector.clear();
      }
    }

    if (Changed) {
      /// Re-collect use for fork calls, emitted barrier calls, and
      /// any emitted master/end_master calls.
      OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_fork_call);
      OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_barrier);
      OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_master);
      OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_end_master);
    }

    return Changed;
  }
1423b60736ecSDimitry Andric
1424cfca06d7SDimitry Andric /// Try to delete parallel regions if possible.
deleteParallelRegions__anon7bbaa8dc0111::OpenMPOpt1425cfca06d7SDimitry Andric bool deleteParallelRegions() {
1426cfca06d7SDimitry Andric const unsigned CallbackCalleeOperand = 2;
1427cfca06d7SDimitry Andric
1428cfca06d7SDimitry Andric OMPInformationCache::RuntimeFunctionInfo &RFI =
1429cfca06d7SDimitry Andric OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call];
1430cfca06d7SDimitry Andric
1431cfca06d7SDimitry Andric if (!RFI.Declaration)
1432cfca06d7SDimitry Andric return false;
1433cfca06d7SDimitry Andric
1434cfca06d7SDimitry Andric bool Changed = false;
1435cfca06d7SDimitry Andric auto DeleteCallCB = [&](Use &U, Function &) {
1436cfca06d7SDimitry Andric CallInst *CI = getCallIfRegularCall(U);
1437cfca06d7SDimitry Andric if (!CI)
1438cfca06d7SDimitry Andric return false;
1439cfca06d7SDimitry Andric auto *Fn = dyn_cast<Function>(
1440cfca06d7SDimitry Andric CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts());
1441cfca06d7SDimitry Andric if (!Fn)
1442cfca06d7SDimitry Andric return false;
1443cfca06d7SDimitry Andric if (!Fn->onlyReadsMemory())
1444cfca06d7SDimitry Andric return false;
1445cfca06d7SDimitry Andric if (!Fn->hasFnAttribute(Attribute::WillReturn))
1446cfca06d7SDimitry Andric return false;
1447cfca06d7SDimitry Andric
1448cfca06d7SDimitry Andric LLVM_DEBUG(dbgs() << TAG << "Delete read-only parallel region in "
1449cfca06d7SDimitry Andric << CI->getCaller()->getName() << "\n");
1450cfca06d7SDimitry Andric
1451cfca06d7SDimitry Andric auto Remark = [&](OptimizationRemark OR) {
1452344a3780SDimitry Andric return OR << "Removing parallel region with no side-effects.";
1453cfca06d7SDimitry Andric };
1454344a3780SDimitry Andric emitRemark<OptimizationRemark>(CI, "OMP160", Remark);
1455cfca06d7SDimitry Andric
1456cfca06d7SDimitry Andric CI->eraseFromParent();
1457cfca06d7SDimitry Andric Changed = true;
1458cfca06d7SDimitry Andric ++NumOpenMPParallelRegionsDeleted;
1459cfca06d7SDimitry Andric return true;
1460cfca06d7SDimitry Andric };
1461cfca06d7SDimitry Andric
1462cfca06d7SDimitry Andric RFI.foreachUse(SCC, DeleteCallCB);
1463cfca06d7SDimitry Andric
1464cfca06d7SDimitry Andric return Changed;
1465cfca06d7SDimitry Andric }
1466cfca06d7SDimitry Andric
1467cfca06d7SDimitry Andric /// Try to eliminate runtime calls by reusing existing ones.
deduplicateRuntimeCalls__anon7bbaa8dc0111::OpenMPOpt1468cfca06d7SDimitry Andric bool deduplicateRuntimeCalls() {
1469cfca06d7SDimitry Andric bool Changed = false;
1470cfca06d7SDimitry Andric
1471cfca06d7SDimitry Andric RuntimeFunction DeduplicableRuntimeCallIDs[] = {
1472cfca06d7SDimitry Andric OMPRTL_omp_get_num_threads,
1473cfca06d7SDimitry Andric OMPRTL_omp_in_parallel,
1474cfca06d7SDimitry Andric OMPRTL_omp_get_cancellation,
1475cfca06d7SDimitry Andric OMPRTL_omp_get_supported_active_levels,
1476cfca06d7SDimitry Andric OMPRTL_omp_get_level,
1477cfca06d7SDimitry Andric OMPRTL_omp_get_ancestor_thread_num,
1478cfca06d7SDimitry Andric OMPRTL_omp_get_team_size,
1479cfca06d7SDimitry Andric OMPRTL_omp_get_active_level,
1480cfca06d7SDimitry Andric OMPRTL_omp_in_final,
1481cfca06d7SDimitry Andric OMPRTL_omp_get_proc_bind,
1482cfca06d7SDimitry Andric OMPRTL_omp_get_num_places,
1483cfca06d7SDimitry Andric OMPRTL_omp_get_num_procs,
1484cfca06d7SDimitry Andric OMPRTL_omp_get_place_num,
1485cfca06d7SDimitry Andric OMPRTL_omp_get_partition_num_places,
1486cfca06d7SDimitry Andric OMPRTL_omp_get_partition_place_nums};
1487cfca06d7SDimitry Andric
1488cfca06d7SDimitry Andric // Global-tid is handled separately.
1489cfca06d7SDimitry Andric SmallSetVector<Value *, 16> GTIdArgs;
1490cfca06d7SDimitry Andric collectGlobalThreadIdArguments(GTIdArgs);
1491cfca06d7SDimitry Andric LLVM_DEBUG(dbgs() << TAG << "Found " << GTIdArgs.size()
1492cfca06d7SDimitry Andric << " global thread ID arguments\n");
1493cfca06d7SDimitry Andric
1494cfca06d7SDimitry Andric for (Function *F : SCC) {
1495cfca06d7SDimitry Andric for (auto DeduplicableRuntimeCallID : DeduplicableRuntimeCallIDs)
1496b60736ecSDimitry Andric Changed |= deduplicateRuntimeCalls(
1497b60736ecSDimitry Andric *F, OMPInfoCache.RFIs[DeduplicableRuntimeCallID]);
1498cfca06d7SDimitry Andric
1499cfca06d7SDimitry Andric // __kmpc_global_thread_num is special as we can replace it with an
1500cfca06d7SDimitry Andric // argument in enough cases to make it worth trying.
1501cfca06d7SDimitry Andric Value *GTIdArg = nullptr;
1502cfca06d7SDimitry Andric for (Argument &Arg : F->args())
1503cfca06d7SDimitry Andric if (GTIdArgs.count(&Arg)) {
1504cfca06d7SDimitry Andric GTIdArg = &Arg;
1505cfca06d7SDimitry Andric break;
1506cfca06d7SDimitry Andric }
1507cfca06d7SDimitry Andric Changed |= deduplicateRuntimeCalls(
1508cfca06d7SDimitry Andric *F, OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num], GTIdArg);
1509cfca06d7SDimitry Andric }
1510cfca06d7SDimitry Andric
1511cfca06d7SDimitry Andric return Changed;
1512cfca06d7SDimitry Andric }
1513cfca06d7SDimitry Andric
1514b1c73532SDimitry Andric /// Tries to remove known runtime symbols that are optional from the module.
removeRuntimeSymbols__anon7bbaa8dc0111::OpenMPOpt1515b1c73532SDimitry Andric bool removeRuntimeSymbols() {
1516b1c73532SDimitry Andric // The RPC client symbol is defined in `libc` and indicates that something
1517b1c73532SDimitry Andric // required an RPC server. If its users were all optimized out then we can
1518b1c73532SDimitry Andric // safely remove it.
1519b1c73532SDimitry Andric // TODO: This should be somewhere more common in the future.
1520b1c73532SDimitry Andric if (GlobalVariable *GV = M.getNamedGlobal("__llvm_libc_rpc_client")) {
1521b1c73532SDimitry Andric if (!GV->getType()->isPointerTy())
1522b1c73532SDimitry Andric return false;
1523b1c73532SDimitry Andric
1524b1c73532SDimitry Andric Constant *C = GV->getInitializer();
1525b1c73532SDimitry Andric if (!C)
1526b1c73532SDimitry Andric return false;
1527b1c73532SDimitry Andric
1528b1c73532SDimitry Andric // Check to see if the only user of the RPC client is the external handle.
1529b1c73532SDimitry Andric GlobalVariable *Client = dyn_cast<GlobalVariable>(C->stripPointerCasts());
1530b1c73532SDimitry Andric if (!Client || Client->getNumUses() > 1 ||
1531b1c73532SDimitry Andric Client->user_back() != GV->getInitializer())
1532b1c73532SDimitry Andric return false;
1533b1c73532SDimitry Andric
1534b1c73532SDimitry Andric Client->replaceAllUsesWith(PoisonValue::get(Client->getType()));
1535b1c73532SDimitry Andric Client->eraseFromParent();
1536b1c73532SDimitry Andric
1537b1c73532SDimitry Andric GV->replaceAllUsesWith(PoisonValue::get(GV->getType()));
1538b1c73532SDimitry Andric GV->eraseFromParent();
1539b1c73532SDimitry Andric
1540b1c73532SDimitry Andric return true;
1541b1c73532SDimitry Andric }
1542b1c73532SDimitry Andric return false;
1543b1c73532SDimitry Andric }
1544b1c73532SDimitry Andric
1545b60736ecSDimitry Andric /// Tries to hide the latency of runtime calls that involve host to
1546b60736ecSDimitry Andric /// device memory transfers by splitting them into their "issue" and "wait"
1547b60736ecSDimitry Andric /// versions. The "issue" is moved upwards as much as possible. The "wait" is
1548b60736ecSDimitry Andric /// moved downards as much as possible. The "issue" issues the memory transfer
1549b60736ecSDimitry Andric /// asynchronously, returning a handle. The "wait" waits in the returned
1550b60736ecSDimitry Andric /// handle for the memory transfer to finish.
hideMemTransfersLatency__anon7bbaa8dc0111::OpenMPOpt1551b60736ecSDimitry Andric bool hideMemTransfersLatency() {
1552b60736ecSDimitry Andric auto &RFI = OMPInfoCache.RFIs[OMPRTL___tgt_target_data_begin_mapper];
1553b60736ecSDimitry Andric bool Changed = false;
1554b60736ecSDimitry Andric auto SplitMemTransfers = [&](Use &U, Function &Decl) {
1555b60736ecSDimitry Andric auto *RTCall = getCallIfRegularCall(U, &RFI);
1556b60736ecSDimitry Andric if (!RTCall)
1557b60736ecSDimitry Andric return false;
1558b60736ecSDimitry Andric
1559b60736ecSDimitry Andric OffloadArray OffloadArrays[3];
1560b60736ecSDimitry Andric if (!getValuesInOffloadArrays(*RTCall, OffloadArrays))
1561b60736ecSDimitry Andric return false;
1562b60736ecSDimitry Andric
1563b60736ecSDimitry Andric LLVM_DEBUG(dumpValuesInOffloadArrays(OffloadArrays));
1564b60736ecSDimitry Andric
1565b60736ecSDimitry Andric // TODO: Check if can be moved upwards.
1566b60736ecSDimitry Andric bool WasSplit = false;
1567b60736ecSDimitry Andric Instruction *WaitMovementPoint = canBeMovedDownwards(*RTCall);
1568b60736ecSDimitry Andric if (WaitMovementPoint)
1569b60736ecSDimitry Andric WasSplit = splitTargetDataBeginRTC(*RTCall, *WaitMovementPoint);
1570b60736ecSDimitry Andric
1571b60736ecSDimitry Andric Changed |= WasSplit;
1572b60736ecSDimitry Andric return WasSplit;
1573b60736ecSDimitry Andric };
15747fa27ce4SDimitry Andric if (OMPInfoCache.runtimeFnsAvailable(
15757fa27ce4SDimitry Andric {OMPRTL___tgt_target_data_begin_mapper_issue,
15767fa27ce4SDimitry Andric OMPRTL___tgt_target_data_begin_mapper_wait}))
1577b60736ecSDimitry Andric RFI.foreachUse(SCC, SplitMemTransfers);
1578b60736ecSDimitry Andric
1579b60736ecSDimitry Andric return Changed;
1580b60736ecSDimitry Andric }
1581b60736ecSDimitry Andric
analysisGlobalization__anon7bbaa8dc0111::OpenMPOpt1582b60736ecSDimitry Andric void analysisGlobalization() {
1583344a3780SDimitry Andric auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
1584b60736ecSDimitry Andric
1585b60736ecSDimitry Andric auto CheckGlobalization = [&](Use &U, Function &Decl) {
1586b60736ecSDimitry Andric if (CallInst *CI = getCallIfRegularCall(U, &RFI)) {
1587344a3780SDimitry Andric auto Remark = [&](OptimizationRemarkMissed ORM) {
1588344a3780SDimitry Andric return ORM
1589b60736ecSDimitry Andric << "Found thread data sharing on the GPU. "
1590b60736ecSDimitry Andric << "Expect degraded performance due to data globalization.";
1591b60736ecSDimitry Andric };
1592344a3780SDimitry Andric emitRemark<OptimizationRemarkMissed>(CI, "OMP112", Remark);
1593b60736ecSDimitry Andric }
1594b60736ecSDimitry Andric
1595b60736ecSDimitry Andric return false;
1596b60736ecSDimitry Andric };
1597b60736ecSDimitry Andric
1598b60736ecSDimitry Andric RFI.foreachUse(SCC, CheckGlobalization);
1599b60736ecSDimitry Andric }
1600b60736ecSDimitry Andric
1601b60736ecSDimitry Andric /// Maps the values stored in the offload arrays passed as arguments to
1602b60736ecSDimitry Andric /// \p RuntimeCall into the offload arrays in \p OAs.
bool getValuesInOffloadArrays(CallInst &RuntimeCall,
                              MutableArrayRef<OffloadArray> OAs) {
  assert(OAs.size() == 3 && "Need space for three offload arrays!");

  // A runtime call that involves memory offloading looks something like:
  // call void @__tgt_target_data_begin_mapper(arg0, arg1,
  //   i8** %offload_baseptrs, i8** %offload_ptrs, i64* %offload_sizes,
  // ...)
  // So, the idea is to access the allocas that allocate space for these
  // offload arrays, offload_baseptrs, offload_ptrs, offload_sizes.
  // Therefore:
  // i8** %offload_baseptrs.
  Value *BasePtrsArg =
      RuntimeCall.getArgOperand(OffloadArray::BasePtrsArgNum);
  // i8** %offload_ptrs.
  Value *PtrsArg = RuntimeCall.getArgOperand(OffloadArray::PtrsArgNum);
  // i8** %offload_sizes.
  Value *SizesArg = RuntimeCall.getArgOperand(OffloadArray::SizesArgNum);

  // Get values stored in **offload_baseptrs.
  auto *V = getUnderlyingObject(BasePtrsArg);
  if (!isa<AllocaInst>(V))
    return false;
  auto *BasePtrsArray = cast<AllocaInst>(V);
  if (!OAs[0].initialize(*BasePtrsArray, RuntimeCall))
    return false;

  // Get values stored in **offload_ptrs.
  V = getUnderlyingObject(PtrsArg);
  if (!isa<AllocaInst>(V))
    return false;
  auto *PtrsArray = cast<AllocaInst>(V);
  if (!OAs[1].initialize(*PtrsArray, RuntimeCall))
    return false;

  // Get values stored in **offload_sizes.
  V = getUnderlyingObject(SizesArg);
  // If it's a [constant] global array don't analyze it: a constant global
  // means the sizes are compile-time known and need no tracking (succeed);
  // any other global cannot be reasoned about here (fail).
  if (isa<GlobalValue>(V))
    return isa<Constant>(V);
  if (!isa<AllocaInst>(V))
    return false;

  auto *SizesArray = cast<AllocaInst>(V);
  if (!OAs[2].initialize(*SizesArray, RuntimeCall))
    return false;

  return true;
}
1652b60736ecSDimitry Andric
1653b60736ecSDimitry Andric /// Prints the values in the OffloadArrays \p OAs using LLVM_DEBUG.
1654b60736ecSDimitry Andric /// For now this is a way to test that the function getValuesInOffloadArrays
1655b60736ecSDimitry Andric /// is working properly.
1656b60736ecSDimitry Andric /// TODO: Move this to a unittest when unittests are available for OpenMPOpt.
dumpValuesInOffloadArrays__anon7bbaa8dc0111::OpenMPOpt1657b60736ecSDimitry Andric void dumpValuesInOffloadArrays(ArrayRef<OffloadArray> OAs) {
1658b60736ecSDimitry Andric assert(OAs.size() == 3 && "There are three offload arrays to debug!");
1659b60736ecSDimitry Andric
1660b60736ecSDimitry Andric LLVM_DEBUG(dbgs() << TAG << " Successfully got offload values:\n");
1661b60736ecSDimitry Andric std::string ValuesStr;
1662b60736ecSDimitry Andric raw_string_ostream Printer(ValuesStr);
1663b60736ecSDimitry Andric std::string Separator = " --- ";
1664b60736ecSDimitry Andric
1665b60736ecSDimitry Andric for (auto *BP : OAs[0].StoredValues) {
1666b60736ecSDimitry Andric BP->print(Printer);
1667b60736ecSDimitry Andric Printer << Separator;
1668b60736ecSDimitry Andric }
1669ac9a064cSDimitry Andric LLVM_DEBUG(dbgs() << "\t\toffload_baseptrs: " << ValuesStr << "\n");
1670b60736ecSDimitry Andric ValuesStr.clear();
1671b60736ecSDimitry Andric
1672b60736ecSDimitry Andric for (auto *P : OAs[1].StoredValues) {
1673b60736ecSDimitry Andric P->print(Printer);
1674b60736ecSDimitry Andric Printer << Separator;
1675b60736ecSDimitry Andric }
1676ac9a064cSDimitry Andric LLVM_DEBUG(dbgs() << "\t\toffload_ptrs: " << ValuesStr << "\n");
1677b60736ecSDimitry Andric ValuesStr.clear();
1678b60736ecSDimitry Andric
1679b60736ecSDimitry Andric for (auto *S : OAs[2].StoredValues) {
1680b60736ecSDimitry Andric S->print(Printer);
1681b60736ecSDimitry Andric Printer << Separator;
1682b60736ecSDimitry Andric }
1683ac9a064cSDimitry Andric LLVM_DEBUG(dbgs() << "\t\toffload_sizes: " << ValuesStr << "\n");
1684b60736ecSDimitry Andric }
1685b60736ecSDimitry Andric
1686b60736ecSDimitry Andric /// Returns the instruction where the "wait" counterpart \p RuntimeCall can be
1687b60736ecSDimitry Andric /// moved. Returns nullptr if the movement is not possible, or not worth it.
canBeMovedDownwards__anon7bbaa8dc0111::OpenMPOpt1688b60736ecSDimitry Andric Instruction *canBeMovedDownwards(CallInst &RuntimeCall) {
1689b60736ecSDimitry Andric // FIXME: This traverses only the BasicBlock where RuntimeCall is.
1690b60736ecSDimitry Andric // Make it traverse the CFG.
1691b60736ecSDimitry Andric
1692b60736ecSDimitry Andric Instruction *CurrentI = &RuntimeCall;
1693b60736ecSDimitry Andric bool IsWorthIt = false;
1694b60736ecSDimitry Andric while ((CurrentI = CurrentI->getNextNode())) {
1695b60736ecSDimitry Andric
1696b60736ecSDimitry Andric // TODO: Once we detect the regions to be offloaded we should use the
1697b60736ecSDimitry Andric // alias analysis manager to check if CurrentI may modify one of
1698b60736ecSDimitry Andric // the offloaded regions.
1699b60736ecSDimitry Andric if (CurrentI->mayHaveSideEffects() || CurrentI->mayReadFromMemory()) {
1700b60736ecSDimitry Andric if (IsWorthIt)
1701b60736ecSDimitry Andric return CurrentI;
1702b60736ecSDimitry Andric
1703b60736ecSDimitry Andric return nullptr;
1704b60736ecSDimitry Andric }
1705b60736ecSDimitry Andric
1706b60736ecSDimitry Andric // FIXME: For now if we move it over anything without side effect
1707b60736ecSDimitry Andric // is worth it.
1708b60736ecSDimitry Andric IsWorthIt = true;
1709b60736ecSDimitry Andric }
1710b60736ecSDimitry Andric
1711b60736ecSDimitry Andric // Return end of BasicBlock.
1712b60736ecSDimitry Andric return RuntimeCall.getParent()->getTerminator();
1713b60736ecSDimitry Andric }
1714b60736ecSDimitry Andric
1715b60736ecSDimitry Andric /// Splits \p RuntimeCall into its "issue" and "wait" counterparts.
bool splitTargetDataBeginRTC(CallInst &RuntimeCall,
                             Instruction &WaitMovementPoint) {
  // Create stack allocated handle (__tgt_async_info) at the beginning of the
  // function. Used for storing information of the async transfer, allowing to
  // wait on it later.
  auto &IRBuilder = OMPInfoCache.OMPBuilder;
  Function *F = RuntimeCall.getCaller();
  BasicBlock &Entry = F->getEntryBlock();
  // Insert after existing PHIs/debug intrinsics/allocas so the handle joins
  // the other static allocas at the top of the entry block.
  IRBuilder.Builder.SetInsertPoint(&Entry,
                                   Entry.getFirstNonPHIOrDbgOrAlloca());
  Value *Handle = IRBuilder.Builder.CreateAlloca(
      IRBuilder.AsyncInfo, /*ArraySize=*/nullptr, "handle");
  // The runtime expects the handle through AsyncInfoPtr; cast in case the
  // alloca lives in a different address space.
  Handle =
      IRBuilder.Builder.CreateAddrSpaceCast(Handle, IRBuilder.AsyncInfoPtr);

  // Add "issue" runtime call declaration:
  // declare %struct.tgt_async_info @__tgt_target_data_begin_issue(i64, i32,
  //   i8**, i8**, i64*, i64*)
  FunctionCallee IssueDecl = IRBuilder.getOrCreateRuntimeFunction(
      M, OMPRTL___tgt_target_data_begin_mapper_issue);

  // Change RuntimeCall call site for its asynchronous version.
  // The "issue" call takes all original arguments plus the handle.
  SmallVector<Value *, 16> Args;
  for (auto &Arg : RuntimeCall.args())
    Args.push_back(Arg.get());
  Args.push_back(Handle);

  CallInst *IssueCallsite = CallInst::Create(IssueDecl, Args, /*NameStr=*/"",
                                             RuntimeCall.getIterator());
  OMPInfoCache.setCallingConvention(IssueDecl, IssueCallsite);
  // The synchronous call is fully replaced by the issue/wait pair.
  RuntimeCall.eraseFromParent();

  // Add "wait" runtime call declaration:
  // declare void @__tgt_target_data_begin_wait(i64, %struct.__tgt_async_info)
  FunctionCallee WaitDecl = IRBuilder.getOrCreateRuntimeFunction(
      M, OMPRTL___tgt_target_data_begin_mapper_wait);

  Value *WaitParams[2] = {
      IssueCallsite->getArgOperand(
          OffloadArray::DeviceIDArgNum), // device_id.
      Handle                             // handle to wait on.
  };
  // The wait is placed at \p WaitMovementPoint, as far down as the caller
  // determined was safe, to maximize transfer/compute overlap.
  CallInst *WaitCallsite = CallInst::Create(
      WaitDecl, WaitParams, /*NameStr=*/"", WaitMovementPoint.getIterator());
  OMPInfoCache.setCallingConvention(WaitDecl, WaitCallsite);

  return true;
}
1764b60736ecSDimitry Andric
combinedIdentStruct__anon7bbaa8dc0111::OpenMPOpt1765cfca06d7SDimitry Andric static Value *combinedIdentStruct(Value *CurrentIdent, Value *NextIdent,
1766cfca06d7SDimitry Andric bool GlobalOnly, bool &SingleChoice) {
1767cfca06d7SDimitry Andric if (CurrentIdent == NextIdent)
1768cfca06d7SDimitry Andric return CurrentIdent;
1769cfca06d7SDimitry Andric
1770cfca06d7SDimitry Andric // TODO: Figure out how to actually combine multiple debug locations. For
1771cfca06d7SDimitry Andric // now we just keep an existing one if there is a single choice.
1772cfca06d7SDimitry Andric if (!GlobalOnly || isa<GlobalValue>(NextIdent)) {
1773cfca06d7SDimitry Andric SingleChoice = !CurrentIdent;
1774cfca06d7SDimitry Andric return NextIdent;
1775cfca06d7SDimitry Andric }
1776cfca06d7SDimitry Andric return nullptr;
1777cfca06d7SDimitry Andric }
1778cfca06d7SDimitry Andric
1779cfca06d7SDimitry Andric /// Return an `struct ident_t*` value that represents the ones used in the
1780cfca06d7SDimitry Andric /// calls of \p RFI inside of \p F. If \p GlobalOnly is true, we will not
1781cfca06d7SDimitry Andric /// return a local `struct ident_t*`. For now, if we cannot find a suitable
1782cfca06d7SDimitry Andric /// return value we create one from scratch. We also do not yet combine
1783cfca06d7SDimitry Andric /// information, e.g., the source locations, see combinedIdentStruct.
Value *
getCombinedIdentFromCallUsesIn(OMPInformationCache::RuntimeFunctionInfo &RFI,
                               Function &F, bool GlobalOnly) {
  bool SingleChoice = true;
  Value *Ident = nullptr;
  // Fold the ident argument (operand 0) of every call to \p RFI inside \p F
  // into Ident, tracking whether the choice was unique.
  auto CombineIdentStruct = [&](Use &U, Function &Caller) {
    CallInst *CI = getCallIfRegularCall(U, &RFI);
    if (!CI || &F != &Caller)
      return false;
    Ident = combinedIdentStruct(Ident, CI->getArgOperand(0),
                                /* GlobalOnly */ true, SingleChoice);
    return false;
  };
  RFI.foreachUse(SCC, CombineIdentStruct);

  if (!Ident || !SingleChoice) {
    // The IRBuilder uses the insertion block to get to the module, this is
    // unfortunate but we work around it for now.
    if (!OMPInfoCache.OMPBuilder.getInsertionPoint().getBlock())
      OMPInfoCache.OMPBuilder.updateToLocation(OpenMPIRBuilder::InsertPointTy(
          &F.getEntryBlock(), F.getEntryBlock().begin()));
    // Create a fallback location if none was found.
    // TODO: Use the debug locations of the calls instead.
    uint32_t SrcLocStrSize;
    Constant *Loc =
        OMPInfoCache.OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
    Ident = OMPInfoCache.OMPBuilder.getOrCreateIdent(Loc, SrcLocStrSize);
  }
  return Ident;
}
1814cfca06d7SDimitry Andric
1815cfca06d7SDimitry Andric /// Try to eliminate calls of \p RFI in \p F by reusing an existing one or
1816cfca06d7SDimitry Andric /// \p ReplVal if given.
bool deduplicateRuntimeCalls(Function &F,
                             OMPInformationCache::RuntimeFunctionInfo &RFI,
                             Value *ReplVal = nullptr) {
  auto *UV = RFI.getUseVector(F);
  // Nothing to deduplicate unless at least two values are involved (two
  // calls, or one call plus a provided replacement).
  if (!UV || UV->size() + (ReplVal != nullptr) < 2)
    return false;

  LLVM_DEBUG(
      dbgs() << TAG << "Deduplicate " << UV->size() << " uses of " << RFI.Name
             << (ReplVal ? " with an existing value\n" : "\n") << "\n");

  // A caller-provided replacement must be an argument of \p F.
  assert((!ReplVal || (isa<Argument>(ReplVal) &&
                       cast<Argument>(ReplVal)->getParent() == &F)) &&
         "Unexpected replacement value!");

  // TODO: Use dominance to find a good position instead.
  // A call can be hoisted only if every argument past the ident (operand 0)
  // is a non-instruction, i.e. available anywhere in the function.
  auto CanBeMoved = [this](CallBase &CB) {
    unsigned NumArgs = CB.arg_size();
    if (NumArgs == 0)
      return true;
    if (CB.getArgOperand(0)->getType() != OMPInfoCache.OMPBuilder.IdentPtr)
      return false;
    for (unsigned U = 1; U < NumArgs; ++U)
      if (isa<Instruction>(CB.getArgOperand(U)))
        return false;
    return true;
  };

  if (!ReplVal) {
    // No replacement given: pick a movable existing call and hoist it to the
    // nearest common dominator of all the calls.
    auto *DT =
        OMPInfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(F);
    if (!DT)
      return false;
    Instruction *IP = nullptr;
    for (Use *U : *UV) {
      if (CallInst *CI = getCallIfRegularCall(*U, &RFI)) {
        if (IP)
          IP = DT->findNearestCommonDominator(IP, CI);
        else
          IP = CI;
        if (!CanBeMoved(*CI))
          continue;
        // Keep the first movable call as the replacement candidate.
        if (!ReplVal)
          ReplVal = CI;
      }
    }
    if (!ReplVal)
      return false;
    assert(IP && "Expected insertion point!");
    cast<Instruction>(ReplVal)->moveBefore(IP);
  }

  // If we use a call as a replacement value we need to make sure the ident is
  // valid at the new location. For now we just pick a global one, either
  // existing and used by one of the calls, or created from scratch.
  if (CallBase *CI = dyn_cast<CallBase>(ReplVal)) {
    if (!CI->arg_empty() &&
        CI->getArgOperand(0)->getType() == OMPInfoCache.OMPBuilder.IdentPtr) {
      Value *Ident = getCombinedIdentFromCallUsesIn(RFI, F,
                                                    /* GlobalOnly */ true);
      CI->setArgOperand(0, Ident);
    }
  }

  bool Changed = false;
  // Replace every other call to \p RFI in \p F with ReplVal and erase it.
  auto ReplaceAndDeleteCB = [&](Use &U, Function &Caller) {
    CallInst *CI = getCallIfRegularCall(U, &RFI);
    if (!CI || CI == ReplVal || &F != &Caller)
      return false;
    assert(CI->getCaller() == &F && "Unexpected call!");

    auto Remark = [&](OptimizationRemark OR) {
      return OR << "OpenMP runtime call "
                << ore::NV("OpenMPOptRuntime", RFI.Name) << " deduplicated.";
    };
    // Attach the remark to the call when it has a debug location, otherwise
    // fall back to the function.
    if (CI->getDebugLoc())
      emitRemark<OptimizationRemark>(CI, "OMP170", Remark);
    else
      emitRemark<OptimizationRemark>(&F, "OMP170", Remark);

    CI->replaceAllUsesWith(ReplVal);
    CI->eraseFromParent();
    ++NumOpenMPRuntimeCallsDeduplicated;
    Changed = true;
    return true;
  };
  RFI.foreachUse(SCC, ReplaceAndDeleteCB);

  return Changed;
}
1907cfca06d7SDimitry Andric
1908cfca06d7SDimitry Andric /// Collect arguments that represent the global thread id in \p GTIdArgs.
collectGlobalThreadIdArguments__anon7bbaa8dc0111::OpenMPOpt1909cfca06d7SDimitry Andric void collectGlobalThreadIdArguments(SmallSetVector<Value *, 16> >IdArgs) {
1910cfca06d7SDimitry Andric // TODO: Below we basically perform a fixpoint iteration with a pessimistic
1911cfca06d7SDimitry Andric // initialization. We could define an AbstractAttribute instead and
1912cfca06d7SDimitry Andric // run the Attributor here once it can be run as an SCC pass.
1913cfca06d7SDimitry Andric
1914cfca06d7SDimitry Andric // Helper to check the argument \p ArgNo at all call sites of \p F for
1915cfca06d7SDimitry Andric // a GTId.
1916cfca06d7SDimitry Andric auto CallArgOpIsGTId = [&](Function &F, unsigned ArgNo, CallInst &RefCI) {
1917cfca06d7SDimitry Andric if (!F.hasLocalLinkage())
1918cfca06d7SDimitry Andric return false;
1919cfca06d7SDimitry Andric for (Use &U : F.uses()) {
1920cfca06d7SDimitry Andric if (CallInst *CI = getCallIfRegularCall(U)) {
1921cfca06d7SDimitry Andric Value *ArgOp = CI->getArgOperand(ArgNo);
1922cfca06d7SDimitry Andric if (CI == &RefCI || GTIdArgs.count(ArgOp) ||
1923cfca06d7SDimitry Andric getCallIfRegularCall(
1924cfca06d7SDimitry Andric *ArgOp, &OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num]))
1925cfca06d7SDimitry Andric continue;
1926cfca06d7SDimitry Andric }
1927cfca06d7SDimitry Andric return false;
1928cfca06d7SDimitry Andric }
1929cfca06d7SDimitry Andric return true;
1930cfca06d7SDimitry Andric };
1931cfca06d7SDimitry Andric
1932cfca06d7SDimitry Andric // Helper to identify uses of a GTId as GTId arguments.
1933cfca06d7SDimitry Andric auto AddUserArgs = [&](Value >Id) {
1934cfca06d7SDimitry Andric for (Use &U : GTId.uses())
1935cfca06d7SDimitry Andric if (CallInst *CI = dyn_cast<CallInst>(U.getUser()))
1936cfca06d7SDimitry Andric if (CI->isArgOperand(&U))
1937cfca06d7SDimitry Andric if (Function *Callee = CI->getCalledFunction())
1938cfca06d7SDimitry Andric if (CallArgOpIsGTId(*Callee, U.getOperandNo(), *CI))
1939cfca06d7SDimitry Andric GTIdArgs.insert(Callee->getArg(U.getOperandNo()));
1940cfca06d7SDimitry Andric };
1941cfca06d7SDimitry Andric
1942cfca06d7SDimitry Andric // The argument users of __kmpc_global_thread_num calls are GTIds.
1943cfca06d7SDimitry Andric OMPInformationCache::RuntimeFunctionInfo &GlobThreadNumRFI =
1944cfca06d7SDimitry Andric OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num];
1945cfca06d7SDimitry Andric
1946cfca06d7SDimitry Andric GlobThreadNumRFI.foreachUse(SCC, [&](Use &U, Function &F) {
1947cfca06d7SDimitry Andric if (CallInst *CI = getCallIfRegularCall(U, &GlobThreadNumRFI))
1948cfca06d7SDimitry Andric AddUserArgs(*CI);
1949cfca06d7SDimitry Andric return false;
1950cfca06d7SDimitry Andric });
1951cfca06d7SDimitry Andric
1952cfca06d7SDimitry Andric // Transitively search for more arguments by looking at the users of the
1953cfca06d7SDimitry Andric // ones we know already. During the search the GTIdArgs vector is extended
1954cfca06d7SDimitry Andric // so we cannot cache the size nor can we use a range based for.
1955c0981da4SDimitry Andric for (unsigned U = 0; U < GTIdArgs.size(); ++U)
1956c0981da4SDimitry Andric AddUserArgs(*GTIdArgs[U]);
1957cfca06d7SDimitry Andric }
1958cfca06d7SDimitry Andric
1959cfca06d7SDimitry Andric /// Kernel (=GPU) optimizations and utility functions
1960cfca06d7SDimitry Andric ///
1961cfca06d7SDimitry Andric ///{{
1962cfca06d7SDimitry Andric
1963cfca06d7SDimitry Andric /// Cache to remember the unique kernel for a function.
1964e3b55780SDimitry Andric DenseMap<Function *, std::optional<Kernel>> UniqueKernelMap;
1965cfca06d7SDimitry Andric
1966cfca06d7SDimitry Andric /// Find the unique kernel that will execute \p F, if any.
1967cfca06d7SDimitry Andric Kernel getUniqueKernelFor(Function &F);
1968cfca06d7SDimitry Andric
1969cfca06d7SDimitry Andric /// Find the unique kernel that will execute \p I, if any.
Kernel getUniqueKernelFor(Instruction &I) {
  // Delegate to the function-level query: an instruction's unique kernel is
  // that of its parent function.
  return getUniqueKernelFor(*I.getFunction());
}
1973cfca06d7SDimitry Andric
/// Rewrite the device (=GPU) code state machine created in non-SPMD mode in
/// the cases we can avoid taking the address of a function.
1976cfca06d7SDimitry Andric bool rewriteDeviceCodeStateMachine();
1977cfca06d7SDimitry Andric
1978cfca06d7SDimitry Andric ///
1979cfca06d7SDimitry Andric ///}}
1980cfca06d7SDimitry Andric
1981cfca06d7SDimitry Andric /// Emit a remark generically
1982cfca06d7SDimitry Andric ///
1983cfca06d7SDimitry Andric /// This template function can be used to generically emit a remark. The
1984cfca06d7SDimitry Andric /// RemarkKind should be one of the following:
1985cfca06d7SDimitry Andric /// - OptimizationRemark to indicate a successful optimization attempt
1986cfca06d7SDimitry Andric /// - OptimizationRemarkMissed to report a failed optimization attempt
1987cfca06d7SDimitry Andric /// - OptimizationRemarkAnalysis to provide additional information about an
1988cfca06d7SDimitry Andric /// optimization attempt
1989cfca06d7SDimitry Andric ///
1990cfca06d7SDimitry Andric /// The remark is built using a callback function provided by the caller that
1991cfca06d7SDimitry Andric /// takes a RemarkKind as input and returns a RemarkKind.
1992344a3780SDimitry Andric template <typename RemarkKind, typename RemarkCallBack>
emitRemark__anon7bbaa8dc0111::OpenMPOpt1993344a3780SDimitry Andric void emitRemark(Instruction *I, StringRef RemarkName,
1994cfca06d7SDimitry Andric RemarkCallBack &&RemarkCB) const {
1995344a3780SDimitry Andric Function *F = I->getParent()->getParent();
1996cfca06d7SDimitry Andric auto &ORE = OREGetter(F);
1997cfca06d7SDimitry Andric
1998b1c73532SDimitry Andric if (RemarkName.starts_with("OMP"))
1999cfca06d7SDimitry Andric ORE.emit([&]() {
2000344a3780SDimitry Andric return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, I))
2001344a3780SDimitry Andric << " [" << RemarkName << "]";
2002cfca06d7SDimitry Andric });
2003344a3780SDimitry Andric else
2004344a3780SDimitry Andric ORE.emit(
2005344a3780SDimitry Andric [&]() { return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, I)); });
2006cfca06d7SDimitry Andric }
2007cfca06d7SDimitry Andric
2008344a3780SDimitry Andric /// Emit a remark on a function.
2009344a3780SDimitry Andric template <typename RemarkKind, typename RemarkCallBack>
emitRemark__anon7bbaa8dc0111::OpenMPOpt2010344a3780SDimitry Andric void emitRemark(Function *F, StringRef RemarkName,
2011344a3780SDimitry Andric RemarkCallBack &&RemarkCB) const {
2012344a3780SDimitry Andric auto &ORE = OREGetter(F);
2013344a3780SDimitry Andric
2014b1c73532SDimitry Andric if (RemarkName.starts_with("OMP"))
2015344a3780SDimitry Andric ORE.emit([&]() {
2016344a3780SDimitry Andric return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, F))
2017344a3780SDimitry Andric << " [" << RemarkName << "]";
2018344a3780SDimitry Andric });
2019344a3780SDimitry Andric else
2020344a3780SDimitry Andric ORE.emit(
2021344a3780SDimitry Andric [&]() { return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, F)); });
2022344a3780SDimitry Andric }
2023344a3780SDimitry Andric
2024cfca06d7SDimitry Andric /// The underlying module.
2025cfca06d7SDimitry Andric Module &M;
2026cfca06d7SDimitry Andric
2027cfca06d7SDimitry Andric /// The SCC we are operating on.
2028cfca06d7SDimitry Andric SmallVectorImpl<Function *> &SCC;
2029cfca06d7SDimitry Andric
2030cfca06d7SDimitry Andric /// Callback to update the call graph, the first argument is a removed call,
2031cfca06d7SDimitry Andric /// the second an optional replacement call.
2032cfca06d7SDimitry Andric CallGraphUpdater &CGUpdater;
2033cfca06d7SDimitry Andric
2034cfca06d7SDimitry Andric /// Callback to get an OptimizationRemarkEmitter from a Function *
2035cfca06d7SDimitry Andric OptimizationRemarkGetter OREGetter;
2036cfca06d7SDimitry Andric
2037cfca06d7SDimitry Andric /// OpenMP-specific information cache. Also Used for Attributor runs.
2038cfca06d7SDimitry Andric OMPInformationCache &OMPInfoCache;
2039cfca06d7SDimitry Andric
2040cfca06d7SDimitry Andric /// Attributor instance.
2041cfca06d7SDimitry Andric Attributor &A;
2042cfca06d7SDimitry Andric
2043cfca06d7SDimitry Andric /// Helper function to run Attributor on SCC.
runAttributor__anon7bbaa8dc0111::OpenMPOpt2044344a3780SDimitry Andric bool runAttributor(bool IsModulePass) {
2045cfca06d7SDimitry Andric if (SCC.empty())
2046cfca06d7SDimitry Andric return false;
2047cfca06d7SDimitry Andric
2048344a3780SDimitry Andric registerAAs(IsModulePass);
2049cfca06d7SDimitry Andric
2050cfca06d7SDimitry Andric ChangeStatus Changed = A.run();
2051cfca06d7SDimitry Andric
2052cfca06d7SDimitry Andric LLVM_DEBUG(dbgs() << "[Attributor] Done with " << SCC.size()
2053cfca06d7SDimitry Andric << " functions, result: " << Changed << ".\n");
2054cfca06d7SDimitry Andric
205599aabd70SDimitry Andric if (Changed == ChangeStatus::CHANGED)
205699aabd70SDimitry Andric OMPInfoCache.invalidateAnalyses();
205799aabd70SDimitry Andric
2058cfca06d7SDimitry Andric return Changed == ChangeStatus::CHANGED;
2059cfca06d7SDimitry Andric }
2060cfca06d7SDimitry Andric
2061344a3780SDimitry Andric void registerFoldRuntimeCall(RuntimeFunction RF);
2062344a3780SDimitry Andric
2063cfca06d7SDimitry Andric /// Populate the Attributor with abstract attribute opportunities in the
2064e3b55780SDimitry Andric /// functions.
2065344a3780SDimitry Andric void registerAAs(bool IsModulePass);
2066e3b55780SDimitry Andric
2067e3b55780SDimitry Andric public:
2068e3b55780SDimitry Andric /// Callback to register AAs for live functions, including internal functions
2069e3b55780SDimitry Andric /// marked live during the traversal.
2070e3b55780SDimitry Andric static void registerAAsForFunction(Attributor &A, const Function &F);
2071cfca06d7SDimitry Andric };
2072cfca06d7SDimitry Andric
/// Return the single kernel from which \p F is (transitively) reachable, or
/// nullptr if there is no such kernel or it cannot be determined. Results are
/// memoized in UniqueKernelMap.
Kernel OpenMPOpt::getUniqueKernelFor(Function &F) {
  // Only analyze functions inside the current SCC (when one is tracked).
  if (OMPInfoCache.CGSCC && !OMPInfoCache.CGSCC->empty() &&
      !OMPInfoCache.CGSCC->contains(&F))
    return nullptr;

  // Use a scope to keep the lifetime of the CachedKernel short. The recursive
  // getUniqueKernelFor calls below (via GetUniqueKernelForUse) may insert into
  // UniqueKernelMap and thereby invalidate this DenseMap reference.
  {
    std::optional<Kernel> &CachedKernel = UniqueKernelMap[&F];
    if (CachedKernel)
      return *CachedKernel;

    // TODO: We should use an AA to create an (optimistic and callback
    // call-aware) call graph. For now we stick to simple patterns that
    // are less powerful, basically the worst fixpoint.
    if (isOpenMPKernel(F)) {
      // A kernel is trivially its own unique kernel.
      CachedKernel = Kernel(&F);
      return *CachedKernel;
    }

    CachedKernel = nullptr;
    if (!F.hasLocalLinkage()) {
      // An externally visible function may have callers we cannot see, so an
      // unknown kernel could reach it; give up (with a remark).
      // See https://openmp.llvm.org/remarks/OptimizationRemarks.html
      auto Remark = [&](OptimizationRemarkAnalysis ORA) {
        return ORA << "Potentially unknown OpenMP target region caller.";
      };
      emitRemark<OptimizationRemarkAnalysis>(&F, "OMP100", Remark);

      return nullptr;
    }
  }

  // Classify one use of F: return the unique kernel reaching the user when
  // the use is benign (equality compare, direct call, or an operand of a
  // __kmpc_parallel_51 call); nullptr for any other use.
  auto GetUniqueKernelForUse = [&](const Use &U) -> Kernel {
    if (auto *Cmp = dyn_cast<ICmpInst>(U.getUser())) {
      // Allow use in equality comparisons.
      if (Cmp->isEquality())
        return getUniqueKernelFor(*Cmp);
      return nullptr;
    }
    if (auto *CB = dyn_cast<CallBase>(U.getUser())) {
      // Allow direct calls.
      if (CB->isCallee(&U))
        return getUniqueKernelFor(*CB);

      OMPInformationCache::RuntimeFunctionInfo &KernelParallelRFI =
          OMPInfoCache.RFIs[OMPRTL___kmpc_parallel_51];
      // Allow the use in __kmpc_parallel_51 calls.
      if (OpenMPOpt::getCallIfRegularCall(*U.getUser(), &KernelParallelRFI))
        return getUniqueKernelFor(*CB);
      return nullptr;
    }
    // Disallow every other use.
    return nullptr;
  };

  // TODO: In the future we want to track more than just a unique kernel.
  SmallPtrSet<Kernel, 2> PotentialKernels;
  OMPInformationCache::foreachUse(F, [&](const Use &U) {
    PotentialKernels.insert(GetUniqueKernelForUse(U));
  });

  // F has a unique kernel only if every use resolves to the same non-null
  // kernel; a nullptr set entry represents an unknown or disallowed use.
  Kernel K = nullptr;
  if (PotentialKernels.size() == 1)
    K = *PotentialKernels.begin();

  // Cache the result.
  UniqueKernelMap[&F] = K;

  return K;
}
2143cfca06d7SDimitry Andric
rewriteDeviceCodeStateMachine()2144cfca06d7SDimitry Andric bool OpenMPOpt::rewriteDeviceCodeStateMachine() {
2145344a3780SDimitry Andric OMPInformationCache::RuntimeFunctionInfo &KernelParallelRFI =
2146344a3780SDimitry Andric OMPInfoCache.RFIs[OMPRTL___kmpc_parallel_51];
2147cfca06d7SDimitry Andric
2148cfca06d7SDimitry Andric bool Changed = false;
2149344a3780SDimitry Andric if (!KernelParallelRFI)
2150cfca06d7SDimitry Andric return Changed;
2151cfca06d7SDimitry Andric
2152c0981da4SDimitry Andric // If we have disabled state machine changes, exit
2153c0981da4SDimitry Andric if (DisableOpenMPOptStateMachineRewrite)
2154c0981da4SDimitry Andric return Changed;
2155c0981da4SDimitry Andric
2156cfca06d7SDimitry Andric for (Function *F : SCC) {
2157cfca06d7SDimitry Andric
2158344a3780SDimitry Andric // Check if the function is a use in a __kmpc_parallel_51 call at
2159cfca06d7SDimitry Andric // all.
2160cfca06d7SDimitry Andric bool UnknownUse = false;
2161344a3780SDimitry Andric bool KernelParallelUse = false;
2162cfca06d7SDimitry Andric unsigned NumDirectCalls = 0;
2163cfca06d7SDimitry Andric
2164cfca06d7SDimitry Andric SmallVector<Use *, 2> ToBeReplacedStateMachineUses;
2165b60736ecSDimitry Andric OMPInformationCache::foreachUse(*F, [&](Use &U) {
2166cfca06d7SDimitry Andric if (auto *CB = dyn_cast<CallBase>(U.getUser()))
2167cfca06d7SDimitry Andric if (CB->isCallee(&U)) {
2168cfca06d7SDimitry Andric ++NumDirectCalls;
2169cfca06d7SDimitry Andric return;
2170cfca06d7SDimitry Andric }
2171cfca06d7SDimitry Andric
2172cfca06d7SDimitry Andric if (isa<ICmpInst>(U.getUser())) {
2173cfca06d7SDimitry Andric ToBeReplacedStateMachineUses.push_back(&U);
2174cfca06d7SDimitry Andric return;
2175cfca06d7SDimitry Andric }
2176344a3780SDimitry Andric
2177344a3780SDimitry Andric // Find wrapper functions that represent parallel kernels.
2178344a3780SDimitry Andric CallInst *CI =
2179344a3780SDimitry Andric OpenMPOpt::getCallIfRegularCall(*U.getUser(), &KernelParallelRFI);
2180344a3780SDimitry Andric const unsigned int WrapperFunctionArgNo = 6;
2181344a3780SDimitry Andric if (!KernelParallelUse && CI &&
2182344a3780SDimitry Andric CI->getArgOperandNo(&U) == WrapperFunctionArgNo) {
2183344a3780SDimitry Andric KernelParallelUse = true;
2184cfca06d7SDimitry Andric ToBeReplacedStateMachineUses.push_back(&U);
2185cfca06d7SDimitry Andric return;
2186cfca06d7SDimitry Andric }
2187cfca06d7SDimitry Andric UnknownUse = true;
2188cfca06d7SDimitry Andric });
2189cfca06d7SDimitry Andric
2190344a3780SDimitry Andric // Do not emit a remark if we haven't seen a __kmpc_parallel_51
2191cfca06d7SDimitry Andric // use.
2192344a3780SDimitry Andric if (!KernelParallelUse)
2193cfca06d7SDimitry Andric continue;
2194cfca06d7SDimitry Andric
2195cfca06d7SDimitry Andric // If this ever hits, we should investigate.
2196cfca06d7SDimitry Andric // TODO: Checking the number of uses is not a necessary restriction and
2197cfca06d7SDimitry Andric // should be lifted.
2198cfca06d7SDimitry Andric if (UnknownUse || NumDirectCalls != 1 ||
2199344a3780SDimitry Andric ToBeReplacedStateMachineUses.size() > 2) {
2200344a3780SDimitry Andric auto Remark = [&](OptimizationRemarkAnalysis ORA) {
2201344a3780SDimitry Andric return ORA << "Parallel region is used in "
2202cfca06d7SDimitry Andric << (UnknownUse ? "unknown" : "unexpected")
2203344a3780SDimitry Andric << " ways. Will not attempt to rewrite the state machine.";
2204cfca06d7SDimitry Andric };
2205344a3780SDimitry Andric emitRemark<OptimizationRemarkAnalysis>(F, "OMP101", Remark);
2206cfca06d7SDimitry Andric continue;
2207cfca06d7SDimitry Andric }
2208cfca06d7SDimitry Andric
2209344a3780SDimitry Andric // Even if we have __kmpc_parallel_51 calls, we (for now) give
2210cfca06d7SDimitry Andric // up if the function is not called from a unique kernel.
2211cfca06d7SDimitry Andric Kernel K = getUniqueKernelFor(*F);
2212cfca06d7SDimitry Andric if (!K) {
2213344a3780SDimitry Andric auto Remark = [&](OptimizationRemarkAnalysis ORA) {
2214344a3780SDimitry Andric return ORA << "Parallel region is not called from a unique kernel. "
2215344a3780SDimitry Andric "Will not attempt to rewrite the state machine.";
2216cfca06d7SDimitry Andric };
2217344a3780SDimitry Andric emitRemark<OptimizationRemarkAnalysis>(F, "OMP102", Remark);
2218cfca06d7SDimitry Andric continue;
2219cfca06d7SDimitry Andric }
2220cfca06d7SDimitry Andric
2221cfca06d7SDimitry Andric // We now know F is a parallel body function called only from the kernel K.
2222cfca06d7SDimitry Andric // We also identified the state machine uses in which we replace the
2223cfca06d7SDimitry Andric // function pointer by a new global symbol for identification purposes. This
2224cfca06d7SDimitry Andric // ensures only direct calls to the function are left.
2225cfca06d7SDimitry Andric
2226cfca06d7SDimitry Andric Module &M = *F->getParent();
2227cfca06d7SDimitry Andric Type *Int8Ty = Type::getInt8Ty(M.getContext());
2228cfca06d7SDimitry Andric
2229cfca06d7SDimitry Andric auto *ID = new GlobalVariable(
2230cfca06d7SDimitry Andric M, Int8Ty, /* isConstant */ true, GlobalValue::PrivateLinkage,
2231cfca06d7SDimitry Andric UndefValue::get(Int8Ty), F->getName() + ".ID");
2232cfca06d7SDimitry Andric
2233cfca06d7SDimitry Andric for (Use *U : ToBeReplacedStateMachineUses)
2234c0981da4SDimitry Andric U->set(ConstantExpr::getPointerBitCastOrAddrSpaceCast(
2235c0981da4SDimitry Andric ID, U->get()->getType()));
2236cfca06d7SDimitry Andric
2237cfca06d7SDimitry Andric ++NumOpenMPParallelRegionsReplacedInGPUStateMachine;
2238cfca06d7SDimitry Andric
2239cfca06d7SDimitry Andric Changed = true;
2240cfca06d7SDimitry Andric }
2241cfca06d7SDimitry Andric
2242cfca06d7SDimitry Andric return Changed;
2243cfca06d7SDimitry Andric }
2244cfca06d7SDimitry Andric
/// Abstract Attribute for tracking ICV (internal control variable) values.
struct AAICVTracker : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAICVTracker(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Returns true if value is assumed to be tracked.
  bool isAssumedTracked() const { return getAssumed(); }

  /// Returns true if value is known to be tracked.
  /// NOTE(review): returns getAssumed() rather than getKnown() -- confirm
  /// whether this is intentional.
  bool isKnownTracked() const { return getAssumed(); }

  /// Create an abstract attribute view for the position \p IRP.
  static AAICVTracker &createForPosition(const IRPosition &IRP, Attributor &A);

  /// Return the value with which \p I can be replaced for specific \p ICV.
  /// The base implementation conservatively answers std::nullopt ("not clear
  /// yet"); subclasses override with real tracking.
  virtual std::optional<Value *> getReplacementValue(InternalControlVar ICV,
                                                     const Instruction *I,
                                                     Attributor &A) const {
    return std::nullopt;
  }

  /// Return an assumed unique ICV value if a single candidate is found. If
  /// there cannot be one, return a nullptr. If it is not clear yet, return
  /// std::nullopt.
  virtual std::optional<Value *>
  getUniqueReplacementValue(InternalControlVar ICV) const = 0;

  // Currently only nthreads is being tracked.
  // this array will only grow with time.
  InternalControlVar TrackableICVs[1] = {ICV_nthreads};

  /// See AbstractAttribute::getName()
  const std::string getName() const override { return "AAICVTracker"; }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is AAICVTracker
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID used by classof (identified by its address).
  static const char ID;
};
2289cfca06d7SDimitry Andric
/// Function-level ICV tracker: records, per ICV, the value the ICV holds
/// after each program point that (potentially) changes it.
struct AAICVTrackerFunction : public AAICVTracker {
  AAICVTrackerFunction(const IRPosition &IRP, Attributor &A)
      : AAICVTracker(IRP, A) {}

  // FIXME: come up with better string.
  const std::string getAsStr(Attributor *) const override {
    return "ICVTrackerFunction";
  }

  // FIXME: come up with some stats.
  void trackStatistics() const override {}

  /// We don't manifest anything for this AA.
  ChangeStatus manifest(Attributor &A) override {
    return ChangeStatus::UNCHANGED;
  }

  // Map of ICV to their values at specific program point.
  // For each ICV, maps an instruction to the ICV value established by that
  // instruction; a nullptr mapped value means "unknown after this point".
  EnumeratedArray<DenseMap<Instruction *, Value *>, InternalControlVar,
                  InternalControlVar::ICV___last>
      ICVReplacementValuesMap;

  /// Populate ICVReplacementValuesMap: setter calls record the new value,
  /// all other calls are classified through getValueForCall.
  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus HasChanged = ChangeStatus::UNCHANGED;

    Function *F = getAnchorScope();

    auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());

    for (InternalControlVar ICV : TrackableICVs) {
      auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter];

      auto &ValuesMap = ICVReplacementValuesMap[ICV];
      // Record the argument of each direct setter call as the ICV value at
      // that call.
      auto TrackValues = [&](Use &U, Function &) {
        CallInst *CI = OpenMPOpt::getCallIfRegularCall(U);
        if (!CI)
          return false;

        // FIXME: handle setters with more that 1 arguments.
        /// Track new value.
        if (ValuesMap.insert(std::make_pair(CI, CI->getArgOperand(0))).second)
          HasChanged = ChangeStatus::CHANGED;

        return false;
      };

      // Record what (if anything) each other call does to the ICV;
      // std::nullopt means the call is known not to affect it.
      auto CallCheck = [&](Instruction &I) {
        std::optional<Value *> ReplVal = getValueForCall(A, I, ICV);
        if (ReplVal && ValuesMap.insert(std::make_pair(&I, *ReplVal)).second)
          HasChanged = ChangeStatus::CHANGED;

        return true;
      };

      // Track all changes of an ICV.
      SetterRFI.foreachUse(TrackValues, F);

      bool UsedAssumedInformation = false;
      A.checkForAllInstructions(CallCheck, *this, {Instruction::Call},
                                UsedAssumedInformation,
                                /* CheckBBLivenessOnly */ true);

      /// TODO: Figure out a way to avoid adding entry in
      /// ICVReplacementValuesMap
      // Mark the function entry with an unknown (nullptr) value so backward
      // walks in getReplacementValue have a stopping point.
      Instruction *Entry = &F->getEntryBlock().front();
      if (HasChanged == ChangeStatus::CHANGED && !ValuesMap.count(Entry))
        ValuesMap.insert(std::make_pair(Entry, nullptr));
    }

    return HasChanged;
  }

  /// Helper to check if \p I is a call and get the value for it if it is
  /// unique. Tri-state result:
  ///  - std::nullopt: the call does not affect the ICV;
  ///  - nullptr:      the call (potentially) changes the ICV to an unknown
  ///                  value;
  ///  - a Value*:     the ICV value after the call.
  std::optional<Value *> getValueForCall(Attributor &A, const Instruction &I,
                                         InternalControlVar &ICV) const {

    const auto *CB = dyn_cast<CallBase>(&I);
    if (!CB || CB->hasFnAttr("no_openmp") ||
        CB->hasFnAttr("no_openmp_routines"))
      return std::nullopt;

    auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
    auto &GetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Getter];
    auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter];
    Function *CalledFunction = CB->getCalledFunction();

    // Indirect call, assume ICV changes.
    if (CalledFunction == nullptr)
      return nullptr;
    // Reading the ICV does not change it.
    if (CalledFunction == GetterRFI.Declaration)
      return std::nullopt;
    if (CalledFunction == SetterRFI.Declaration) {
      // Setter calls were recorded by updateImpl; look the new value up.
      if (ICVReplacementValuesMap[ICV].count(&I))
        return ICVReplacementValuesMap[ICV].lookup(&I);

      return nullptr;
    }

    // Since we don't know, assume it changes the ICV.
    if (CalledFunction->isDeclaration())
      return nullptr;

    // For calls with a visible body, ask the callee's tracker for a unique
    // value the ICV has on return.
    const auto *ICVTrackingAA = A.getAAFor<AAICVTracker>(
        *this, IRPosition::callsite_returned(*CB), DepClassTy::REQUIRED);

    if (ICVTrackingAA->isAssumedTracked()) {
      std::optional<Value *> URV =
          ICVTrackingAA->getUniqueReplacementValue(ICV);
      // Only reuse the callee's value if it is valid at this position.
      if (!URV || (*URV && AA::isValidAtPosition(AA::ValueAndContext(**URV, I),
                                                 OMPInfoCache)))
        return URV;
    }

    // If we don't know, assume it changes.
    return nullptr;
  }

  // We don't check unique value for a function, so return std::nullopt.
  std::optional<Value *>
  getUniqueReplacementValue(InternalControlVar ICV) const override {
    return std::nullopt;
  }

  /// Return the value with which \p I can be replaced for specific \p ICV.
  /// Walks backwards from \p I (through predecessors) to the closest recorded
  /// ICV change; returns nullptr when conflicting values are reachable.
  std::optional<Value *> getReplacementValue(InternalControlVar ICV,
                                             const Instruction *I,
                                             Attributor &A) const override {
    const auto &ValuesMap = ICVReplacementValuesMap[ICV];
    if (ValuesMap.count(I))
      return ValuesMap.lookup(I);

    SmallVector<const Instruction *, 16> Worklist;
    SmallPtrSet<const Instruction *, 16> Visited;
    Worklist.push_back(I);

    std::optional<Value *> ReplVal;

    while (!Worklist.empty()) {
      const Instruction *CurrInst = Worklist.pop_back_val();
      if (!Visited.insert(CurrInst).second)
        continue;

      const BasicBlock *CurrBB = CurrInst->getParent();

      // Go up and look for all potential setters/calls that might change the
      // ICV.
      while ((CurrInst = CurrInst->getPrevNode())) {
        if (ValuesMap.count(CurrInst)) {
          std::optional<Value *> NewReplVal = ValuesMap.lookup(CurrInst);
          // Unknown value, track new.
          if (!ReplVal) {
            ReplVal = NewReplVal;
            break;
          }

          // If we found a new value, we can't know the icv value anymore.
          if (NewReplVal)
            if (ReplVal != NewReplVal)
              return nullptr;

          break;
        }

        std::optional<Value *> NewReplVal = getValueForCall(A, *CurrInst, ICV);
        if (!NewReplVal)
          continue;

        // Unknown value, track new.
        if (!ReplVal) {
          ReplVal = NewReplVal;
          break;
        }

        // We found a new value, we can't know the icv value anymore.
        if (ReplVal != NewReplVal)
          return nullptr;
      }

      // If we are in the same BB and we have a value, we are done.
      if (CurrBB == I->getParent() && ReplVal)
        return ReplVal;

      // Go through all predecessors and add terminators for analysis.
      for (const BasicBlock *Pred : predecessors(CurrBB))
        if (const Instruction *Terminator = Pred->getTerminator())
          Worklist.push_back(Terminator);
    }

    return ReplVal;
  }
};
2483b60736ecSDimitry Andric
2484b60736ecSDimitry Andric struct AAICVTrackerFunctionReturned : AAICVTracker {
AAICVTrackerFunctionReturned__anon7bbaa8dc0111::AAICVTrackerFunctionReturned2485b60736ecSDimitry Andric AAICVTrackerFunctionReturned(const IRPosition &IRP, Attributor &A)
2486b60736ecSDimitry Andric : AAICVTracker(IRP, A) {}
2487b60736ecSDimitry Andric
2488b60736ecSDimitry Andric // FIXME: come up with better string.
getAsStr__anon7bbaa8dc0111::AAICVTrackerFunctionReturned24897fa27ce4SDimitry Andric const std::string getAsStr(Attributor *) const override {
2490b60736ecSDimitry Andric return "ICVTrackerFunctionReturned";
2491b60736ecSDimitry Andric }
2492b60736ecSDimitry Andric
2493b60736ecSDimitry Andric // FIXME: come up with some stats.
trackStatistics__anon7bbaa8dc0111::AAICVTrackerFunctionReturned2494b60736ecSDimitry Andric void trackStatistics() const override {}
2495b60736ecSDimitry Andric
2496b60736ecSDimitry Andric /// We don't manifest anything for this AA.
manifest__anon7bbaa8dc0111::AAICVTrackerFunctionReturned2497b60736ecSDimitry Andric ChangeStatus manifest(Attributor &A) override {
2498b60736ecSDimitry Andric return ChangeStatus::UNCHANGED;
2499b60736ecSDimitry Andric }
2500b60736ecSDimitry Andric
2501b60736ecSDimitry Andric // Map of ICV to their values at specific program point.
2502e3b55780SDimitry Andric EnumeratedArray<std::optional<Value *>, InternalControlVar,
2503b60736ecSDimitry Andric InternalControlVar::ICV___last>
2504b60736ecSDimitry Andric ICVReplacementValuesMap;
2505b60736ecSDimitry Andric
2506b60736ecSDimitry Andric /// Return the value with which \p I can be replaced for specific \p ICV.
2507e3b55780SDimitry Andric std::optional<Value *>
getUniqueReplacementValue__anon7bbaa8dc0111::AAICVTrackerFunctionReturned2508b60736ecSDimitry Andric getUniqueReplacementValue(InternalControlVar ICV) const override {
2509b60736ecSDimitry Andric return ICVReplacementValuesMap[ICV];
2510b60736ecSDimitry Andric }
2511b60736ecSDimitry Andric
updateImpl__anon7bbaa8dc0111::AAICVTrackerFunctionReturned2512b60736ecSDimitry Andric ChangeStatus updateImpl(Attributor &A) override {
2513b60736ecSDimitry Andric ChangeStatus Changed = ChangeStatus::UNCHANGED;
25147fa27ce4SDimitry Andric const auto *ICVTrackingAA = A.getAAFor<AAICVTracker>(
2515344a3780SDimitry Andric *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
2516b60736ecSDimitry Andric
25177fa27ce4SDimitry Andric if (!ICVTrackingAA->isAssumedTracked())
2518b60736ecSDimitry Andric return indicatePessimisticFixpoint();
2519b60736ecSDimitry Andric
2520b60736ecSDimitry Andric for (InternalControlVar ICV : TrackableICVs) {
2521e3b55780SDimitry Andric std::optional<Value *> &ReplVal = ICVReplacementValuesMap[ICV];
2522e3b55780SDimitry Andric std::optional<Value *> UniqueICVValue;
2523b60736ecSDimitry Andric
2524b60736ecSDimitry Andric auto CheckReturnInst = [&](Instruction &I) {
2525e3b55780SDimitry Andric std::optional<Value *> NewReplVal =
25267fa27ce4SDimitry Andric ICVTrackingAA->getReplacementValue(ICV, &I, A);
2527b60736ecSDimitry Andric
2528b60736ecSDimitry Andric // If we found a second ICV value there is no unique returned value.
2529145449b1SDimitry Andric if (UniqueICVValue && UniqueICVValue != NewReplVal)
2530b60736ecSDimitry Andric return false;
2531b60736ecSDimitry Andric
2532b60736ecSDimitry Andric UniqueICVValue = NewReplVal;
2533b60736ecSDimitry Andric
2534b60736ecSDimitry Andric return true;
2535b60736ecSDimitry Andric };
2536b60736ecSDimitry Andric
2537344a3780SDimitry Andric bool UsedAssumedInformation = false;
2538b60736ecSDimitry Andric if (!A.checkForAllInstructions(CheckReturnInst, *this, {Instruction::Ret},
2539344a3780SDimitry Andric UsedAssumedInformation,
2540b60736ecSDimitry Andric /* CheckBBLivenessOnly */ true))
2541b60736ecSDimitry Andric UniqueICVValue = nullptr;
2542b60736ecSDimitry Andric
2543b60736ecSDimitry Andric if (UniqueICVValue == ReplVal)
2544b60736ecSDimitry Andric continue;
2545b60736ecSDimitry Andric
2546b60736ecSDimitry Andric ReplVal = UniqueICVValue;
2547b60736ecSDimitry Andric Changed = ChangeStatus::CHANGED;
2548b60736ecSDimitry Andric }
2549b60736ecSDimitry Andric
2550b60736ecSDimitry Andric return Changed;
2551b60736ecSDimitry Andric }
2552b60736ecSDimitry Andric };
2553b60736ecSDimitry Andric
2554b60736ecSDimitry Andric struct AAICVTrackerCallSite : AAICVTracker {
AAICVTrackerCallSite__anon7bbaa8dc0111::AAICVTrackerCallSite2555b60736ecSDimitry Andric AAICVTrackerCallSite(const IRPosition &IRP, Attributor &A)
2556b60736ecSDimitry Andric : AAICVTracker(IRP, A) {}
2557b60736ecSDimitry Andric
initialize__anon7bbaa8dc0111::AAICVTrackerCallSite2558b60736ecSDimitry Andric void initialize(Attributor &A) override {
25597fa27ce4SDimitry Andric assert(getAnchorScope() && "Expected anchor function");
2560b60736ecSDimitry Andric
2561b60736ecSDimitry Andric // We only initialize this AA for getters, so we need to know which ICV it
2562b60736ecSDimitry Andric // gets.
2563b60736ecSDimitry Andric auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
2564b60736ecSDimitry Andric for (InternalControlVar ICV : TrackableICVs) {
2565b60736ecSDimitry Andric auto ICVInfo = OMPInfoCache.ICVs[ICV];
2566b60736ecSDimitry Andric auto &Getter = OMPInfoCache.RFIs[ICVInfo.Getter];
2567b60736ecSDimitry Andric if (Getter.Declaration == getAssociatedFunction()) {
2568b60736ecSDimitry Andric AssociatedICV = ICVInfo.Kind;
2569b60736ecSDimitry Andric return;
2570b60736ecSDimitry Andric }
2571b60736ecSDimitry Andric }
2572b60736ecSDimitry Andric
2573b60736ecSDimitry Andric /// Unknown ICV.
2574b60736ecSDimitry Andric indicatePessimisticFixpoint();
2575b60736ecSDimitry Andric }
2576b60736ecSDimitry Andric
manifest__anon7bbaa8dc0111::AAICVTrackerCallSite2577b60736ecSDimitry Andric ChangeStatus manifest(Attributor &A) override {
2578145449b1SDimitry Andric if (!ReplVal || !*ReplVal)
2579b60736ecSDimitry Andric return ChangeStatus::UNCHANGED;
2580b60736ecSDimitry Andric
2581145449b1SDimitry Andric A.changeAfterManifest(IRPosition::inst(*getCtxI()), **ReplVal);
2582b60736ecSDimitry Andric A.deleteAfterManifest(*getCtxI());
2583b60736ecSDimitry Andric
2584b60736ecSDimitry Andric return ChangeStatus::CHANGED;
2585b60736ecSDimitry Andric }
2586b60736ecSDimitry Andric
2587b60736ecSDimitry Andric // FIXME: come up with better string.
getAsStr__anon7bbaa8dc0111::AAICVTrackerCallSite25887fa27ce4SDimitry Andric const std::string getAsStr(Attributor *) const override {
25897fa27ce4SDimitry Andric return "ICVTrackerCallSite";
25907fa27ce4SDimitry Andric }
2591b60736ecSDimitry Andric
2592b60736ecSDimitry Andric // FIXME: come up with some stats.
trackStatistics__anon7bbaa8dc0111::AAICVTrackerCallSite2593b60736ecSDimitry Andric void trackStatistics() const override {}
2594b60736ecSDimitry Andric
2595b60736ecSDimitry Andric InternalControlVar AssociatedICV;
2596e3b55780SDimitry Andric std::optional<Value *> ReplVal;
2597b60736ecSDimitry Andric
updateImpl__anon7bbaa8dc0111::AAICVTrackerCallSite2598b60736ecSDimitry Andric ChangeStatus updateImpl(Attributor &A) override {
25997fa27ce4SDimitry Andric const auto *ICVTrackingAA = A.getAAFor<AAICVTracker>(
2600344a3780SDimitry Andric *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
2601b60736ecSDimitry Andric
2602b60736ecSDimitry Andric // We don't have any information, so we assume it changes the ICV.
26037fa27ce4SDimitry Andric if (!ICVTrackingAA->isAssumedTracked())
2604b60736ecSDimitry Andric return indicatePessimisticFixpoint();
2605b60736ecSDimitry Andric
2606e3b55780SDimitry Andric std::optional<Value *> NewReplVal =
26077fa27ce4SDimitry Andric ICVTrackingAA->getReplacementValue(AssociatedICV, getCtxI(), A);
2608b60736ecSDimitry Andric
2609b60736ecSDimitry Andric if (ReplVal == NewReplVal)
2610b60736ecSDimitry Andric return ChangeStatus::UNCHANGED;
2611b60736ecSDimitry Andric
2612b60736ecSDimitry Andric ReplVal = NewReplVal;
2613b60736ecSDimitry Andric return ChangeStatus::CHANGED;
2614b60736ecSDimitry Andric }
2615b60736ecSDimitry Andric
2616b60736ecSDimitry Andric // Return the value with which associated value can be replaced for specific
2617b60736ecSDimitry Andric // \p ICV.
2618e3b55780SDimitry Andric std::optional<Value *>
getUniqueReplacementValue__anon7bbaa8dc0111::AAICVTrackerCallSite2619b60736ecSDimitry Andric getUniqueReplacementValue(InternalControlVar ICV) const override {
2620b60736ecSDimitry Andric return ReplVal;
2621b60736ecSDimitry Andric }
2622b60736ecSDimitry Andric };
2623b60736ecSDimitry Andric
2624b60736ecSDimitry Andric struct AAICVTrackerCallSiteReturned : AAICVTracker {
AAICVTrackerCallSiteReturned__anon7bbaa8dc0111::AAICVTrackerCallSiteReturned2625b60736ecSDimitry Andric AAICVTrackerCallSiteReturned(const IRPosition &IRP, Attributor &A)
2626b60736ecSDimitry Andric : AAICVTracker(IRP, A) {}
2627b60736ecSDimitry Andric
2628b60736ecSDimitry Andric // FIXME: come up with better string.
getAsStr__anon7bbaa8dc0111::AAICVTrackerCallSiteReturned26297fa27ce4SDimitry Andric const std::string getAsStr(Attributor *) const override {
2630b60736ecSDimitry Andric return "ICVTrackerCallSiteReturned";
2631b60736ecSDimitry Andric }
2632b60736ecSDimitry Andric
2633b60736ecSDimitry Andric // FIXME: come up with some stats.
trackStatistics__anon7bbaa8dc0111::AAICVTrackerCallSiteReturned2634b60736ecSDimitry Andric void trackStatistics() const override {}
2635b60736ecSDimitry Andric
2636b60736ecSDimitry Andric /// We don't manifest anything for this AA.
manifest__anon7bbaa8dc0111::AAICVTrackerCallSiteReturned2637b60736ecSDimitry Andric ChangeStatus manifest(Attributor &A) override {
2638b60736ecSDimitry Andric return ChangeStatus::UNCHANGED;
2639b60736ecSDimitry Andric }
2640b60736ecSDimitry Andric
2641b60736ecSDimitry Andric // Map of ICV to their values at specific program point.
2642e3b55780SDimitry Andric EnumeratedArray<std::optional<Value *>, InternalControlVar,
2643b60736ecSDimitry Andric InternalControlVar::ICV___last>
2644b60736ecSDimitry Andric ICVReplacementValuesMap;
2645b60736ecSDimitry Andric
2646b60736ecSDimitry Andric /// Return the value with which associated value can be replaced for specific
2647b60736ecSDimitry Andric /// \p ICV.
2648e3b55780SDimitry Andric std::optional<Value *>
getUniqueReplacementValue__anon7bbaa8dc0111::AAICVTrackerCallSiteReturned2649b60736ecSDimitry Andric getUniqueReplacementValue(InternalControlVar ICV) const override {
2650b60736ecSDimitry Andric return ICVReplacementValuesMap[ICV];
2651b60736ecSDimitry Andric }
2652b60736ecSDimitry Andric
updateImpl__anon7bbaa8dc0111::AAICVTrackerCallSiteReturned2653b60736ecSDimitry Andric ChangeStatus updateImpl(Attributor &A) override {
2654b60736ecSDimitry Andric ChangeStatus Changed = ChangeStatus::UNCHANGED;
26557fa27ce4SDimitry Andric const auto *ICVTrackingAA = A.getAAFor<AAICVTracker>(
2656344a3780SDimitry Andric *this, IRPosition::returned(*getAssociatedFunction()),
2657344a3780SDimitry Andric DepClassTy::REQUIRED);
2658b60736ecSDimitry Andric
2659b60736ecSDimitry Andric // We don't have any information, so we assume it changes the ICV.
26607fa27ce4SDimitry Andric if (!ICVTrackingAA->isAssumedTracked())
2661b60736ecSDimitry Andric return indicatePessimisticFixpoint();
2662b60736ecSDimitry Andric
2663b60736ecSDimitry Andric for (InternalControlVar ICV : TrackableICVs) {
2664e3b55780SDimitry Andric std::optional<Value *> &ReplVal = ICVReplacementValuesMap[ICV];
2665e3b55780SDimitry Andric std::optional<Value *> NewReplVal =
26667fa27ce4SDimitry Andric ICVTrackingAA->getUniqueReplacementValue(ICV);
2667b60736ecSDimitry Andric
2668b60736ecSDimitry Andric if (ReplVal == NewReplVal)
2669b60736ecSDimitry Andric continue;
2670b60736ecSDimitry Andric
2671b60736ecSDimitry Andric ReplVal = NewReplVal;
2672b60736ecSDimitry Andric Changed = ChangeStatus::CHANGED;
2673b60736ecSDimitry Andric }
2674b60736ecSDimitry Andric return Changed;
2675b60736ecSDimitry Andric }
2676cfca06d7SDimitry Andric };
2677344a3780SDimitry Andric
2678b1c73532SDimitry Andric /// Determines if \p BB exits the function unconditionally itself or reaches a
2679b1c73532SDimitry Andric /// block that does through only unique successors.
hasFunctionEndAsUniqueSuccessor(const BasicBlock * BB)2680b1c73532SDimitry Andric static bool hasFunctionEndAsUniqueSuccessor(const BasicBlock *BB) {
2681b1c73532SDimitry Andric if (succ_empty(BB))
2682b1c73532SDimitry Andric return true;
2683b1c73532SDimitry Andric const BasicBlock *const Successor = BB->getUniqueSuccessor();
2684b1c73532SDimitry Andric if (!Successor)
2685b1c73532SDimitry Andric return false;
2686b1c73532SDimitry Andric return hasFunctionEndAsUniqueSuccessor(Successor);
2687b1c73532SDimitry Andric }
2688b1c73532SDimitry Andric
/// Execution-domain tracking for a function. For every basic block and call
/// site this infers (a) whether it is executed only by the initial thread and
/// (b) whether it is only reached from / only reaching aligned barriers. The
/// latter is used by manifest() to delete redundant aligned barriers.
struct AAExecutionDomainFunction : public AAExecutionDomain {
  AAExecutionDomainFunction(const IRPosition &IRP, Attributor &A)
      : AAExecutionDomain(IRP, A) {}

  // RPOT is heap-allocated in initialize() and owned by this AA.
  ~AAExecutionDomainFunction() { delete RPOT; }

  /// Precompute the reverse post-order traversal of the anchor function so
  /// updateImpl() can iterate blocks in a data-flow friendly order.
  void initialize(Attributor &A) override {
    Function *F = getAnchorScope();
    assert(F && "Expected anchor function");
    RPOT = new ReversePostOrderTraversal<Function *>(F);
  }

  /// Human-readable summary: initial-thread-only / aligned block counts out of
  /// all blocks with a tracked execution domain.
  const std::string getAsStr(Attributor *) const override {
    unsigned TotalBlocks = 0, InitialThreadBlocks = 0, AlignedBlocks = 0;
    for (auto &It : BEDMap) {
      // The nullptr key holds the function-level summary, not a real block.
      if (!It.getFirst())
        continue;
      TotalBlocks++;
      InitialThreadBlocks += It.getSecond().IsExecutedByInitialThreadOnly;
      // A block counts as "aligned" only if it is both reached from and
      // reaching aligned barriers exclusively.
      AlignedBlocks += It.getSecond().IsReachedFromAlignedBarrierOnly &&
                       It.getSecond().IsReachingAlignedBarrierOnly;
    }
    return "[AAExecutionDomain] " + std::to_string(InitialThreadBlocks) + "/" +
           std::to_string(AlignedBlocks) + " of " +
           std::to_string(TotalBlocks) +
           " executed by initial thread / aligned";
  }

  /// See AbstractAttribute::trackStatistics().
  void trackStatistics() const override {}

  /// Delete aligned barriers (and their associated llvm.assume calls) that are
  /// provably redundant, unless barrier elimination is disabled.
  ChangeStatus manifest(Attributor &A) override {
    LLVM_DEBUG({
      for (const BasicBlock &BB : *getAnchorScope()) {
        if (!isExecutedByInitialThreadOnly(BB))
          continue;
        dbgs() << TAG << " Basic block @" << getAnchorScope()->getName() << " "
               << BB.getName() << " is executed by a single thread.\n";
      }
    });

    ChangeStatus Changed = ChangeStatus::UNCHANGED;

    if (DisableOpenMPOptBarrierElimination)
      return Changed;

    SmallPtrSet<CallBase *, 16> DeletedBarriers;
    // Try to delete the aligned barrier \p CB, or, for CB == nullptr, the
    // aligned barriers reaching the kernel end.
    auto HandleAlignedBarrier = [&](CallBase *CB) {
      // For a real barrier consult its pre-state; for the kernel end use the
      // function-level summary stored under the nullptr key.
      const ExecutionDomainTy &ED = CB ? CEDMap[{CB, PRE}] : BEDMap[nullptr];
      if (!ED.IsReachedFromAlignedBarrierOnly ||
          ED.EncounteredNonLocalSideEffect)
        return;
      // Deleting the encountered assumes (required below) needs module scope.
      if (!ED.EncounteredAssumes.empty() && !A.isModulePass())
        return;

      // We can remove this barrier, if it is one, or aligned barriers reaching
      // the kernel end (if CB is nullptr). Aligned barriers reaching the kernel
      // end should only be removed if the kernel end is their unique successor;
      // otherwise, they may have side-effects that aren't accounted for in the
      // kernel end in their other successors. If those barriers have other
      // barriers reaching them, those can be transitively removed as well as
      // long as the kernel end is also their unique successor.
      if (CB) {
        DeletedBarriers.insert(CB);
        A.deleteAfterManifest(*CB);
        ++NumBarriersEliminated;
        Changed = ChangeStatus::CHANGED;
      } else if (!ED.AlignedBarriers.empty()) {
        Changed = ChangeStatus::CHANGED;
        SmallVector<CallBase *> Worklist(ED.AlignedBarriers.begin(),
                                         ED.AlignedBarriers.end());
        SmallSetVector<CallBase *, 16> Visited;
        while (!Worklist.empty()) {
          CallBase *LastCB = Worklist.pop_back_val();
          if (!Visited.insert(LastCB))
            continue;
          if (LastCB->getFunction() != getAnchorScope())
            continue;
          if (!hasFunctionEndAsUniqueSuccessor(LastCB->getParent()))
            continue;
          if (!DeletedBarriers.count(LastCB)) {
            ++NumBarriersEliminated;
            A.deleteAfterManifest(*LastCB);
            continue;
          }
          // The final aligned barrier (LastCB) reaching the kernel end was
          // removed already. This means we can go one step further and remove
          // the barriers encountered last before (LastCB).
          const ExecutionDomainTy &LastED = CEDMap[{LastCB, PRE}];
          Worklist.append(LastED.AlignedBarriers.begin(),
                          LastED.AlignedBarriers.end());
        }
      }

      // If we actually eliminated a barrier we need to eliminate the associated
      // llvm.assumes as well to avoid creating UB.
      if (!ED.EncounteredAssumes.empty() && (CB || !ED.AlignedBarriers.empty()))
        for (auto *AssumeCB : ED.EncounteredAssumes)
          A.deleteAfterManifest(*AssumeCB);
    };

    for (auto *CB : AlignedBarriers)
      HandleAlignedBarrier(CB);

    // Handle the "kernel end barrier" for kernels too.
    if (omp::isOpenMPKernel(*getAnchorScope()))
      HandleAlignedBarrier(nullptr);

    return Changed;
  }

  /// A fence is a no-op unless we recorded it as non-no-op during the fixpoint
  /// iteration (or the state is invalid).
  bool isNoOpFence(const FenceInst &FI) const override {
    return getState().isValidState() && !NonNoOpFences.count(&FI);
  }

  /// Merge barrier and assumption information from \p PredED into the successor
  /// \p ED.
  void
  mergeInPredecessorBarriersAndAssumptions(Attributor &A, ExecutionDomainTy &ED,
                                           const ExecutionDomainTy &PredED);

  /// Merge all information from \p PredED into the successor \p ED. If
  /// \p InitialEdgeOnly is set, only the initial edge will enter the block
  /// represented by \p ED from this predecessor.
  bool mergeInPredecessor(Attributor &A, ExecutionDomainTy &ED,
                          const ExecutionDomainTy &PredED,
                          bool InitialEdgeOnly = false);

  /// Accumulate information for the entry block in \p EntryBBED.
  bool handleCallees(Attributor &A, ExecutionDomainTy &EntryBBED);

  /// See AbstractAttribute::updateImpl.
  ChangeStatus updateImpl(Attributor &A) override;

  /// Query interface, see AAExecutionDomain
  ///{
  bool isExecutedByInitialThreadOnly(const BasicBlock &BB) const override {
    if (!isValidState())
      return false;
    assert(BB.getParent() == getAnchorScope() && "Block is out of scope!");
    return BEDMap.lookup(&BB).IsExecutedByInitialThreadOnly;
  }

  /// \p I is in an aligned region if scanning forward and backward from it
  /// reaches only aligned barriers (or boundaries that are themselves only
  /// reached from / reaching aligned barriers).
  bool isExecutedInAlignedRegion(Attributor &A,
                                 const Instruction &I) const override {
    assert(I.getFunction() == getAnchorScope() &&
           "Instruction is out of scope!");
    if (!isValidState())
      return false;

    bool ForwardIsOk = true;
    const Instruction *CurI;

    // Check forward until a call or the block end is reached.
    CurI = &I;
    do {
      auto *CB = dyn_cast<CallBase>(CurI);
      if (!CB)
        continue;
      if (CB != &I && AlignedBarriers.contains(const_cast<CallBase *>(CB)))
        return true;
      const auto &It = CEDMap.find({CB, PRE});
      if (It == CEDMap.end())
        continue;
      if (!It->getSecond().IsReachingAlignedBarrierOnly)
        ForwardIsOk = false;
      break;
    } while ((CurI = CurI->getNextNonDebugInstruction()));

    // Fell off the block end without a verdict; consult the block state.
    if (!CurI && !BEDMap.lookup(I.getParent()).IsReachingAlignedBarrierOnly)
      ForwardIsOk = false;

    // Check backward until a call or the block beginning is reached.
    CurI = &I;
    do {
      auto *CB = dyn_cast<CallBase>(CurI);
      if (!CB)
        continue;
      if (CB != &I && AlignedBarriers.contains(const_cast<CallBase *>(CB)))
        return true;
      const auto &It = CEDMap.find({CB, POST});
      if (It == CEDMap.end())
        continue;
      if (It->getSecond().IsReachedFromAlignedBarrierOnly)
        break;
      return false;
    } while ((CurI = CurI->getPrevNonDebugInstruction()));

    // Delayed decision on the forward pass to allow aligned barrier detection
    // in the backwards traversal.
    if (!ForwardIsOk)
      return false;

    if (!CurI) {
      const BasicBlock *BB = I.getParent();
      // Reached the block start: for the entry block consult the function
      // summary, otherwise require every predecessor block to be reached from
      // aligned barriers only.
      if (BB == &BB->getParent()->getEntryBlock())
        return BEDMap.lookup(nullptr).IsReachedFromAlignedBarrierOnly;
      if (!llvm::all_of(predecessors(BB), [&](const BasicBlock *PredBB) {
            return BEDMap.lookup(PredBB).IsReachedFromAlignedBarrierOnly;
          })) {
        return false;
      }
    }

    // On neither traversal we found anything but aligned barriers.
    return true;
  }

  ExecutionDomainTy getExecutionDomain(const BasicBlock &BB) const override {
    assert(isValidState() &&
           "No request should be made against an invalid state!");
    return BEDMap.lookup(&BB);
  }
  /// Return the {pre, post} execution domains of the call \p CB.
  std::pair<ExecutionDomainTy, ExecutionDomainTy>
  getExecutionDomain(const CallBase &CB) const override {
    assert(isValidState() &&
           "No request should be made against an invalid state!");
    return {CEDMap.lookup({&CB, PRE}), CEDMap.lookup({&CB, POST})};
  }
  ExecutionDomainTy getFunctionExecutionDomain() const override {
    assert(isValidState() &&
           "No request should be made against an invalid state!");
    return InterProceduralED;
  }
  ///}

  // Check if the edge into the successor block contains a condition that only
  // lets the main thread execute it.
  static bool isInitialThreadOnlyEdge(Attributor &A, BranchInst *Edge,
                                      BasicBlock &SuccessorBB) {
    if (!Edge || !Edge->isConditional())
      return false;
    // Only the true-successor of the branch is considered guarded.
    if (Edge->getSuccessor(0) != &SuccessorBB)
      return false;

    auto *Cmp = dyn_cast<CmpInst>(Edge->getCondition());
    if (!Cmp || !Cmp->isTrueWhenEqual() || !Cmp->isEquality())
      return false;

    ConstantInt *C = dyn_cast<ConstantInt>(Cmp->getOperand(1));
    if (!C)
      return false;

    // Match: -1 == __kmpc_target_init (for non-SPMD kernels only!)
    if (C->isAllOnesValue()) {
      auto *CB = dyn_cast<CallBase>(Cmp->getOperand(0));
      auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
      auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_target_init];
      CB = CB ? OpenMPOpt::getCallIfRegularCall(*CB, &RFI) : nullptr;
      if (!CB)
        return false;
      ConstantStruct *KernelEnvC =
          KernelInfo::getKernelEnvironementFromKernelInitCB(CB);
      ConstantInt *ExecModeC =
          KernelInfo::getExecModeFromKernelEnvironment(KernelEnvC);
      return ExecModeC->getSExtValue() & OMP_TGT_EXEC_MODE_GENERIC;
    }

    if (C->isZero()) {
      // Match: 0 == llvm.nvvm.read.ptx.sreg.tid.x()
      if (auto *II = dyn_cast<IntrinsicInst>(Cmp->getOperand(0)))
        if (II->getIntrinsicID() == Intrinsic::nvvm_read_ptx_sreg_tid_x)
          return true;

      // Match: 0 == llvm.amdgcn.workitem.id.x()
      if (auto *II = dyn_cast<IntrinsicInst>(Cmp->getOperand(0)))
        if (II->getIntrinsicID() == Intrinsic::amdgcn_workitem_id_x)
          return true;
    }

    return false;
  };

  /// Mapping containing information about the function for other AAs.
  ExecutionDomainTy InterProceduralED;

  /// Whether a call-site state describes the point before (PRE) or after
  /// (POST) the call; used as the tag bit of the CEDMap key.
  enum Direction { PRE = 0, POST = 1 };
  /// Mapping containing information per block. The nullptr key holds the
  /// function-level summary.
  DenseMap<const BasicBlock *, ExecutionDomainTy> BEDMap;
  /// Mapping containing pre/post information per call site.
  DenseMap<PointerIntPair<const CallBase *, 1, Direction>, ExecutionDomainTy>
      CEDMap;
  /// All aligned barrier calls collected during the fixpoint iteration.
  SmallSetVector<CallBase *, 16> AlignedBarriers;

  /// Owned traversal object; created in initialize(), freed in the destructor.
  ReversePostOrderTraversal<Function *> *RPOT = nullptr;

  /// Set \p R to \p V and report true if that changed \p R.
  static bool setAndRecord(bool &R, bool V) {
    bool Eq = (R == V);
    R = V;
    return !Eq;
  }

  /// Collection of fences known to be non-no-op. All fences not in this set
  /// can be assumed no-op.
  SmallPtrSet<const FenceInst *, 8> NonNoOpFences;
};
2985344a3780SDimitry Andric
mergeInPredecessorBarriersAndAssumptions(Attributor & A,ExecutionDomainTy & ED,const ExecutionDomainTy & PredED)2986e3b55780SDimitry Andric void AAExecutionDomainFunction::mergeInPredecessorBarriersAndAssumptions(
2987e3b55780SDimitry Andric Attributor &A, ExecutionDomainTy &ED, const ExecutionDomainTy &PredED) {
2988e3b55780SDimitry Andric for (auto *EA : PredED.EncounteredAssumes)
2989e3b55780SDimitry Andric ED.addAssumeInst(A, *EA);
2990e3b55780SDimitry Andric
2991e3b55780SDimitry Andric for (auto *AB : PredED.AlignedBarriers)
2992e3b55780SDimitry Andric ED.addAlignedBarrier(A, *AB);
2993344a3780SDimitry Andric }
2994344a3780SDimitry Andric
mergeInPredecessor(Attributor & A,ExecutionDomainTy & ED,const ExecutionDomainTy & PredED,bool InitialEdgeOnly)29957fa27ce4SDimitry Andric bool AAExecutionDomainFunction::mergeInPredecessor(
2996e3b55780SDimitry Andric Attributor &A, ExecutionDomainTy &ED, const ExecutionDomainTy &PredED,
2997e3b55780SDimitry Andric bool InitialEdgeOnly) {
2998e3b55780SDimitry Andric
29997fa27ce4SDimitry Andric bool Changed = false;
30007fa27ce4SDimitry Andric Changed |=
30017fa27ce4SDimitry Andric setAndRecord(ED.IsExecutedByInitialThreadOnly,
30027fa27ce4SDimitry Andric InitialEdgeOnly || (PredED.IsExecutedByInitialThreadOnly &&
30037fa27ce4SDimitry Andric ED.IsExecutedByInitialThreadOnly));
30047fa27ce4SDimitry Andric
30057fa27ce4SDimitry Andric Changed |= setAndRecord(ED.IsReachedFromAlignedBarrierOnly,
30067fa27ce4SDimitry Andric ED.IsReachedFromAlignedBarrierOnly &&
30077fa27ce4SDimitry Andric PredED.IsReachedFromAlignedBarrierOnly);
30087fa27ce4SDimitry Andric Changed |= setAndRecord(ED.EncounteredNonLocalSideEffect,
30097fa27ce4SDimitry Andric ED.EncounteredNonLocalSideEffect |
30107fa27ce4SDimitry Andric PredED.EncounteredNonLocalSideEffect);
30117fa27ce4SDimitry Andric // Do not track assumptions and barriers as part of Changed.
3012e3b55780SDimitry Andric if (ED.IsReachedFromAlignedBarrierOnly)
3013e3b55780SDimitry Andric mergeInPredecessorBarriersAndAssumptions(A, ED, PredED);
3014e3b55780SDimitry Andric else
3015e3b55780SDimitry Andric ED.clearAssumeInstAndAlignedBarriers();
30167fa27ce4SDimitry Andric return Changed;
3017e3b55780SDimitry Andric }
3018e3b55780SDimitry Andric
/// Accumulate information from all call sites of the anchor function into the
/// entry block state \p EntryBBED and the function-level summary kept under
/// the nullptr key of BEDMap. Returns true if the cached summary changed.
bool AAExecutionDomainFunction::handleCallees(Attributor &A,
                                              ExecutionDomainTy &EntryBBED) {
  // Pre/post execution domains of every call site of this function.
  SmallVector<std::pair<ExecutionDomainTy, ExecutionDomainTy>, 4> CallSiteEDs;
  auto PredForCallSite = [&](AbstractCallSite ACS) {
    // Query the caller's execution-domain AA; give up on this call site if it
    // is unavailable or invalid.
    const auto *EDAA = A.getAAFor<AAExecutionDomain>(
        *this, IRPosition::function(*ACS.getInstruction()->getFunction()),
        DepClassTy::OPTIONAL);
    if (!EDAA || !EDAA->getState().isValidState())
      return false;
    CallSiteEDs.emplace_back(
        EDAA->getExecutionDomain(*cast<CallBase>(ACS.getInstruction())));
    return true;
  };

  ExecutionDomainTy ExitED;
  bool AllCallSitesKnown;
  if (A.checkForAllCallSites(PredForCallSite, *this,
                             /* RequiresAllCallSites */ true,
                             AllCallSitesKnown)) {
    // All call sites are known: merge each call site's pre-state into the
    // entry block and intersect the post-states into the exit summary.
    for (const auto &[CSInED, CSOutED] : CallSiteEDs) {
      mergeInPredecessor(A, EntryBBED, CSInED);
      ExitED.IsReachingAlignedBarrierOnly &=
          CSOutED.IsReachingAlignedBarrierOnly;
    }

  } else {
    // We could not find all predecessors, so this is either a kernel or a
    // function with external linkage (or with some other weird uses).
    if (omp::isOpenMPKernel(*getAnchorScope())) {
      // A kernel entry behaves like an implicit aligned barrier without
      // preceding side effects, but the exit cannot be assumed aligned.
      EntryBBED.IsExecutedByInitialThreadOnly = false;
      EntryBBED.IsReachedFromAlignedBarrierOnly = true;
      EntryBBED.EncounteredNonLocalSideEffect = false;
      ExitED.IsReachingAlignedBarrierOnly = false;
    } else {
      // Unknown external callers: assume the worst on all fronts.
      EntryBBED.IsExecutedByInitialThreadOnly = false;
      EntryBBED.IsReachedFromAlignedBarrierOnly = false;
      EntryBBED.EncounteredNonLocalSideEffect = true;
      ExitED.IsReachingAlignedBarrierOnly = false;
    }
  }

  // Fold the derived entry/exit information into the function-level summary
  // (BEDMap's nullptr entry) and report whether anything changed.
  bool Changed = false;
  auto &FnED = BEDMap[nullptr];
  Changed |= setAndRecord(FnED.IsReachedFromAlignedBarrierOnly,
                          FnED.IsReachedFromAlignedBarrierOnly &
                              EntryBBED.IsReachedFromAlignedBarrierOnly);
  Changed |= setAndRecord(FnED.IsReachingAlignedBarrierOnly,
                          FnED.IsReachingAlignedBarrierOnly &
                              ExitED.IsReachingAlignedBarrierOnly);
  Changed |= setAndRecord(FnED.IsExecutedByInitialThreadOnly,
                          EntryBBED.IsExecutedByInitialThreadOnly);
  return Changed;
}
3072e3b55780SDimitry Andric
updateImpl(Attributor & A)3073e3b55780SDimitry Andric ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
3074e3b55780SDimitry Andric
3075e3b55780SDimitry Andric bool Changed = false;
3076e3b55780SDimitry Andric
3077e3b55780SDimitry Andric // Helper to deal with an aligned barrier encountered during the forward
3078e3b55780SDimitry Andric // traversal. \p CB is the aligned barrier, \p ED is the execution domain when
3079e3b55780SDimitry Andric // it was encountered.
30807fa27ce4SDimitry Andric auto HandleAlignedBarrier = [&](CallBase &CB, ExecutionDomainTy &ED) {
30817fa27ce4SDimitry Andric Changed |= AlignedBarriers.insert(&CB);
3082e3b55780SDimitry Andric // First, update the barrier ED kept in the separate CEDMap.
30837fa27ce4SDimitry Andric auto &CallInED = CEDMap[{&CB, PRE}];
30847fa27ce4SDimitry Andric Changed |= mergeInPredecessor(A, CallInED, ED);
30857fa27ce4SDimitry Andric CallInED.IsReachingAlignedBarrierOnly = true;
3086e3b55780SDimitry Andric // Next adjust the ED we use for the traversal.
3087e3b55780SDimitry Andric ED.EncounteredNonLocalSideEffect = false;
3088e3b55780SDimitry Andric ED.IsReachedFromAlignedBarrierOnly = true;
3089e3b55780SDimitry Andric // Aligned barrier collection has to come last.
3090e3b55780SDimitry Andric ED.clearAssumeInstAndAlignedBarriers();
30917fa27ce4SDimitry Andric ED.addAlignedBarrier(A, CB);
30927fa27ce4SDimitry Andric auto &CallOutED = CEDMap[{&CB, POST}];
30937fa27ce4SDimitry Andric Changed |= mergeInPredecessor(A, CallOutED, ED);
3094e3b55780SDimitry Andric };
3095e3b55780SDimitry Andric
30967fa27ce4SDimitry Andric auto *LivenessAA =
3097e3b55780SDimitry Andric A.getAAFor<AAIsDead>(*this, getIRPosition(), DepClassTy::OPTIONAL);
3098e3b55780SDimitry Andric
3099e3b55780SDimitry Andric Function *F = getAnchorScope();
3100e3b55780SDimitry Andric BasicBlock &EntryBB = F->getEntryBlock();
3101b1c73532SDimitry Andric bool IsKernel = omp::isOpenMPKernel(*F);
3102e3b55780SDimitry Andric
3103e3b55780SDimitry Andric SmallVector<Instruction *> SyncInstWorklist;
3104e3b55780SDimitry Andric for (auto &RIt : *RPOT) {
3105e3b55780SDimitry Andric BasicBlock &BB = *RIt;
3106e3b55780SDimitry Andric
3107e3b55780SDimitry Andric bool IsEntryBB = &BB == &EntryBB;
3108e3b55780SDimitry Andric // TODO: We use local reasoning since we don't have a divergence analysis
3109e3b55780SDimitry Andric // running as well. We could basically allow uniform branches here.
3110e3b55780SDimitry Andric bool AlignedBarrierLastInBlock = IsEntryBB && IsKernel;
31117fa27ce4SDimitry Andric bool IsExplicitlyAligned = IsEntryBB && IsKernel;
3112e3b55780SDimitry Andric ExecutionDomainTy ED;
3113e3b55780SDimitry Andric // Propagate "incoming edges" into information about this block.
3114e3b55780SDimitry Andric if (IsEntryBB) {
31157fa27ce4SDimitry Andric Changed |= handleCallees(A, ED);
3116e3b55780SDimitry Andric } else {
3117e3b55780SDimitry Andric // For live non-entry blocks we only propagate
3118e3b55780SDimitry Andric // information via live edges.
31197fa27ce4SDimitry Andric if (LivenessAA && LivenessAA->isAssumedDead(&BB))
3120e3b55780SDimitry Andric continue;
3121e3b55780SDimitry Andric
3122e3b55780SDimitry Andric for (auto *PredBB : predecessors(&BB)) {
31237fa27ce4SDimitry Andric if (LivenessAA && LivenessAA->isEdgeDead(PredBB, &BB))
3124e3b55780SDimitry Andric continue;
3125e3b55780SDimitry Andric bool InitialEdgeOnly = isInitialThreadOnlyEdge(
3126e3b55780SDimitry Andric A, dyn_cast<BranchInst>(PredBB->getTerminator()), BB);
3127e3b55780SDimitry Andric mergeInPredecessor(A, ED, BEDMap[PredBB], InitialEdgeOnly);
3128e3b55780SDimitry Andric }
3129e3b55780SDimitry Andric }
3130e3b55780SDimitry Andric
3131e3b55780SDimitry Andric // Now we traverse the block, accumulate effects in ED and attach
3132e3b55780SDimitry Andric // information to calls.
3133e3b55780SDimitry Andric for (Instruction &I : BB) {
3134e3b55780SDimitry Andric bool UsedAssumedInformation;
31357fa27ce4SDimitry Andric if (A.isAssumedDead(I, *this, LivenessAA, UsedAssumedInformation,
3136e3b55780SDimitry Andric /* CheckBBLivenessOnly */ false, DepClassTy::OPTIONAL,
3137e3b55780SDimitry Andric /* CheckForDeadStore */ true))
3138e3b55780SDimitry Andric continue;
3139e3b55780SDimitry Andric
3140e3b55780SDimitry Andric // Asummes and "assume-like" (dbg, lifetime, ...) are handled first, the
3141e3b55780SDimitry Andric // former is collected the latter is ignored.
3142e3b55780SDimitry Andric if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
3143e3b55780SDimitry Andric if (auto *AI = dyn_cast_or_null<AssumeInst>(II)) {
3144e3b55780SDimitry Andric ED.addAssumeInst(A, *AI);
3145e3b55780SDimitry Andric continue;
3146e3b55780SDimitry Andric }
3147e3b55780SDimitry Andric // TODO: Should we also collect and delete lifetime markers?
3148e3b55780SDimitry Andric if (II->isAssumeLikeIntrinsic())
3149e3b55780SDimitry Andric continue;
3150e3b55780SDimitry Andric }
3151e3b55780SDimitry Andric
31527fa27ce4SDimitry Andric if (auto *FI = dyn_cast<FenceInst>(&I)) {
31537fa27ce4SDimitry Andric if (!ED.EncounteredNonLocalSideEffect) {
31547fa27ce4SDimitry Andric // An aligned fence without non-local side-effects is a no-op.
31557fa27ce4SDimitry Andric if (ED.IsReachedFromAlignedBarrierOnly)
31567fa27ce4SDimitry Andric continue;
31577fa27ce4SDimitry Andric // A non-aligned fence without non-local side-effects is a no-op
31587fa27ce4SDimitry Andric // if the ordering only publishes non-local side-effects (or less).
31597fa27ce4SDimitry Andric switch (FI->getOrdering()) {
31607fa27ce4SDimitry Andric case AtomicOrdering::NotAtomic:
31617fa27ce4SDimitry Andric continue;
31627fa27ce4SDimitry Andric case AtomicOrdering::Unordered:
31637fa27ce4SDimitry Andric continue;
31647fa27ce4SDimitry Andric case AtomicOrdering::Monotonic:
31657fa27ce4SDimitry Andric continue;
31667fa27ce4SDimitry Andric case AtomicOrdering::Acquire:
31677fa27ce4SDimitry Andric break;
31687fa27ce4SDimitry Andric case AtomicOrdering::Release:
31697fa27ce4SDimitry Andric continue;
31707fa27ce4SDimitry Andric case AtomicOrdering::AcquireRelease:
31717fa27ce4SDimitry Andric break;
31727fa27ce4SDimitry Andric case AtomicOrdering::SequentiallyConsistent:
31737fa27ce4SDimitry Andric break;
31747fa27ce4SDimitry Andric };
31757fa27ce4SDimitry Andric }
31767fa27ce4SDimitry Andric NonNoOpFences.insert(FI);
31777fa27ce4SDimitry Andric }
31787fa27ce4SDimitry Andric
3179e3b55780SDimitry Andric auto *CB = dyn_cast<CallBase>(&I);
3180e3b55780SDimitry Andric bool IsNoSync = AA::isNoSyncInst(A, I, *this);
3181e3b55780SDimitry Andric bool IsAlignedBarrier =
3182e3b55780SDimitry Andric !IsNoSync && CB &&
3183e3b55780SDimitry Andric AANoSync::isAlignedBarrier(*CB, AlignedBarrierLastInBlock);
3184e3b55780SDimitry Andric
3185e3b55780SDimitry Andric AlignedBarrierLastInBlock &= IsNoSync;
31867fa27ce4SDimitry Andric IsExplicitlyAligned &= IsNoSync;
3187e3b55780SDimitry Andric
3188e3b55780SDimitry Andric // Next we check for calls. Aligned barriers are handled
3189e3b55780SDimitry Andric // explicitly, everything else is kept for the backward traversal and will
3190e3b55780SDimitry Andric // also affect our state.
3191e3b55780SDimitry Andric if (CB) {
3192e3b55780SDimitry Andric if (IsAlignedBarrier) {
31937fa27ce4SDimitry Andric HandleAlignedBarrier(*CB, ED);
3194e3b55780SDimitry Andric AlignedBarrierLastInBlock = true;
31957fa27ce4SDimitry Andric IsExplicitlyAligned = true;
3196e3b55780SDimitry Andric continue;
3197e3b55780SDimitry Andric }
3198e3b55780SDimitry Andric
3199e3b55780SDimitry Andric // Check the pointer(s) of a memory intrinsic explicitly.
3200e3b55780SDimitry Andric if (isa<MemIntrinsic>(&I)) {
3201e3b55780SDimitry Andric if (!ED.EncounteredNonLocalSideEffect &&
3202e3b55780SDimitry Andric AA::isPotentiallyAffectedByBarrier(A, I, *this))
3203e3b55780SDimitry Andric ED.EncounteredNonLocalSideEffect = true;
3204e3b55780SDimitry Andric if (!IsNoSync) {
3205e3b55780SDimitry Andric ED.IsReachedFromAlignedBarrierOnly = false;
3206e3b55780SDimitry Andric SyncInstWorklist.push_back(&I);
3207e3b55780SDimitry Andric }
3208e3b55780SDimitry Andric continue;
3209e3b55780SDimitry Andric }
3210e3b55780SDimitry Andric
3211e3b55780SDimitry Andric // Record how we entered the call, then accumulate the effect of the
3212e3b55780SDimitry Andric // call in ED for potential use by the callee.
32137fa27ce4SDimitry Andric auto &CallInED = CEDMap[{CB, PRE}];
32147fa27ce4SDimitry Andric Changed |= mergeInPredecessor(A, CallInED, ED);
3215e3b55780SDimitry Andric
3216e3b55780SDimitry Andric // If we have a sync-definition we can check if it starts/ends in an
3217e3b55780SDimitry Andric // aligned barrier. If we are unsure we assume any sync breaks
3218e3b55780SDimitry Andric // alignment.
3219e3b55780SDimitry Andric Function *Callee = CB->getCalledFunction();
3220e3b55780SDimitry Andric if (!IsNoSync && Callee && !Callee->isDeclaration()) {
32217fa27ce4SDimitry Andric const auto *EDAA = A.getAAFor<AAExecutionDomain>(
3222e3b55780SDimitry Andric *this, IRPosition::function(*Callee), DepClassTy::OPTIONAL);
32237fa27ce4SDimitry Andric if (EDAA && EDAA->getState().isValidState()) {
32247fa27ce4SDimitry Andric const auto &CalleeED = EDAA->getFunctionExecutionDomain();
3225e3b55780SDimitry Andric ED.IsReachedFromAlignedBarrierOnly =
3226e3b55780SDimitry Andric CalleeED.IsReachedFromAlignedBarrierOnly;
3227e3b55780SDimitry Andric AlignedBarrierLastInBlock = ED.IsReachedFromAlignedBarrierOnly;
3228e3b55780SDimitry Andric if (IsNoSync || !CalleeED.IsReachedFromAlignedBarrierOnly)
3229e3b55780SDimitry Andric ED.EncounteredNonLocalSideEffect |=
3230e3b55780SDimitry Andric CalleeED.EncounteredNonLocalSideEffect;
3231e3b55780SDimitry Andric else
3232e3b55780SDimitry Andric ED.EncounteredNonLocalSideEffect =
3233e3b55780SDimitry Andric CalleeED.EncounteredNonLocalSideEffect;
32347fa27ce4SDimitry Andric if (!CalleeED.IsReachingAlignedBarrierOnly) {
32357fa27ce4SDimitry Andric Changed |=
32367fa27ce4SDimitry Andric setAndRecord(CallInED.IsReachingAlignedBarrierOnly, false);
3237e3b55780SDimitry Andric SyncInstWorklist.push_back(&I);
32387fa27ce4SDimitry Andric }
3239e3b55780SDimitry Andric if (CalleeED.IsReachedFromAlignedBarrierOnly)
3240e3b55780SDimitry Andric mergeInPredecessorBarriersAndAssumptions(A, ED, CalleeED);
32417fa27ce4SDimitry Andric auto &CallOutED = CEDMap[{CB, POST}];
32427fa27ce4SDimitry Andric Changed |= mergeInPredecessor(A, CallOutED, ED);
3243e3b55780SDimitry Andric continue;
3244e3b55780SDimitry Andric }
3245e3b55780SDimitry Andric }
32467fa27ce4SDimitry Andric if (!IsNoSync) {
32477fa27ce4SDimitry Andric ED.IsReachedFromAlignedBarrierOnly = false;
32487fa27ce4SDimitry Andric Changed |= setAndRecord(CallInED.IsReachingAlignedBarrierOnly, false);
32497fa27ce4SDimitry Andric SyncInstWorklist.push_back(&I);
32507fa27ce4SDimitry Andric }
3251e3b55780SDimitry Andric AlignedBarrierLastInBlock &= ED.IsReachedFromAlignedBarrierOnly;
3252e3b55780SDimitry Andric ED.EncounteredNonLocalSideEffect |= !CB->doesNotAccessMemory();
32537fa27ce4SDimitry Andric auto &CallOutED = CEDMap[{CB, POST}];
32547fa27ce4SDimitry Andric Changed |= mergeInPredecessor(A, CallOutED, ED);
3255e3b55780SDimitry Andric }
3256e3b55780SDimitry Andric
3257e3b55780SDimitry Andric if (!I.mayHaveSideEffects() && !I.mayReadFromMemory())
3258e3b55780SDimitry Andric continue;
3259e3b55780SDimitry Andric
3260e3b55780SDimitry Andric // If we have a callee we try to use fine-grained information to
3261e3b55780SDimitry Andric // determine local side-effects.
3262e3b55780SDimitry Andric if (CB) {
32637fa27ce4SDimitry Andric const auto *MemAA = A.getAAFor<AAMemoryLocation>(
3264e3b55780SDimitry Andric *this, IRPosition::callsite_function(*CB), DepClassTy::OPTIONAL);
3265e3b55780SDimitry Andric
3266e3b55780SDimitry Andric auto AccessPred = [&](const Instruction *I, const Value *Ptr,
3267e3b55780SDimitry Andric AAMemoryLocation::AccessKind,
3268e3b55780SDimitry Andric AAMemoryLocation::MemoryLocationsKind) {
3269e3b55780SDimitry Andric return !AA::isPotentiallyAffectedByBarrier(A, {Ptr}, *this, I);
3270e3b55780SDimitry Andric };
32717fa27ce4SDimitry Andric if (MemAA && MemAA->getState().isValidState() &&
32727fa27ce4SDimitry Andric MemAA->checkForAllAccessesToMemoryKind(
3273e3b55780SDimitry Andric AccessPred, AAMemoryLocation::ALL_LOCATIONS))
3274e3b55780SDimitry Andric continue;
3275e3b55780SDimitry Andric }
3276e3b55780SDimitry Andric
32777fa27ce4SDimitry Andric auto &InfoCache = A.getInfoCache();
32787fa27ce4SDimitry Andric if (!I.mayHaveSideEffects() && InfoCache.isOnlyUsedByAssume(I))
3279e3b55780SDimitry Andric continue;
3280e3b55780SDimitry Andric
3281e3b55780SDimitry Andric if (auto *LI = dyn_cast<LoadInst>(&I))
3282e3b55780SDimitry Andric if (LI->hasMetadata(LLVMContext::MD_invariant_load))
3283e3b55780SDimitry Andric continue;
3284e3b55780SDimitry Andric
3285e3b55780SDimitry Andric if (!ED.EncounteredNonLocalSideEffect &&
3286e3b55780SDimitry Andric AA::isPotentiallyAffectedByBarrier(A, I, *this))
3287e3b55780SDimitry Andric ED.EncounteredNonLocalSideEffect = true;
3288e3b55780SDimitry Andric }
3289e3b55780SDimitry Andric
32907fa27ce4SDimitry Andric bool IsEndAndNotReachingAlignedBarriersOnly = false;
3291e3b55780SDimitry Andric if (!isa<UnreachableInst>(BB.getTerminator()) &&
3292e3b55780SDimitry Andric !BB.getTerminator()->getNumSuccessors()) {
3293e3b55780SDimitry Andric
32947fa27ce4SDimitry Andric Changed |= mergeInPredecessor(A, InterProceduralED, ED);
3295e3b55780SDimitry Andric
32967fa27ce4SDimitry Andric auto &FnED = BEDMap[nullptr];
32977fa27ce4SDimitry Andric if (IsKernel && !IsExplicitlyAligned)
32987fa27ce4SDimitry Andric FnED.IsReachingAlignedBarrierOnly = false;
32997fa27ce4SDimitry Andric Changed |= mergeInPredecessor(A, FnED, ED);
33007fa27ce4SDimitry Andric
33017fa27ce4SDimitry Andric if (!FnED.IsReachingAlignedBarrierOnly) {
33027fa27ce4SDimitry Andric IsEndAndNotReachingAlignedBarriersOnly = true;
33037fa27ce4SDimitry Andric SyncInstWorklist.push_back(BB.getTerminator());
33047fa27ce4SDimitry Andric auto &BBED = BEDMap[&BB];
33057fa27ce4SDimitry Andric Changed |= setAndRecord(BBED.IsReachingAlignedBarrierOnly, false);
33067fa27ce4SDimitry Andric }
3307e3b55780SDimitry Andric }
3308e3b55780SDimitry Andric
3309e3b55780SDimitry Andric ExecutionDomainTy &StoredED = BEDMap[&BB];
33107fa27ce4SDimitry Andric ED.IsReachingAlignedBarrierOnly = StoredED.IsReachingAlignedBarrierOnly &
33117fa27ce4SDimitry Andric !IsEndAndNotReachingAlignedBarriersOnly;
3312e3b55780SDimitry Andric
3313e3b55780SDimitry Andric // Check if we computed anything different as part of the forward
3314e3b55780SDimitry Andric // traversal. We do not take assumptions and aligned barriers into account
3315e3b55780SDimitry Andric // as they do not influence the state we iterate. Backward traversal values
3316e3b55780SDimitry Andric // are handled later on.
3317e3b55780SDimitry Andric if (ED.IsExecutedByInitialThreadOnly !=
3318e3b55780SDimitry Andric StoredED.IsExecutedByInitialThreadOnly ||
3319e3b55780SDimitry Andric ED.IsReachedFromAlignedBarrierOnly !=
3320e3b55780SDimitry Andric StoredED.IsReachedFromAlignedBarrierOnly ||
3321e3b55780SDimitry Andric ED.EncounteredNonLocalSideEffect !=
3322e3b55780SDimitry Andric StoredED.EncounteredNonLocalSideEffect)
3323e3b55780SDimitry Andric Changed = true;
3324e3b55780SDimitry Andric
3325e3b55780SDimitry Andric // Update the state with the new value.
3326e3b55780SDimitry Andric StoredED = std::move(ED);
3327e3b55780SDimitry Andric }
3328e3b55780SDimitry Andric
3329e3b55780SDimitry Andric // Propagate (non-aligned) sync instruction effects backwards until the
3330e3b55780SDimitry Andric // entry is hit or an aligned barrier.
3331e3b55780SDimitry Andric SmallSetVector<BasicBlock *, 16> Visited;
3332e3b55780SDimitry Andric while (!SyncInstWorklist.empty()) {
3333e3b55780SDimitry Andric Instruction *SyncInst = SyncInstWorklist.pop_back_val();
3334e3b55780SDimitry Andric Instruction *CurInst = SyncInst;
33357fa27ce4SDimitry Andric bool HitAlignedBarrierOrKnownEnd = false;
3336e3b55780SDimitry Andric while ((CurInst = CurInst->getPrevNode())) {
3337e3b55780SDimitry Andric auto *CB = dyn_cast<CallBase>(CurInst);
3338e3b55780SDimitry Andric if (!CB)
3339e3b55780SDimitry Andric continue;
33407fa27ce4SDimitry Andric auto &CallOutED = CEDMap[{CB, POST}];
33417fa27ce4SDimitry Andric Changed |= setAndRecord(CallOutED.IsReachingAlignedBarrierOnly, false);
33427fa27ce4SDimitry Andric auto &CallInED = CEDMap[{CB, PRE}];
33437fa27ce4SDimitry Andric HitAlignedBarrierOrKnownEnd =
33447fa27ce4SDimitry Andric AlignedBarriers.count(CB) || !CallInED.IsReachingAlignedBarrierOnly;
33457fa27ce4SDimitry Andric if (HitAlignedBarrierOrKnownEnd)
3346e3b55780SDimitry Andric break;
33477fa27ce4SDimitry Andric Changed |= setAndRecord(CallInED.IsReachingAlignedBarrierOnly, false);
3348e3b55780SDimitry Andric }
33497fa27ce4SDimitry Andric if (HitAlignedBarrierOrKnownEnd)
3350e3b55780SDimitry Andric continue;
3351e3b55780SDimitry Andric BasicBlock *SyncBB = SyncInst->getParent();
3352e3b55780SDimitry Andric for (auto *PredBB : predecessors(SyncBB)) {
33537fa27ce4SDimitry Andric if (LivenessAA && LivenessAA->isEdgeDead(PredBB, SyncBB))
3354e3b55780SDimitry Andric continue;
3355e3b55780SDimitry Andric if (!Visited.insert(PredBB))
3356e3b55780SDimitry Andric continue;
3357e3b55780SDimitry Andric auto &PredED = BEDMap[PredBB];
33587fa27ce4SDimitry Andric if (setAndRecord(PredED.IsReachingAlignedBarrierOnly, false)) {
3359e3b55780SDimitry Andric Changed = true;
33607fa27ce4SDimitry Andric SyncInstWorklist.push_back(PredBB->getTerminator());
33617fa27ce4SDimitry Andric }
3362e3b55780SDimitry Andric }
3363e3b55780SDimitry Andric if (SyncBB != &EntryBB)
3364e3b55780SDimitry Andric continue;
33657fa27ce4SDimitry Andric Changed |=
33667fa27ce4SDimitry Andric setAndRecord(InterProceduralED.IsReachingAlignedBarrierOnly, false);
3367e3b55780SDimitry Andric }
3368e3b55780SDimitry Andric
3369e3b55780SDimitry Andric return Changed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED;
3370344a3780SDimitry Andric }
3371344a3780SDimitry Andric
3372344a3780SDimitry Andric /// Try to replace memory allocation calls called by a single thread with a
3373344a3780SDimitry Andric /// static buffer of shared memory.
struct AAHeapToShared : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAHeapToShared(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAHeapToShared &createForPosition(const IRPosition &IRP,
                                           Attributor &A);

  /// Returns true if HeapToShared conversion is assumed to be possible for
  /// the allocation call \p CB.
  virtual bool isAssumedHeapToShared(CallBase &CB) const = 0;

  /// Returns true if HeapToShared conversion is assumed and the CB is a
  /// callsite to a free operation to be removed.
  virtual bool isAssumedHeapToSharedRemovedFree(CallBase &CB) const = 0;

  /// See AbstractAttribute::getName().
  const std::string getName() const override { return "AAHeapToShared"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAHeapToShared.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
3404344a3780SDimitry Andric
struct AAHeapToSharedFunction : public AAHeapToShared {
  AAHeapToSharedFunction(const IRPosition &IRP, Attributor &A)
      : AAHeapToShared(IRP, A) {}

  /// See AbstractAttribute::getAsStr().
  const std::string getAsStr(Attributor *) const override {
    return "[AAHeapToShared] " + std::to_string(MallocCalls.size()) +
           " malloc calls eligible.";
  }

  /// See AbstractAttribute::trackStatistics().
  void trackStatistics() const override {}

  /// This function finds free calls that will be removed by the
  /// HeapToShared transformation, i.e., for each tracked
  /// __kmpc_alloc_shared call the unique __kmpc_free_shared user, if any.
  void findPotentialRemovedFreeCalls(Attributor &A) {
    auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
    auto &FreeRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_free_shared];

    // Recompute the set from scratch; MallocCalls may have shrunk since the
    // last update.
    PotentialRemovedFreeCalls.clear();
    // Update free call users of found malloc calls.
    for (CallBase *CB : MallocCalls) {
      SmallVector<CallBase *, 4> FreeCalls;
      for (auto *U : CB->users()) {
        CallBase *C = dyn_cast<CallBase>(U);
        if (C && C->getCalledFunction() == FreeRFI.Declaration)
          FreeCalls.push_back(C);
      }

      // Only a single, unambiguous free user can be removed safely.
      if (FreeCalls.size() != 1)
        continue;

      PotentialRemovedFreeCalls.insert(FreeCalls.front());
    }
  }

  /// See AbstractAttribute::initialize(...). Collects all
  /// __kmpc_alloc_shared calls in the anchor function as candidates.
  void initialize(Attributor &A) override {
    if (DisableOpenMPOptDeglobalization) {
      indicatePessimisticFixpoint();
      return;
    }

    auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
    auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
    // No declaration of __kmpc_alloc_shared means there is nothing to do.
    if (!RFI.Declaration)
      return;

    // A callback returning nullptr prevents the candidate call sites from
    // being value-simplified away while this AA still reasons about them.
    Attributor::SimplifictionCallbackTy SCB =
        [](const IRPosition &, const AbstractAttribute *,
           bool &) -> std::optional<Value *> { return nullptr; };

    Function *F = getAnchorScope();
    for (User *U : RFI.Declaration->users())
      if (CallBase *CB = dyn_cast<CallBase>(U)) {
        // Only track allocation calls inside the anchor function.
        if (CB->getFunction() != F)
          continue;
        MallocCalls.insert(CB);
        A.registerSimplificationCallback(IRPosition::callsite_returned(*CB),
                                         SCB);
      }

    findPotentialRemovedFreeCalls(A);
  }

  /// See AAHeapToShared::isAssumedHeapToShared(...).
  bool isAssumedHeapToShared(CallBase &CB) const override {
    return isValidState() && MallocCalls.count(&CB);
  }

  /// See AAHeapToShared::isAssumedHeapToSharedRemovedFree(...).
  bool isAssumedHeapToSharedRemovedFree(CallBase &CB) const override {
    return isValidState() && PotentialRemovedFreeCalls.count(&CB);
  }

  /// See AbstractAttribute::manifest(...). Replaces each remaining eligible
  /// allocation with a static shared-memory buffer and deletes the
  /// allocation/free pair.
  ChangeStatus manifest(Attributor &A) override {
    if (MallocCalls.empty())
      return ChangeStatus::UNCHANGED;

    auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
    auto &FreeCall = OMPInfoCache.RFIs[OMPRTL___kmpc_free_shared];

    Function *F = getAnchorScope();
    auto *HS = A.lookupAAFor<AAHeapToStack>(IRPosition::function(*F), this,
                                            DepClassTy::OPTIONAL);

    ChangeStatus Changed = ChangeStatus::UNCHANGED;
    for (CallBase *CB : MallocCalls) {
      // Skip replacing this if HeapToStack has already claimed it.
      if (HS && HS->isAssumedHeapToStack(*CB))
        continue;

      // Find the unique free call to remove it.
      SmallVector<CallBase *, 4> FreeCalls;
      for (auto *U : CB->users()) {
        CallBase *C = dyn_cast<CallBase>(U);
        if (C && C->getCalledFunction() == FreeCall.Declaration)
          FreeCalls.push_back(C);
      }
      if (FreeCalls.size() != 1)
        continue;

      // updateImpl guarantees the size operand is a ConstantInt by now.
      auto *AllocSize = cast<ConstantInt>(CB->getArgOperand(0));

      // Respect the per-kernel shared-memory budget.
      if (AllocSize->getZExtValue() + SharedMemoryUsed > SharedMemoryLimit) {
        LLVM_DEBUG(dbgs() << TAG << "Cannot replace call " << *CB
                          << " with shared memory."
                          << " Shared memory usage is limited to "
                          << SharedMemoryLimit << " bytes\n");
        continue;
      }

      LLVM_DEBUG(dbgs() << TAG << "Replace globalization call " << *CB
                        << " with " << AllocSize->getZExtValue()
                        << " bytes of shared memory\n");

      // Create a new shared memory buffer of the same size as the allocation
      // and replace all the uses of the original allocation with it.
      Module *M = CB->getModule();
      Type *Int8Ty = Type::getInt8Ty(M->getContext());
      Type *Int8ArrTy = ArrayType::get(Int8Ty, AllocSize->getZExtValue());
      auto *SharedMem = new GlobalVariable(
          *M, Int8ArrTy, /* IsConstant */ false, GlobalValue::InternalLinkage,
          PoisonValue::get(Int8ArrTy), CB->getName() + "_shared", nullptr,
          GlobalValue::NotThreadLocal,
          static_cast<unsigned>(AddressSpace::Shared));
      auto *NewBuffer =
          ConstantExpr::getPointerCast(SharedMem, Int8Ty->getPointerTo());

      auto Remark = [&](OptimizationRemark OR) {
        return OR << "Replaced globalized variable with "
                  << ore::NV("SharedMemory", AllocSize->getZExtValue())
                  << (AllocSize->isOne() ? " byte " : " bytes ")
                  << "of shared memory.";
      };
      A.emitRemark<OptimizationRemark>(CB, "OMP111", Remark);

      // Carry over the allocation's alignment to the new buffer; the runtime
      // call is expected to carry a return-alignment attribute.
      MaybeAlign Alignment = CB->getRetAlign();
      assert(Alignment &&
             "HeapToShared on allocation without alignment attribute");
      SharedMem->setAlignment(*Alignment);

      // Redirect all users of the allocation to the buffer, then queue the
      // alloc/free pair for deletion once manifesting finishes.
      A.changeAfterManifest(IRPosition::callsite_returned(*CB), *NewBuffer);
      A.deleteAfterManifest(*CB);
      A.deleteAfterManifest(*FreeCalls.front());

      SharedMemoryUsed += AllocSize->getZExtValue();
      // NOTE(review): NumBytesMovedToSharedMemory appears to be a statistic
      // counter defined elsewhere in this file.
      NumBytesMovedToSharedMemory = SharedMemoryUsed;
      Changed = ChangeStatus::CHANGED;
    }

    return Changed;
  }

  /// See AbstractAttribute::updateImpl(...). Drops candidates whose size is
  /// not a compile-time constant or that may execute on more than the
  /// initial thread.
  ChangeStatus updateImpl(Attributor &A) override {
    if (MallocCalls.empty())
      return indicatePessimisticFixpoint();
    auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
    auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
    if (!RFI.Declaration)
      return ChangeStatus::UNCHANGED;

    Function *F = getAnchorScope();

    auto NumMallocCalls = MallocCalls.size();

    // Only consider malloc calls executed by a single thread with a constant.
    for (User *U : RFI.Declaration->users()) {
      if (CallBase *CB = dyn_cast<CallBase>(U)) {
        if (CB->getCaller() != F)
          continue;
        if (!MallocCalls.count(CB))
          continue;
        if (!isa<ConstantInt>(CB->getArgOperand(0))) {
          MallocCalls.remove(CB);
          continue;
        }
        const auto *ED = A.getAAFor<AAExecutionDomain>(
            *this, IRPosition::function(*F), DepClassTy::REQUIRED);
        if (!ED || !ED->isExecutedByInitialThreadOnly(*CB))
          MallocCalls.remove(CB);
      }
    }

    findPotentialRemovedFreeCalls(A);

    // Report a change iff some candidate was dropped above.
    if (NumMallocCalls != MallocCalls.size())
      return ChangeStatus::CHANGED;

    return ChangeStatus::UNCHANGED;
  }

  /// Collection of all malloc calls in a function.
  SmallSetVector<CallBase *, 4> MallocCalls;
  /// Collection of potentially removed free calls in a function.
  SmallPtrSet<CallBase *, 4> PotentialRemovedFreeCalls;
  /// The total amount of shared memory that has been used for HeapToShared.
  unsigned SharedMemoryUsed = 0;
};
3600344a3780SDimitry Andric
3601344a3780SDimitry Andric struct AAKernelInfo : public StateWrapper<KernelInfoState, AbstractAttribute> {
3602344a3780SDimitry Andric using Base = StateWrapper<KernelInfoState, AbstractAttribute>;
AAKernelInfo__anon7bbaa8dc0111::AAKernelInfo3603344a3780SDimitry Andric AAKernelInfo(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
3604344a3780SDimitry Andric
3605b1c73532SDimitry Andric /// The callee value is tracked beyond a simple stripPointerCasts, so we allow
3606b1c73532SDimitry Andric /// unknown callees.
requiresCalleeForCallBase__anon7bbaa8dc0111::AAKernelInfo3607b1c73532SDimitry Andric static bool requiresCalleeForCallBase() { return false; }
3608b1c73532SDimitry Andric
3609344a3780SDimitry Andric /// Statistics are tracked as part of manifest for now.
trackStatistics__anon7bbaa8dc0111::AAKernelInfo3610344a3780SDimitry Andric void trackStatistics() const override {}
3611344a3780SDimitry Andric
3612344a3780SDimitry Andric /// See AbstractAttribute::getAsStr()
getAsStr__anon7bbaa8dc0111::AAKernelInfo36137fa27ce4SDimitry Andric const std::string getAsStr(Attributor *) const override {
3614344a3780SDimitry Andric if (!isValidState())
3615344a3780SDimitry Andric return "<invalid>";
3616344a3780SDimitry Andric return std::string(SPMDCompatibilityTracker.isAssumed() ? "SPMD"
3617344a3780SDimitry Andric : "generic") +
3618344a3780SDimitry Andric std::string(SPMDCompatibilityTracker.isAtFixpoint() ? " [FIX]"
3619344a3780SDimitry Andric : "") +
3620344a3780SDimitry Andric std::string(" #PRs: ") +
3621c0981da4SDimitry Andric (ReachedKnownParallelRegions.isValidState()
3622c0981da4SDimitry Andric ? std::to_string(ReachedKnownParallelRegions.size())
3623c0981da4SDimitry Andric : "<invalid>") +
3624344a3780SDimitry Andric ", #Unknown PRs: " +
3625c0981da4SDimitry Andric (ReachedUnknownParallelRegions.isValidState()
3626c0981da4SDimitry Andric ? std::to_string(ReachedUnknownParallelRegions.size())
3627c0981da4SDimitry Andric : "<invalid>") +
3628c0981da4SDimitry Andric ", #Reaching Kernels: " +
3629c0981da4SDimitry Andric (ReachingKernelEntries.isValidState()
3630c0981da4SDimitry Andric ? std::to_string(ReachingKernelEntries.size())
3631e3b55780SDimitry Andric : "<invalid>") +
3632e3b55780SDimitry Andric ", #ParLevels: " +
3633e3b55780SDimitry Andric (ParallelLevels.isValidState()
3634e3b55780SDimitry Andric ? std::to_string(ParallelLevels.size())
3635b1c73532SDimitry Andric : "<invalid>") +
3636b1c73532SDimitry Andric ", NestedPar: " + (NestedParallelism ? "yes" : "no");
3637344a3780SDimitry Andric }
3638344a3780SDimitry Andric
3639344a3780SDimitry Andric /// Create an abstract attribute biew for the position \p IRP.
3640344a3780SDimitry Andric static AAKernelInfo &createForPosition(const IRPosition &IRP, Attributor &A);
3641344a3780SDimitry Andric
3642344a3780SDimitry Andric /// See AbstractAttribute::getName()
getName__anon7bbaa8dc0111::AAKernelInfo3643344a3780SDimitry Andric const std::string getName() const override { return "AAKernelInfo"; }
3644344a3780SDimitry Andric
3645344a3780SDimitry Andric /// See AbstractAttribute::getIdAddr()
getIdAddr__anon7bbaa8dc0111::AAKernelInfo3646344a3780SDimitry Andric const char *getIdAddr() const override { return &ID; }
3647344a3780SDimitry Andric
3648344a3780SDimitry Andric /// This function should return true if the type of the \p AA is AAKernelInfo
classof__anon7bbaa8dc0111::AAKernelInfo3649344a3780SDimitry Andric static bool classof(const AbstractAttribute *AA) {
3650344a3780SDimitry Andric return (AA->getIdAddr() == &ID);
3651344a3780SDimitry Andric }
3652344a3780SDimitry Andric
3653344a3780SDimitry Andric static const char ID;
3654344a3780SDimitry Andric };
3655344a3780SDimitry Andric
3656344a3780SDimitry Andric /// The function kernel info abstract attribute, basically, what can we say
3657344a3780SDimitry Andric /// about a function with regards to the KernelInfoState.
3658344a3780SDimitry Andric struct AAKernelInfoFunction : AAKernelInfo {
AAKernelInfoFunction__anon7bbaa8dc0111::AAKernelInfoFunction3659344a3780SDimitry Andric AAKernelInfoFunction(const IRPosition &IRP, Attributor &A)
3660344a3780SDimitry Andric : AAKernelInfo(IRP, A) {}
3661344a3780SDimitry Andric
3662c0981da4SDimitry Andric SmallPtrSet<Instruction *, 4> GuardedInstructions;
3663c0981da4SDimitry Andric
  /// Return a mutable reference to the set of guarded instructions (see the
  /// GuardedInstructions member) so callers can query and extend it.
  SmallPtrSetImpl<Instruction *> &getGuardedInstructions() {
    return GuardedInstructions;
  }
3667c0981da4SDimitry Andric
  /// Replace the ConfigurationEnvironment member of the cached kernel
  /// environment constant (KernelEnvC) with \p ConfigC via a constant-folded
  /// insertvalue. Only the cached constant is updated here; the IR global is
  /// rewritten from it in manifest().
  void setConfigurationOfKernelEnvironment(ConstantStruct *ConfigC) {
    Constant *NewKernelEnvC = ConstantFoldInsertValueInstruction(
        KernelEnvC, ConfigC, {KernelInfo::ConfigurationIdx});
    assert(NewKernelEnvC && "Failed to create new kernel environment");
    KernelEnvC = cast<ConstantStruct>(NewKernelEnvC);
  }
3674b1c73532SDimitry Andric
  /// Generate one setter per configuration-environment member. Each expansion
  /// defines set<MEMBER>OfKernelEnvironment(ConstantInt *), which rebuilds the
  /// configuration struct with the value at KernelInfo::<MEMBER>Idx replaced
  /// and installs it through setConfigurationOfKernelEnvironment. (No comments
  /// inside the macro body: they would break the line continuations.)
#define KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(MEMBER)                        \
  void set##MEMBER##OfKernelEnvironment(ConstantInt *NewVal) {                 \
    ConstantStruct *ConfigC =                                                  \
        KernelInfo::getConfigurationFromKernelEnvironment(KernelEnvC);         \
    Constant *NewConfigC = ConstantFoldInsertValueInstruction(                 \
        ConfigC, NewVal, {KernelInfo::MEMBER##Idx});                           \
    assert(NewConfigC && "Failed to create new configuration environment");    \
    setConfigurationOfKernelEnvironment(cast<ConstantStruct>(NewConfigC));     \
  }

  KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(UseGenericStateMachine)
  KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(MayUseNestedParallelism)
  KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(ExecMode)
  KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(MinThreads)
  KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(MaxThreads)
  KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(MinTeams)
  KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(MaxTeams)

#undef KERNEL_ENVIRONMENT_CONFIGURATION_SETTER
3694b1c73532SDimitry Andric
  /// See AbstractAttribute::initialize(...).
  ///
  /// Locates the kernel's __kmpc_target_init/__kmpc_target_deinit calls, reads
  /// the kernel environment constant, seeds optimistic assumptions (SPMD mode,
  /// no nested parallelism, no generic state machine, thread/team bounds), and
  /// registers simplification and virtual-use callbacks with the Attributor.
  void initialize(Attributor &A) override {
    // This is a high-level transform that might change the constant arguments
    // of the init and deinit calls. We need to tell the Attributor about this
    // to avoid other parts using the current constant value for
    // simplification.
    auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());

    Function *Fn = getAnchorScope();

    OMPInformationCache::RuntimeFunctionInfo &InitRFI =
        OMPInfoCache.RFIs[OMPRTL___kmpc_target_init];
    OMPInformationCache::RuntimeFunctionInfo &DeinitRFI =
        OMPInfoCache.RFIs[OMPRTL___kmpc_target_deinit];

    // For kernels we perform more initialization work, first we find the init
    // and deinit calls. The asserts below enforce that each kernel has at
    // most one of each.
    auto StoreCallBase = [](Use &U,
                            OMPInformationCache::RuntimeFunctionInfo &RFI,
                            CallBase *&Storage) {
      CallBase *CB = OpenMPOpt::getCallIfRegularCall(U, &RFI);
      assert(CB &&
             "Unexpected use of __kmpc_target_init or __kmpc_target_deinit!");
      assert(!Storage &&
             "Multiple uses of __kmpc_target_init or __kmpc_target_deinit!");
      Storage = CB;
      return false;
    };
    InitRFI.foreachUse(
        [&](Use &U, Function &) {
          StoreCallBase(U, InitRFI, KernelInitCB);
          return false;
        },
        Fn);
    DeinitRFI.foreachUse(
        [&](Use &U, Function &) {
          StoreCallBase(U, DeinitRFI, KernelDeinitCB);
          return false;
        },
        Fn);

    // Ignore kernels without initializers such as global constructors.
    if (!KernelInitCB || !KernelDeinitCB)
      return;

    // Add itself to the reaching kernel and set IsKernelEntry.
    ReachingKernelEntries.insert(Fn);
    IsKernelEntry = true;

    // Cache the kernel environment constant and the global variable that
    // holds it; the setters above mutate the cached constant only.
    KernelEnvC =
        KernelInfo::getKernelEnvironementFromKernelInitCB(KernelInitCB);
    GlobalVariable *KernelEnvGV =
        KernelInfo::getKernelEnvironementGVFromKernelInitCB(KernelInitCB);

    // Let queries of the kernel environment global see our assumed constant
    // (KernelEnvC) instead of the IR initializer, and record an optional
    // dependence so queriers are re-run when our state changes.
    Attributor::GlobalVariableSimplifictionCallbackTy
        KernelConfigurationSimplifyCB =
            [&](const GlobalVariable &GV, const AbstractAttribute *AA,
                bool &UsedAssumedInformation) -> std::optional<Constant *> {
      if (!isAtFixpoint()) {
        if (!AA)
          return nullptr;
        UsedAssumedInformation = true;
        A.recordDependence(*this, *AA, DepClassTy::OPTIONAL);
      }
      return KernelEnvC;
    };

    A.registerGlobalVariableSimplificationCallback(
        *KernelEnvGV, KernelConfigurationSimplifyCB);

    // Check if we know we are in SPMD-mode already.
    ConstantInt *ExecModeC =
        KernelInfo::getExecModeFromKernelEnvironment(KernelEnvC);
    ConstantInt *AssumedExecModeC = ConstantInt::get(
        ExecModeC->getIntegerType(),
        ExecModeC->getSExtValue() | OMP_TGT_EXEC_MODE_GENERIC_SPMD);
    if (ExecModeC->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD)
      SPMDCompatibilityTracker.indicateOptimisticFixpoint();
    else if (DisableOpenMPOptSPMDization)
      // This is a generic region but SPMDization is disabled so stop
      // tracking.
      SPMDCompatibilityTracker.indicatePessimisticFixpoint();
    else
      // Optimistically assume the generic kernel can become SPMD.
      setExecModeOfKernelEnvironment(AssumedExecModeC);

    // Seed thread/team bounds from the kernel (via OpenMPIRBuilder); a zero
    // bound is treated as unknown and left untouched.
    const Triple T(Fn->getParent()->getTargetTriple());
    auto *Int32Ty = Type::getInt32Ty(Fn->getContext());
    auto [MinThreads, MaxThreads] =
        OpenMPIRBuilder::readThreadBoundsForKernel(T, *Fn);
    if (MinThreads)
      setMinThreadsOfKernelEnvironment(ConstantInt::get(Int32Ty, MinThreads));
    if (MaxThreads)
      setMaxThreadsOfKernelEnvironment(ConstantInt::get(Int32Ty, MaxThreads));
    auto [MinTeams, MaxTeams] =
        OpenMPIRBuilder::readTeamBoundsForKernel(T, *Fn);
    if (MinTeams)
      setMinTeamsOfKernelEnvironment(ConstantInt::get(Int32Ty, MinTeams));
    if (MaxTeams)
      setMaxTeamsOfKernelEnvironment(ConstantInt::get(Int32Ty, MaxTeams));

    // Start from the current NestedParallelism assumption (initially false).
    ConstantInt *MayUseNestedParallelismC =
        KernelInfo::getMayUseNestedParallelismFromKernelEnvironment(KernelEnvC);
    ConstantInt *AssumedMayUseNestedParallelismC = ConstantInt::get(
        MayUseNestedParallelismC->getIntegerType(), NestedParallelism);
    setMayUseNestedParallelismOfKernelEnvironment(
        AssumedMayUseNestedParallelismC);

    // Optimistically assume no generic state machine is needed; manifest()
    // restores the original value if no custom state machine gets built.
    if (!DisableOpenMPOptStateMachineRewrite) {
      ConstantInt *UseGenericStateMachineC =
          KernelInfo::getUseGenericStateMachineFromKernelEnvironment(
              KernelEnvC);
      ConstantInt *AssumedUseGenericStateMachineC =
          ConstantInt::get(UseGenericStateMachineC->getIntegerType(), false);
      setUseGenericStateMachineOfKernelEnvironment(
          AssumedUseGenericStateMachineC);
    }

    // Register virtual uses of functions we might need to preserve.
    auto RegisterVirtualUse = [&](RuntimeFunction RFKind,
                                  Attributor::VirtualUseCallbackTy &CB) {
      if (!OMPInfoCache.RFIs[RFKind].Declaration)
        return;
      A.registerVirtualUseCallback(*OMPInfoCache.RFIs[RFKind].Declaration, CB);
    };

    // Add a dependence to ensure updates if the state changes.
    auto AddDependence = [](Attributor &A, const AAKernelInfo *KI,
                            const AbstractAttribute *QueryingAA) {
      if (QueryingAA) {
        A.recordDependence(*KI, *QueryingAA, DepClassTy::OPTIONAL);
      }
      return true;
    };

    Attributor::VirtualUseCallbackTy CustomStateMachineUseCB =
        [&](Attributor &A, const AbstractAttribute *QueryingAA) {
          // Whenever we create a custom state machine we will insert calls to
          // __kmpc_get_hardware_num_threads_in_block,
          // __kmpc_get_warp_size,
          // __kmpc_barrier_simple_generic,
          // __kmpc_kernel_parallel, and
          // __kmpc_kernel_end_parallel.
          // Not needed if we are on track for SPMDzation.
          if (SPMDCompatibilityTracker.isValidState())
            return AddDependence(A, this, QueryingAA);
          // Not needed if we can't rewrite due to an invalid state.
          if (!ReachedKnownParallelRegions.isValidState())
            return AddDependence(A, this, QueryingAA);
          return false;
        };

    // Not needed if we are pre-runtime merge.
    if (!KernelInitCB->getCalledFunction()->isDeclaration()) {
      RegisterVirtualUse(OMPRTL___kmpc_get_hardware_num_threads_in_block,
                         CustomStateMachineUseCB);
      RegisterVirtualUse(OMPRTL___kmpc_get_warp_size, CustomStateMachineUseCB);
      RegisterVirtualUse(OMPRTL___kmpc_barrier_simple_generic,
                         CustomStateMachineUseCB);
      RegisterVirtualUse(OMPRTL___kmpc_kernel_parallel,
                         CustomStateMachineUseCB);
      RegisterVirtualUse(OMPRTL___kmpc_kernel_end_parallel,
                         CustomStateMachineUseCB);
    }

    // If we do not perform SPMDzation we do not need the virtual uses below.
    if (SPMDCompatibilityTracker.isAtFixpoint())
      return;

    Attributor::VirtualUseCallbackTy HWThreadIdUseCB =
        [&](Attributor &A, const AbstractAttribute *QueryingAA) {
          // Whenever we perform SPMDzation we will insert
          // __kmpc_get_hardware_thread_id_in_block calls.
          if (!SPMDCompatibilityTracker.isValidState())
            return AddDependence(A, this, QueryingAA);
          return false;
        };
    RegisterVirtualUse(OMPRTL___kmpc_get_hardware_thread_id_in_block,
                       HWThreadIdUseCB);

    Attributor::VirtualUseCallbackTy SPMDBarrierUseCB =
        [&](Attributor &A, const AbstractAttribute *QueryingAA) {
          // Whenever we perform SPMDzation with guarding we will insert
          // __kmpc_simple_barrier_spmd calls. If SPMDzation failed, there is
          // nothing to guard, or there are no parallel regions, we don't need
          // the calls.
          if (!SPMDCompatibilityTracker.isValidState())
            return AddDependence(A, this, QueryingAA);
          if (SPMDCompatibilityTracker.empty())
            return AddDependence(A, this, QueryingAA);
          if (!mayContainParallelRegion())
            return AddDependence(A, this, QueryingAA);
          return false;
        };
    RegisterVirtualUse(OMPRTL___kmpc_barrier_simple_spmd, SPMDBarrierUseCB);
  }
3889c0981da4SDimitry Andric
3890c0981da4SDimitry Andric /// Sanitize the string \p S such that it is a suitable global symbol name.
sanitizeForGlobalName__anon7bbaa8dc0111::AAKernelInfoFunction3891c0981da4SDimitry Andric static std::string sanitizeForGlobalName(std::string S) {
3892c0981da4SDimitry Andric std::replace_if(
3893c0981da4SDimitry Andric S.begin(), S.end(),
3894c0981da4SDimitry Andric [](const char C) {
3895c0981da4SDimitry Andric return !((C >= 'a' && C <= 'z') || (C >= 'A' && C <= 'Z') ||
3896c0981da4SDimitry Andric (C >= '0' && C <= '9') || C == '_');
3897c0981da4SDimitry Andric },
3898c0981da4SDimitry Andric '.');
3899c0981da4SDimitry Andric return S;
3900344a3780SDimitry Andric }
3901344a3780SDimitry Andric
  /// Modify the IR based on the KernelInfoState as the fixpoint iteration is
  /// finished now. Tries SPMDzation first; if that is not performed, tries to
  /// build a custom state machine, and finally commits the (possibly reset)
  /// assumed kernel environment constant into the IR global.
  ChangeStatus manifest(Attributor &A) override {
    // If we are not looking at a kernel with __kmpc_target_init and
    // __kmpc_target_deinit call we cannot actually manifest the information.
    if (!KernelInitCB || !KernelDeinitCB)
      return ChangeStatus::UNCHANGED;

    ChangeStatus Changed = ChangeStatus::UNCHANGED;

    // If SPMDzation is not performed, fall back to a custom state machine;
    // pre-runtime-merge (init callee still a declaration) nothing is built.
    bool HasBuiltStateMachine = true;
    if (!changeToSPMDMode(A, Changed)) {
      if (!KernelInitCB->getCalledFunction()->isDeclaration())
        HasBuiltStateMachine = buildCustomStateMachine(A, Changed);
      else
        HasBuiltStateMachine = false;
    }

    // We need to reset KernelEnvC if specific rewriting is not done; the
    // optimistic "no generic state machine" value set in initialize() is only
    // valid when a custom state machine was actually built.
    ConstantStruct *ExistingKernelEnvC =
        KernelInfo::getKernelEnvironementFromKernelInitCB(KernelInitCB);
    ConstantInt *OldUseGenericStateMachineVal =
        KernelInfo::getUseGenericStateMachineFromKernelEnvironment(
            ExistingKernelEnvC);
    if (!HasBuiltStateMachine)
      setUseGenericStateMachineOfKernelEnvironment(
          OldUseGenericStateMachineVal);

    // At last, update the KernelEnvC global initializer if it differs from
    // the assumed constant.
    GlobalVariable *KernelEnvGV =
        KernelInfo::getKernelEnvironementGVFromKernelInitCB(KernelInitCB);
    if (KernelEnvGV->getInitializer() != KernelEnvC) {
      KernelEnvGV->setInitializer(KernelEnvC);
      Changed = ChangeStatus::CHANGED;
    }

    return Changed;
  }
3940344a3780SDimitry Andric
  /// Wrap every instruction recorded in SPMDCompatibilityTracker in a
  /// "main thread only" guard (tid == 0 check plus barriers), broadcasting
  /// values that escape a guarded region through shared memory.
  void insertInstructionGuardsHelper(Attributor &A) {
    auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());

    // Build the guard CFG around the instruction range
    // [RegionStartI, RegionEndI] (both in the same basic block).
    auto CreateGuardedRegion = [&](Instruction *RegionStartI,
                                   Instruction *RegionEndI) {
      LoopInfo *LI = nullptr;
      DominatorTree *DT = nullptr;
      MemorySSAUpdater *MSU = nullptr;
      using InsertPointTy = OpenMPIRBuilder::InsertPointTy;

      BasicBlock *ParentBB = RegionStartI->getParent();
      Function *Fn = ParentBB->getParent();
      Module &M = *Fn->getParent();

      // Create all the blocks and logic.
      // ParentBB:
      //    goto RegionCheckTidBB
      // RegionCheckTidBB:
      //    Tid = __kmpc_hardware_thread_id()
      //    if (Tid != 0)
      //        goto RegionBarrierBB
      // RegionStartBB:
      //    <execute instructions guarded>
      //    goto RegionEndBB
      // RegionEndBB:
      //    <store escaping values to shared mem>
      //    goto RegionBarrierBB
      // RegionBarrierBB:
      //    __kmpc_simple_barrier_spmd()
      //    // second barrier is omitted if lacking escaping values.
      //    <load escaping values from shared mem>
      //    __kmpc_simple_barrier_spmd()
      //    goto RegionExitBB
      // RegionExitBB:
      //    <execute rest of instructions>

      BasicBlock *RegionEndBB = SplitBlock(ParentBB, RegionEndI->getNextNode(),
                                           DT, LI, MSU, "region.guarded.end");
      BasicBlock *RegionBarrierBB =
          SplitBlock(RegionEndBB, &*RegionEndBB->getFirstInsertionPt(), DT, LI,
                     MSU, "region.barrier");
      BasicBlock *RegionExitBB =
          SplitBlock(RegionBarrierBB, &*RegionBarrierBB->getFirstInsertionPt(),
                     DT, LI, MSU, "region.exit");
      BasicBlock *RegionStartBB =
          SplitBlock(ParentBB, RegionStartI, DT, LI, MSU, "region.guarded");

      assert(ParentBB->getUniqueSuccessor() == RegionStartBB &&
             "Expected a different CFG");

      BasicBlock *RegionCheckTidBB = SplitBlock(
          ParentBB, ParentBB->getTerminator(), DT, LI, MSU, "region.check.tid");

      // Register basic blocks with the Attributor.
      A.registerManifestAddedBasicBlock(*RegionEndBB);
      A.registerManifestAddedBasicBlock(*RegionBarrierBB);
      A.registerManifestAddedBasicBlock(*RegionExitBB);
      A.registerManifestAddedBasicBlock(*RegionStartBB);
      A.registerManifestAddedBasicBlock(*RegionCheckTidBB);

      bool HasBroadcastValues = false;
      // Find escaping outputs from the guarded region to outside users and
      // broadcast their values to them.
      for (Instruction &I : *RegionStartBB) {
        SmallVector<Use *, 4> OutsideUses;
        for (Use &U : I.uses()) {
          Instruction &UsrI = *cast<Instruction>(U.getUser());
          if (UsrI.getParent() != RegionStartBB)
            OutsideUses.push_back(&U);
        }

        if (OutsideUses.empty())
          continue;

        HasBroadcastValues = true;

        // Emit a global variable in shared memory to store the broadcasted
        // value.
        auto *SharedMem = new GlobalVariable(
            M, I.getType(), /* IsConstant */ false,
            GlobalValue::InternalLinkage, UndefValue::get(I.getType()),
            sanitizeForGlobalName(
                (I.getName() + ".guarded.output.alloc").str()),
            nullptr, GlobalValue::NotThreadLocal,
            static_cast<unsigned>(AddressSpace::Shared));

        // Emit a store instruction to update the value.
        new StoreInst(&I, SharedMem,
                      RegionEndBB->getTerminator()->getIterator());

        LoadInst *LoadI = new LoadInst(
            I.getType(), SharedMem, I.getName() + ".guarded.output.load",
            RegionBarrierBB->getTerminator()->getIterator());

        // Emit a load instruction and replace uses of the output value.
        for (Use *U : OutsideUses)
          A.changeUseAfterManifest(*U, *LoadI);
      }

      // NOTE(review): intentionally shadows the outer OMPInfoCache; both
      // refer to the same A.getInfoCache() object.
      auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());

      // Go to tid check BB in ParentBB.
      const DebugLoc DL = ParentBB->getTerminator()->getDebugLoc();
      ParentBB->getTerminator()->eraseFromParent();
      OpenMPIRBuilder::LocationDescription Loc(
          InsertPointTy(ParentBB, ParentBB->end()), DL);
      OMPInfoCache.OMPBuilder.updateToLocation(Loc);
      uint32_t SrcLocStrSize;
      auto *SrcLocStr =
          OMPInfoCache.OMPBuilder.getOrCreateSrcLocStr(Loc, SrcLocStrSize);
      Value *Ident =
          OMPInfoCache.OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize);
      BranchInst::Create(RegionCheckTidBB, ParentBB)->setDebugLoc(DL);

      // Add check for Tid in RegionCheckTidBB
      RegionCheckTidBB->getTerminator()->eraseFromParent();
      OpenMPIRBuilder::LocationDescription LocRegionCheckTid(
          InsertPointTy(RegionCheckTidBB, RegionCheckTidBB->end()), DL);
      OMPInfoCache.OMPBuilder.updateToLocation(LocRegionCheckTid);
      FunctionCallee HardwareTidFn =
          OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
              M, OMPRTL___kmpc_get_hardware_thread_id_in_block);
      CallInst *Tid =
          OMPInfoCache.OMPBuilder.Builder.CreateCall(HardwareTidFn, {});
      Tid->setDebugLoc(DL);
      OMPInfoCache.setCallingConvention(HardwareTidFn, Tid);
      Value *TidCheck = OMPInfoCache.OMPBuilder.Builder.CreateIsNull(Tid);
      OMPInfoCache.OMPBuilder.Builder
          .CreateCondBr(TidCheck, RegionStartBB, RegionBarrierBB)
          ->setDebugLoc(DL);

      // First barrier for synchronization, ensures main thread has updated
      // values.
      FunctionCallee BarrierFn =
          OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
              M, OMPRTL___kmpc_barrier_simple_spmd);
      OMPInfoCache.OMPBuilder.updateToLocation(InsertPointTy(
          RegionBarrierBB, RegionBarrierBB->getFirstInsertionPt()));
      CallInst *Barrier =
          OMPInfoCache.OMPBuilder.Builder.CreateCall(BarrierFn, {Ident, Tid});
      Barrier->setDebugLoc(DL);
      OMPInfoCache.setCallingConvention(BarrierFn, Barrier);

      // Second barrier ensures workers have read broadcast values.
      if (HasBroadcastValues) {
        CallInst *Barrier =
            CallInst::Create(BarrierFn, {Ident, Tid}, "",
                             RegionBarrierBB->getTerminator()->getIterator());
        Barrier->setDebugLoc(DL);
        OMPInfoCache.setCallingConvention(BarrierFn, Barrier);
      }
    };

    // Walk each affected block backwards and move user-less guarded effect
    // instructions down next to the following effect instruction —
    // presumably to make guarded regions contiguous and thus fewer; the
    // reorder only touches instructions whose results are unused.
    auto &AllocSharedRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
    SmallPtrSet<BasicBlock *, 8> Visited;
    for (Instruction *GuardedI : SPMDCompatibilityTracker) {
      BasicBlock *BB = GuardedI->getParent();
      if (!Visited.insert(BB).second)
        continue;

      SmallVector<std::pair<Instruction *, Instruction *>> Reorders;
      Instruction *LastEffect = nullptr;
      BasicBlock::reverse_iterator IP = BB->rbegin(), IPEnd = BB->rend();
      while (++IP != IPEnd) {
        // Skip instructions without memory effects.
        if (!IP->mayHaveSideEffects() && !IP->mayReadFromMemory())
          continue;
        Instruction *I = &*IP;
        // __kmpc_alloc_shared calls are left in place.
        if (OpenMPOpt::getCallIfRegularCall(*I, &AllocSharedRFI))
          continue;
        if (!I->user_empty() || !SPMDCompatibilityTracker.contains(I)) {
          LastEffect = nullptr;
          continue;
        }
        if (LastEffect)
          Reorders.push_back({I, LastEffect});
        LastEffect = &*IP;
      }
      for (auto &Reorder : Reorders)
        Reorder.first->moveBefore(Reorder.second);
    }

    // Collect maximal runs of consecutive to-be-guarded instructions per
    // block, then guard each run once.
    SmallVector<std::pair<Instruction *, Instruction *>, 4> GuardedRegions;

    for (Instruction *GuardedI : SPMDCompatibilityTracker) {
      BasicBlock *BB = GuardedI->getParent();
      auto *CalleeAA = A.lookupAAFor<AAKernelInfo>(
          IRPosition::function(*GuardedI->getFunction()), nullptr,
          DepClassTy::NONE);
      assert(CalleeAA != nullptr && "Expected Callee AAKernelInfo");
      auto &CalleeAAFunction = *cast<AAKernelInfoFunction>(CalleeAA);
      // Continue if instruction is already guarded.
      if (CalleeAAFunction.getGuardedInstructions().contains(GuardedI))
        continue;

      Instruction *GuardedRegionStart = nullptr, *GuardedRegionEnd = nullptr;
      for (Instruction &I : *BB) {
        // If instruction I needs to be guarded update the guarded region
        // bounds.
        if (SPMDCompatibilityTracker.contains(&I)) {
          CalleeAAFunction.getGuardedInstructions().insert(&I);
          if (GuardedRegionStart)
            GuardedRegionEnd = &I;
          else
            GuardedRegionStart = GuardedRegionEnd = &I;

          continue;
        }

        // Instruction I does not need guarding, store
        // any region found and reset bounds.
        if (GuardedRegionStart) {
          GuardedRegions.push_back(
              std::make_pair(GuardedRegionStart, GuardedRegionEnd));
          GuardedRegionStart = nullptr;
          GuardedRegionEnd = nullptr;
        }
      }
    }

    for (auto &GR : GuardedRegions)
      CreateGuardedRegion(GR.first, GR.second);
  }
4163e3b55780SDimitry Andric
forceSingleThreadPerWorkgroupHelper__anon7bbaa8dc0111::AAKernelInfoFunction4164e3b55780SDimitry Andric void forceSingleThreadPerWorkgroupHelper(Attributor &A) {
4165e3b55780SDimitry Andric // Only allow 1 thread per workgroup to continue executing the user code.
4166e3b55780SDimitry Andric //
4167e3b55780SDimitry Andric // InitCB = __kmpc_target_init(...)
4168e3b55780SDimitry Andric // ThreadIdInBlock = __kmpc_get_hardware_thread_id_in_block();
4169e3b55780SDimitry Andric // if (ThreadIdInBlock != 0) return;
4170e3b55780SDimitry Andric // UserCode:
4171e3b55780SDimitry Andric // // user code
4172e3b55780SDimitry Andric //
4173e3b55780SDimitry Andric auto &Ctx = getAnchorValue().getContext();
4174e3b55780SDimitry Andric Function *Kernel = getAssociatedFunction();
4175e3b55780SDimitry Andric assert(Kernel && "Expected an associated function!");
4176e3b55780SDimitry Andric
4177e3b55780SDimitry Andric // Create block for user code to branch to from initial block.
4178e3b55780SDimitry Andric BasicBlock *InitBB = KernelInitCB->getParent();
4179e3b55780SDimitry Andric BasicBlock *UserCodeBB = InitBB->splitBasicBlock(
4180e3b55780SDimitry Andric KernelInitCB->getNextNode(), "main.thread.user_code");
4181e3b55780SDimitry Andric BasicBlock *ReturnBB =
4182e3b55780SDimitry Andric BasicBlock::Create(Ctx, "exit.threads", Kernel, UserCodeBB);
4183e3b55780SDimitry Andric
4184e3b55780SDimitry Andric // Register blocks with attributor:
4185e3b55780SDimitry Andric A.registerManifestAddedBasicBlock(*InitBB);
4186e3b55780SDimitry Andric A.registerManifestAddedBasicBlock(*UserCodeBB);
4187e3b55780SDimitry Andric A.registerManifestAddedBasicBlock(*ReturnBB);
4188e3b55780SDimitry Andric
4189e3b55780SDimitry Andric // Debug location:
4190e3b55780SDimitry Andric const DebugLoc &DLoc = KernelInitCB->getDebugLoc();
4191e3b55780SDimitry Andric ReturnInst::Create(Ctx, ReturnBB)->setDebugLoc(DLoc);
4192e3b55780SDimitry Andric InitBB->getTerminator()->eraseFromParent();
4193e3b55780SDimitry Andric
4194e3b55780SDimitry Andric // Prepare call to OMPRTL___kmpc_get_hardware_thread_id_in_block.
4195e3b55780SDimitry Andric Module &M = *Kernel->getParent();
4196e3b55780SDimitry Andric auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
4197e3b55780SDimitry Andric FunctionCallee ThreadIdInBlockFn =
4198e3b55780SDimitry Andric OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
4199e3b55780SDimitry Andric M, OMPRTL___kmpc_get_hardware_thread_id_in_block);
4200e3b55780SDimitry Andric
4201e3b55780SDimitry Andric // Get thread ID in block.
4202e3b55780SDimitry Andric CallInst *ThreadIdInBlock =
4203e3b55780SDimitry Andric CallInst::Create(ThreadIdInBlockFn, "thread_id.in.block", InitBB);
4204e3b55780SDimitry Andric OMPInfoCache.setCallingConvention(ThreadIdInBlockFn, ThreadIdInBlock);
4205e3b55780SDimitry Andric ThreadIdInBlock->setDebugLoc(DLoc);
4206e3b55780SDimitry Andric
4207e3b55780SDimitry Andric // Eliminate all threads in the block with ID not equal to 0:
4208e3b55780SDimitry Andric Instruction *IsMainThread =
4209e3b55780SDimitry Andric ICmpInst::Create(ICmpInst::ICmp, CmpInst::ICMP_NE, ThreadIdInBlock,
4210e3b55780SDimitry Andric ConstantInt::get(ThreadIdInBlock->getType(), 0),
4211e3b55780SDimitry Andric "thread.is_main", InitBB);
4212e3b55780SDimitry Andric IsMainThread->setDebugLoc(DLoc);
4213e3b55780SDimitry Andric BranchInst::Create(ReturnBB, UserCodeBB, IsMainThread, InitBB);
4214e3b55780SDimitry Andric }
4215e3b55780SDimitry Andric
/// Try to convert this generic-mode kernel to SPMD mode. Returns true on
/// success (including the case where the kernel already is SPMD); sets
/// \p Changed to CHANGED only if the IR was actually modified. On failure,
/// remarks are emitted for the instructions that block the conversion.
bool changeToSPMDMode(Attributor &A, ChangeStatus &Changed) {
  auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());

  // We cannot change to SPMD mode if the runtime functions aren't available.
  if (!OMPInfoCache.runtimeFnsAvailable(
          {OMPRTL___kmpc_get_hardware_thread_id_in_block,
           OMPRTL___kmpc_barrier_simple_spmd}))
    return false;

  // If the tracker is pessimistic, report each known SPMD-incompatible
  // instruction to the user and bail out.
  if (!SPMDCompatibilityTracker.isAssumed()) {
    for (Instruction *NonCompatibleI : SPMDCompatibilityTracker) {
      if (!NonCompatibleI)
        continue;

      // Skip diagnostics on calls to known OpenMP runtime functions for now.
      if (auto *CB = dyn_cast<CallBase>(NonCompatibleI))
        if (OMPInfoCache.RTLFunctions.contains(CB->getCalledFunction()))
          continue;

      auto Remark = [&](OptimizationRemarkAnalysis ORA) {
        ORA << "Value has potential side effects preventing SPMD-mode "
               "execution";
        if (isa<CallBase>(NonCompatibleI)) {
          ORA << ". Add `[[omp::assume(\"ompx_spmd_amenable\")]]` to "
                 "the called function to override";
        }
        return ORA << ".";
      };
      A.emitRemark<OptimizationRemarkAnalysis>(NonCompatibleI, "OMP121",
                                               Remark);

      LLVM_DEBUG(dbgs() << TAG << "SPMD-incompatible side-effect: "
                        << *NonCompatibleI << "\n");
    }

    return false;
  }

  // Get the actual kernel, could be the caller of the anchor scope if we have
  // a debug wrapper.
  Function *Kernel = getAnchorScope();
  if (Kernel->hasLocalLinkage()) {
    assert(Kernel->hasOneUse() && "Unexpected use of debug kernel wrapper.");
    auto *CB = cast<CallBase>(Kernel->user_back());
    Kernel = CB->getCaller();
  }
  assert(omp::isOpenMPKernel(*Kernel) && "Expected kernel function!");

  // Check if the kernel is already in SPMD mode, if so, return success.
  ConstantStruct *ExistingKernelEnvC =
      KernelInfo::getKernelEnvironementFromKernelInitCB(KernelInitCB);
  auto *ExecModeC =
      KernelInfo::getExecModeFromKernelEnvironment(ExistingKernelEnvC);
  const int8_t ExecModeVal = ExecModeC->getSExtValue();
  if (ExecModeVal != OMP_TGT_EXEC_MODE_GENERIC)
    return true;

  // We will now unconditionally modify the IR, indicate a change.
  Changed = ChangeStatus::CHANGED;

  // Do not use instruction guards when no parallel is present inside
  // the target region. Guarding is only required to serialize side effects
  // between parallel regions; without any, a single main thread suffices.
  if (mayContainParallelRegion())
    insertInstructionGuardsHelper(A);
  else
    forceSingleThreadPerWorkgroupHelper(A);

  // Adjust the global exec mode flag that tells the runtime what mode this
  // kernel is executed in.
  assert(ExecModeVal == OMP_TGT_EXEC_MODE_GENERIC &&
         "Initially non-SPMD kernel has SPMD exec mode!");
  setExecModeOfKernelEnvironment(
      ConstantInt::get(ExecModeC->getIntegerType(),
                       ExecModeVal | OMP_TGT_EXEC_MODE_GENERIC_SPMD));

  ++NumOpenMPTargetRegionKernelsSPMD;

  auto Remark = [&](OptimizationRemark OR) {
    return OR << "Transformed generic-mode kernel to SPMD-mode.";
  };
  A.emitRemark<OptimizationRemark>(KernelInitCB, "OMP120", Remark);
  return true;
};
4299344a3780SDimitry Andric
/// Replace the generic-mode runtime state machine with a custom one that is
/// specialized for the parallel regions known to be reachable from this
/// kernel. Worker threads spin in an emitted loop that fetches the next work
/// function from the runtime and dispatches it through an if-cascade of
/// direct calls, with an indirect-call fallback only if unknown parallel
/// regions remain. Returns true if a (possibly trivial) custom state machine
/// was set up; sets \p Changed to CHANGED once the IR is modified.
bool buildCustomStateMachine(Attributor &A, ChangeStatus &Changed) {
  // If we have disabled state machine rewrites, don't make a custom one
  if (DisableOpenMPOptStateMachineRewrite)
    return false;

  // Don't rewrite the state machine if we are not in a valid state.
  if (!ReachedKnownParallelRegions.isValidState())
    return false;

  // All runtime functions the custom state machine calls must be available
  // in this module, otherwise we cannot emit it.
  auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
  if (!OMPInfoCache.runtimeFnsAvailable(
          {OMPRTL___kmpc_get_hardware_num_threads_in_block,
           OMPRTL___kmpc_get_warp_size, OMPRTL___kmpc_barrier_simple_generic,
           OMPRTL___kmpc_kernel_parallel, OMPRTL___kmpc_kernel_end_parallel}))
    return false;

  ConstantStruct *ExistingKernelEnvC =
      KernelInfo::getKernelEnvironementFromKernelInitCB(KernelInitCB);

  // Check if the current configuration is non-SPMD and generic state machine.
  // If we already have SPMD mode or a custom state machine we do not need to
  // go any further. If it is anything but a constant something is weird and
  // we give up.
  ConstantInt *UseStateMachineC =
      KernelInfo::getUseGenericStateMachineFromKernelEnvironment(
          ExistingKernelEnvC);
  ConstantInt *ModeC =
      KernelInfo::getExecModeFromKernelEnvironment(ExistingKernelEnvC);

  // If we are stuck with generic mode, try to create a custom device (=GPU)
  // state machine which is specialized for the parallel regions that are
  // reachable by the kernel.
  if (UseStateMachineC->isZero() ||
      (ModeC->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD))
    return false;

  Changed = ChangeStatus::CHANGED;

  // If not SPMD mode, indicate we use a custom state machine now.
  setUseGenericStateMachineOfKernelEnvironment(
      ConstantInt::get(UseStateMachineC->getIntegerType(), false));

  // If we don't actually need a state machine we are done here. This can
  // happen if there simply are no parallel regions. In the resulting kernel
  // all worker threads will simply exit right away, leaving the main thread
  // to do the work alone.
  if (!mayContainParallelRegion()) {
    ++NumOpenMPTargetRegionKernelsWithoutStateMachine;

    auto Remark = [&](OptimizationRemark OR) {
      return OR << "Removing unused state machine from generic-mode kernel.";
    };
    A.emitRemark<OptimizationRemark>(KernelInitCB, "OMP130", Remark);

    return true;
  }

  // Keep track in the statistics of our new shiny custom state machine.
  if (ReachedUnknownParallelRegions.empty()) {
    ++NumOpenMPTargetRegionKernelsCustomStateMachineWithoutFallback;

    auto Remark = [&](OptimizationRemark OR) {
      return OR << "Rewriting generic-mode kernel with a customized state "
                   "machine.";
    };
    A.emitRemark<OptimizationRemark>(KernelInitCB, "OMP131", Remark);
  } else {
    ++NumOpenMPTargetRegionKernelsCustomStateMachineWithFallback;

    auto Remark = [&](OptimizationRemarkAnalysis OR) {
      return OR << "Generic-mode kernel is executed with a customized state "
                   "machine that requires a fallback.";
    };
    A.emitRemark<OptimizationRemarkAnalysis>(KernelInitCB, "OMP132", Remark);

    // Tell the user why we ended up with a fallback.
    for (CallBase *UnknownParallelRegionCB : ReachedUnknownParallelRegions) {
      if (!UnknownParallelRegionCB)
        continue;
      auto Remark = [&](OptimizationRemarkAnalysis ORA) {
        return ORA << "Call may contain unknown parallel regions. Use "
                   << "`[[omp::assume(\"omp_no_parallelism\")]]` to "
                      "override.";
      };
      A.emitRemark<OptimizationRemarkAnalysis>(UnknownParallelRegionCB,
                                               "OMP133", Remark);
    }
  }

  // Create all the blocks:
  //
  //                       InitCB = __kmpc_target_init(...)
  //                       BlockHwSize =
  //                         __kmpc_get_hardware_num_threads_in_block();
  //                       WarpSize = __kmpc_get_warp_size();
  //                       BlockSize = BlockHwSize - WarpSize;
  // IsWorkerCheckBB:      bool IsWorker = InitCB != -1;
  //                       if (IsWorker) {
  //                         if (InitCB >= BlockSize) return;
  // SMBeginBB:               __kmpc_barrier_simple_generic(...);
  //                         void *WorkFn;
  //                         bool Active = __kmpc_kernel_parallel(&WorkFn);
  //                         if (!WorkFn) return;
  // SMIsActiveCheckBB:       if (Active) {
  // SMIfCascadeCurrentBB:      if      (WorkFn == <ParFn0>)
  //                              ParFn0(...);
  // SMIfCascadeCurrentBB:      else if (WorkFn == <ParFn1>)
  //                              ParFn1(...);
  //                            ...
  // SMIfCascadeCurrentBB:      else
  //                              ((WorkFnTy*)WorkFn)(...);
  // SMEndParallelBB:           __kmpc_kernel_end_parallel(...);
  //                          }
  // SMDoneBB:                __kmpc_barrier_simple_generic(...);
  //                          goto SMBeginBB;
  //                       }
  // UserCodeEntryBB:      // user code
  //                       __kmpc_target_deinit(...)
  //
  auto &Ctx = getAnchorValue().getContext();
  Function *Kernel = getAssociatedFunction();
  assert(Kernel && "Expected an associated function!");

  BasicBlock *InitBB = KernelInitCB->getParent();
  BasicBlock *UserCodeEntryBB = InitBB->splitBasicBlock(
      KernelInitCB->getNextNode(), "thread.user_code.check");
  BasicBlock *IsWorkerCheckBB =
      BasicBlock::Create(Ctx, "is_worker_check", Kernel, UserCodeEntryBB);
  BasicBlock *StateMachineBeginBB = BasicBlock::Create(
      Ctx, "worker_state_machine.begin", Kernel, UserCodeEntryBB);
  BasicBlock *StateMachineFinishedBB = BasicBlock::Create(
      Ctx, "worker_state_machine.finished", Kernel, UserCodeEntryBB);
  BasicBlock *StateMachineIsActiveCheckBB = BasicBlock::Create(
      Ctx, "worker_state_machine.is_active.check", Kernel, UserCodeEntryBB);
  BasicBlock *StateMachineIfCascadeCurrentBB =
      BasicBlock::Create(Ctx, "worker_state_machine.parallel_region.check",
                         Kernel, UserCodeEntryBB);
  BasicBlock *StateMachineEndParallelBB =
      BasicBlock::Create(Ctx, "worker_state_machine.parallel_region.end",
                         Kernel, UserCodeEntryBB);
  BasicBlock *StateMachineDoneBarrierBB = BasicBlock::Create(
      Ctx, "worker_state_machine.done.barrier", Kernel, UserCodeEntryBB);
  A.registerManifestAddedBasicBlock(*InitBB);
  A.registerManifestAddedBasicBlock(*UserCodeEntryBB);
  A.registerManifestAddedBasicBlock(*IsWorkerCheckBB);
  A.registerManifestAddedBasicBlock(*StateMachineBeginBB);
  A.registerManifestAddedBasicBlock(*StateMachineFinishedBB);
  A.registerManifestAddedBasicBlock(*StateMachineIsActiveCheckBB);
  A.registerManifestAddedBasicBlock(*StateMachineIfCascadeCurrentBB);
  A.registerManifestAddedBasicBlock(*StateMachineEndParallelBB);
  A.registerManifestAddedBasicBlock(*StateMachineDoneBarrierBB);

  const DebugLoc &DLoc = KernelInitCB->getDebugLoc();
  ReturnInst::Create(Ctx, StateMachineFinishedBB)->setDebugLoc(DLoc);
  InitBB->getTerminator()->eraseFromParent();

  // The main thread is identified by __kmpc_target_init returning -1.
  Instruction *IsWorker =
      ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_NE, KernelInitCB,
                       ConstantInt::get(KernelInitCB->getType(), -1),
                       "thread.is_worker", InitBB);
  IsWorker->setDebugLoc(DLoc);
  BranchInst::Create(IsWorkerCheckBB, UserCodeEntryBB, IsWorker, InitBB);

  // The last warp of the block is reserved; only threads with an id below
  // BlockHwSize - WarpSize participate as workers.
  Module &M = *Kernel->getParent();
  FunctionCallee BlockHwSizeFn =
      OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
          M, OMPRTL___kmpc_get_hardware_num_threads_in_block);
  FunctionCallee WarpSizeFn =
      OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
          M, OMPRTL___kmpc_get_warp_size);
  CallInst *BlockHwSize =
      CallInst::Create(BlockHwSizeFn, "block.hw_size", IsWorkerCheckBB);
  OMPInfoCache.setCallingConvention(BlockHwSizeFn, BlockHwSize);
  BlockHwSize->setDebugLoc(DLoc);
  CallInst *WarpSize =
      CallInst::Create(WarpSizeFn, "warp.size", IsWorkerCheckBB);
  OMPInfoCache.setCallingConvention(WarpSizeFn, WarpSize);
  WarpSize->setDebugLoc(DLoc);
  Instruction *BlockSize = BinaryOperator::CreateSub(
      BlockHwSize, WarpSize, "block.size", IsWorkerCheckBB);
  BlockSize->setDebugLoc(DLoc);
  Instruction *IsMainOrWorker = ICmpInst::Create(
      ICmpInst::ICmp, llvm::CmpInst::ICMP_SLT, KernelInitCB, BlockSize,
      "thread.is_main_or_worker", IsWorkerCheckBB);
  IsMainOrWorker->setDebugLoc(DLoc);
  BranchInst::Create(StateMachineBeginBB, StateMachineFinishedBB,
                     IsMainOrWorker, IsWorkerCheckBB);

  // Create local storage for the work function pointer.
  const DataLayout &DL = M.getDataLayout();
  Type *VoidPtrTy = PointerType::getUnqual(Ctx);
  Instruction *WorkFnAI =
      new AllocaInst(VoidPtrTy, DL.getAllocaAddrSpace(), nullptr,
                     "worker.work_fn.addr", Kernel->getEntryBlock().begin());
  WorkFnAI->setDebugLoc(DLoc);

  OMPInfoCache.OMPBuilder.updateToLocation(
      OpenMPIRBuilder::LocationDescription(
          IRBuilder<>::InsertPoint(StateMachineBeginBB,
                                   StateMachineBeginBB->end()),
          DLoc));

  Value *Ident = KernelInfo::getIdentFromKernelEnvironment(KernelEnvC);
  Value *GTid = KernelInitCB;

  FunctionCallee BarrierFn =
      OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
          M, OMPRTL___kmpc_barrier_simple_generic);
  CallInst *Barrier =
      CallInst::Create(BarrierFn, {Ident, GTid}, "", StateMachineBeginBB);
  OMPInfoCache.setCallingConvention(BarrierFn, Barrier);
  Barrier->setDebugLoc(DLoc);

  // The runtime expects a generic pointer; cast the alloca if it lives in a
  // non-generic address space (e.g., AMDGPU private memory).
  if (WorkFnAI->getType()->getPointerAddressSpace() !=
      (unsigned int)AddressSpace::Generic) {
    WorkFnAI = new AddrSpaceCastInst(
        WorkFnAI, PointerType::get(Ctx, (unsigned int)AddressSpace::Generic),
        WorkFnAI->getName() + ".generic", StateMachineBeginBB);
    WorkFnAI->setDebugLoc(DLoc);
  }

  FunctionCallee KernelParallelFn =
      OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
          M, OMPRTL___kmpc_kernel_parallel);
  CallInst *IsActiveWorker = CallInst::Create(
      KernelParallelFn, {WorkFnAI}, "worker.is_active", StateMachineBeginBB);
  OMPInfoCache.setCallingConvention(KernelParallelFn, IsActiveWorker);
  IsActiveWorker->setDebugLoc(DLoc);
  Instruction *WorkFn = new LoadInst(VoidPtrTy, WorkFnAI, "worker.work_fn",
                                     StateMachineBeginBB);
  WorkFn->setDebugLoc(DLoc);

  FunctionType *ParallelRegionFnTy = FunctionType::get(
      Type::getVoidTy(Ctx), {Type::getInt16Ty(Ctx), Type::getInt32Ty(Ctx)},
      false);

  // A null work function signals kernel shutdown.
  Instruction *IsDone =
      ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_EQ, WorkFn,
                       Constant::getNullValue(VoidPtrTy), "worker.is_done",
                       StateMachineBeginBB);
  IsDone->setDebugLoc(DLoc);
  BranchInst::Create(StateMachineFinishedBB, StateMachineIsActiveCheckBB,
                     IsDone, StateMachineBeginBB)
      ->setDebugLoc(DLoc);

  BranchInst::Create(StateMachineIfCascadeCurrentBB,
                     StateMachineDoneBarrierBB, IsActiveWorker,
                     StateMachineIsActiveCheckBB)
      ->setDebugLoc(DLoc);

  Value *ZeroArg =
      Constant::getNullValue(ParallelRegionFnTy->getParamType(0));

  // Argument position of the parallel-region wrapper in __kmpc_parallel_51.
  const unsigned int WrapperFunctionArgNo = 6;

  // Now that we have most of the CFG skeleton it is time for the if-cascade
  // that checks the function pointer we got from the runtime against the
  // parallel regions we expect, if there are any.
  for (int I = 0, E = ReachedKnownParallelRegions.size(); I < E; ++I) {
    auto *CB = ReachedKnownParallelRegions[I];
    auto *ParallelRegion = dyn_cast<Function>(
        CB->getArgOperand(WrapperFunctionArgNo)->stripPointerCasts());
    BasicBlock *PRExecuteBB = BasicBlock::Create(
        Ctx, "worker_state_machine.parallel_region.execute", Kernel,
        StateMachineEndParallelBB);
    CallInst::Create(ParallelRegion, {ZeroArg, GTid}, "", PRExecuteBB)
        ->setDebugLoc(DLoc);
    BranchInst::Create(StateMachineEndParallelBB, PRExecuteBB)
        ->setDebugLoc(DLoc);

    BasicBlock *PRNextBB =
        BasicBlock::Create(Ctx, "worker_state_machine.parallel_region.check",
                           Kernel, StateMachineEndParallelBB);
    A.registerManifestAddedBasicBlock(*PRExecuteBB);
    A.registerManifestAddedBasicBlock(*PRNextBB);

    // Check if we need to compare the pointer at all or if we can just
    // call the parallel region function.
    Value *IsPR;
    if (I + 1 < E || !ReachedUnknownParallelRegions.empty()) {
      Instruction *CmpI = ICmpInst::Create(
          ICmpInst::ICmp, llvm::CmpInst::ICMP_EQ, WorkFn, ParallelRegion,
          "worker.check_parallel_region", StateMachineIfCascadeCurrentBB);
      CmpI->setDebugLoc(DLoc);
      IsPR = CmpI;
    } else {
      // Last known region and no unknown ones: dispatch unconditionally.
      IsPR = ConstantInt::getTrue(Ctx);
    }

    BranchInst::Create(PRExecuteBB, PRNextBB, IsPR,
                       StateMachineIfCascadeCurrentBB)
        ->setDebugLoc(DLoc);
    StateMachineIfCascadeCurrentBB = PRNextBB;
  }

  // At the end of the if-cascade we place the indirect function pointer call
  // in case we might need it, that is if there can be parallel regions we
  // have not handled in the if-cascade above.
  if (!ReachedUnknownParallelRegions.empty()) {
    StateMachineIfCascadeCurrentBB->setName(
        "worker_state_machine.parallel_region.fallback.execute");
    CallInst::Create(ParallelRegionFnTy, WorkFn, {ZeroArg, GTid}, "",
                     StateMachineIfCascadeCurrentBB)
        ->setDebugLoc(DLoc);
  }
  BranchInst::Create(StateMachineEndParallelBB,
                     StateMachineIfCascadeCurrentBB)
      ->setDebugLoc(DLoc);

  FunctionCallee EndParallelFn =
      OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
          M, OMPRTL___kmpc_kernel_end_parallel);
  CallInst *EndParallel =
      CallInst::Create(EndParallelFn, {}, "", StateMachineEndParallelBB);
  OMPInfoCache.setCallingConvention(EndParallelFn, EndParallel);
  EndParallel->setDebugLoc(DLoc);
  BranchInst::Create(StateMachineDoneBarrierBB, StateMachineEndParallelBB)
      ->setDebugLoc(DLoc);

  CallInst::Create(BarrierFn, {Ident, GTid}, "", StateMachineDoneBarrierBB)
      ->setDebugLoc(DLoc);
  BranchInst::Create(StateMachineBeginBB, StateMachineDoneBarrierBB)
      ->setDebugLoc(DLoc);

  return true;
}
4626344a3780SDimitry Andric
4627344a3780SDimitry Andric /// Fixpoint iteration update function. Will be called every time a dependence
4628344a3780SDimitry Andric /// changed its state (and in the beginning).
  /// See AbstractAttribute::updateImpl(...).
  ///
  /// Re-derives the KernelInfoState for the associated function by (1) scanning
  /// read/write instructions for SPMD-incompatible writes, (2) merging
  /// information from reaching kernels and parallel levels for non-kernel
  /// functions, and (3) merging the states of all call sites. Returns CHANGED
  /// iff the state differs from the state on entry.
  ChangeStatus updateImpl(Attributor &A) override {
    // Snapshot the state so we can report CHANGED/UNCHANGED at the end.
    KernelInfoState StateBefore = getState();

    // When we leave this function this RAII will make sure the member
    // KernelEnvC is updated properly depending on the state. That member is
    // used for simplification of values and needs to be up to date at all
    // times.
    struct UpdateKernelEnvCRAII {
      AAKernelInfoFunction &AA;

      UpdateKernelEnvCRAII(AAKernelInfoFunction &AA) : AA(AA) {}

      ~UpdateKernelEnvCRAII() {
        if (!AA.KernelEnvC)
          return;

        // The environment constant currently attached to the kernel init call;
        // used as the fallback when (parts of) our state became invalid.
        ConstantStruct *ExistingKernelEnvC =
            KernelInfo::getKernelEnvironementFromKernelInitCB(AA.KernelInitCB);

        // Whole state invalid: revert to the existing environment wholesale.
        if (!AA.isValidState()) {
          AA.KernelEnvC = ExistingKernelEnvC;
          return;
        }

        // Per-component fallbacks: restore only the members whose backing
        // state is invalid, keep the optimistic values otherwise.
        if (!AA.ReachedKnownParallelRegions.isValidState())
          AA.setUseGenericStateMachineOfKernelEnvironment(
              KernelInfo::getUseGenericStateMachineFromKernelEnvironment(
                  ExistingKernelEnvC));

        if (!AA.SPMDCompatibilityTracker.isValidState())
          AA.setExecModeOfKernelEnvironment(
              KernelInfo::getExecModeFromKernelEnvironment(ExistingKernelEnvC));

        // Always refresh the nested-parallelism flag from the latest
        // NestedParallelism derivation.
        ConstantInt *MayUseNestedParallelismC =
            KernelInfo::getMayUseNestedParallelismFromKernelEnvironment(
                AA.KernelEnvC);
        ConstantInt *NewMayUseNestedParallelismC = ConstantInt::get(
            MayUseNestedParallelismC->getIntegerType(), AA.NestedParallelism);
        AA.setMayUseNestedParallelismOfKernelEnvironment(
            NewMayUseNestedParallelismC);
      }
    } RAII(*this);

    // Callback to check a read/write instruction. Records instructions that
    // would need guarding in SPMD mode in the SPMDCompatibilityTracker.
    auto CheckRWInst = [&](Instruction &I) {
      // We handle calls later.
      if (isa<CallBase>(I))
        return true;
      // We only care about write effects.
      if (!I.mayWriteToMemory())
        return true;
      if (auto *SI = dyn_cast<StoreInst>(&I)) {
        const auto *UnderlyingObjsAA = A.getAAFor<AAUnderlyingObjects>(
            *this, IRPosition::value(*SI->getPointerOperand()),
            DepClassTy::OPTIONAL);
        auto *HS = A.getAAFor<AAHeapToStack>(
            *this, IRPosition::function(*I.getFunction()),
            DepClassTy::OPTIONAL);
        // A store is harmless if every underlying object is thread-local or
        // stems from a heap-to-stack converted allocation.
        if (UnderlyingObjsAA &&
            UnderlyingObjsAA->forallUnderlyingObjects([&](Value &Obj) {
              if (AA::isAssumedThreadLocalObject(A, Obj, *this))
                return true;
              // Check for AAHeapToStack moved objects which must not be
              // guarded.
              auto *CB = dyn_cast<CallBase>(&Obj);
              return CB && HS && HS->isAssumedHeapToStack(*CB);
            }))
          return true;
      }

      // Insert instruction that needs guarding.
      SPMDCompatibilityTracker.insert(&I);
      return true;
    };

    bool UsedAssumedInformationInCheckRWInst = false;
    if (!SPMDCompatibilityTracker.isAtFixpoint())
      if (!A.checkForAllReadWriteInstructions(
              CheckRWInst, *this, UsedAssumedInformationInCheckRWInst))
        SPMDCompatibilityTracker.indicatePessimisticFixpoint();

    bool UsedAssumedInformationFromReachingKernels = false;
    if (!IsKernelEntry) {
      // For non-kernel functions the SPMD verdict depends on the callers, so
      // pull in parallel-level and reaching-kernel information first.
      updateParallelLevels(A);

      bool AllReachingKernelsKnown = true;
      updateReachingKernelEntries(A, AllReachingKernelsKnown);
      UsedAssumedInformationFromReachingKernels = !AllReachingKernelsKnown;

      if (!SPMDCompatibilityTracker.empty()) {
        if (!ParallelLevels.isValidState())
          SPMDCompatibilityTracker.indicatePessimisticFixpoint();
        else if (!ReachingKernelEntries.isValidState())
          SPMDCompatibilityTracker.indicatePessimisticFixpoint();
        else {
          // Check if all reaching kernels agree on the mode as we can otherwise
          // not guard instructions. We might not be sure about the mode so
          // we cannot fix the internal spmd-zation state either.
          int SPMD = 0, Generic = 0;
          for (auto *Kernel : ReachingKernelEntries) {
            auto *CBAA = A.getAAFor<AAKernelInfo>(
                *this, IRPosition::function(*Kernel), DepClassTy::OPTIONAL);
            if (CBAA && CBAA->SPMDCompatibilityTracker.isValidState() &&
                CBAA->SPMDCompatibilityTracker.isAssumed())
              ++SPMD;
            else
              ++Generic;
            if (!CBAA || !CBAA->SPMDCompatibilityTracker.isAtFixpoint())
              UsedAssumedInformationFromReachingKernels = true;
          }
          // Mixed-mode callers: instructions cannot be guarded consistently.
          if (SPMD != 0 && Generic != 0)
            SPMDCompatibilityTracker.indicatePessimisticFixpoint();
        }
      }
    }

    // Callback to check a call instruction. Merges the callee-side
    // AAKernelInfo state into ours and tracks whether everything we merged
    // was already at a fixpoint.
    bool AllParallelRegionStatesWereFixed = true;
    bool AllSPMDStatesWereFixed = true;
    auto CheckCallInst = [&](Instruction &I) {
      auto &CB = cast<CallBase>(I);
      auto *CBAA = A.getAAFor<AAKernelInfo>(
          *this, IRPosition::callsite_function(CB), DepClassTy::OPTIONAL);
      if (!CBAA)
        return false;
      getState() ^= CBAA->getState();
      AllSPMDStatesWereFixed &= CBAA->SPMDCompatibilityTracker.isAtFixpoint();
      AllParallelRegionStatesWereFixed &=
          CBAA->ReachedKnownParallelRegions.isAtFixpoint();
      AllParallelRegionStatesWereFixed &=
          CBAA->ReachedUnknownParallelRegions.isAtFixpoint();
      return true;
    };

    bool UsedAssumedInformationInCheckCallInst = false;
    if (!A.checkForAllCallLikeInstructions(
            CheckCallInst, *this, UsedAssumedInformationInCheckCallInst)) {
      LLVM_DEBUG(dbgs() << TAG
                        << "Failed to visit all call-like instructions!\n";);
      return indicatePessimisticFixpoint();
    }

    // If we haven't used any assumed information for the reached parallel
    // region states we can fix it.
    if (!UsedAssumedInformationInCheckCallInst &&
        AllParallelRegionStatesWereFixed) {
      ReachedKnownParallelRegions.indicateOptimisticFixpoint();
      ReachedUnknownParallelRegions.indicateOptimisticFixpoint();
    }

    // If we haven't used any assumed information for the SPMD state we can fix
    // it.
    if (!UsedAssumedInformationInCheckRWInst &&
        !UsedAssumedInformationInCheckCallInst &&
        !UsedAssumedInformationFromReachingKernels && AllSPMDStatesWereFixed)
      SPMDCompatibilityTracker.indicateOptimisticFixpoint();

    return StateBefore == getState() ? ChangeStatus::UNCHANGED
                                     : ChangeStatus::CHANGED;
  }
4789344a3780SDimitry Andric
4790344a3780SDimitry Andric private:
4791344a3780SDimitry Andric /// Update info regarding reaching kernels.
updateReachingKernelEntries__anon7bbaa8dc0111::AAKernelInfoFunction4792f65dcba8SDimitry Andric void updateReachingKernelEntries(Attributor &A,
4793f65dcba8SDimitry Andric bool &AllReachingKernelsKnown) {
4794344a3780SDimitry Andric auto PredCallSite = [&](AbstractCallSite ACS) {
4795344a3780SDimitry Andric Function *Caller = ACS.getInstruction()->getFunction();
4796344a3780SDimitry Andric
4797344a3780SDimitry Andric assert(Caller && "Caller is nullptr");
4798344a3780SDimitry Andric
47997fa27ce4SDimitry Andric auto *CAA = A.getOrCreateAAFor<AAKernelInfo>(
4800344a3780SDimitry Andric IRPosition::function(*Caller), this, DepClassTy::REQUIRED);
48017fa27ce4SDimitry Andric if (CAA && CAA->ReachingKernelEntries.isValidState()) {
48027fa27ce4SDimitry Andric ReachingKernelEntries ^= CAA->ReachingKernelEntries;
4803344a3780SDimitry Andric return true;
4804344a3780SDimitry Andric }
4805344a3780SDimitry Andric
4806344a3780SDimitry Andric // We lost track of the caller of the associated function, any kernel
4807344a3780SDimitry Andric // could reach now.
4808344a3780SDimitry Andric ReachingKernelEntries.indicatePessimisticFixpoint();
4809344a3780SDimitry Andric
4810344a3780SDimitry Andric return true;
4811344a3780SDimitry Andric };
4812344a3780SDimitry Andric
4813344a3780SDimitry Andric if (!A.checkForAllCallSites(PredCallSite, *this,
4814344a3780SDimitry Andric true /* RequireAllCallSites */,
4815f65dcba8SDimitry Andric AllReachingKernelsKnown))
4816344a3780SDimitry Andric ReachingKernelEntries.indicatePessimisticFixpoint();
4817344a3780SDimitry Andric }
4818344a3780SDimitry Andric
4819344a3780SDimitry Andric /// Update info regarding parallel levels.
updateParallelLevels__anon7bbaa8dc0111::AAKernelInfoFunction4820344a3780SDimitry Andric void updateParallelLevels(Attributor &A) {
4821344a3780SDimitry Andric auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
4822344a3780SDimitry Andric OMPInformationCache::RuntimeFunctionInfo &Parallel51RFI =
4823344a3780SDimitry Andric OMPInfoCache.RFIs[OMPRTL___kmpc_parallel_51];
4824344a3780SDimitry Andric
4825344a3780SDimitry Andric auto PredCallSite = [&](AbstractCallSite ACS) {
4826344a3780SDimitry Andric Function *Caller = ACS.getInstruction()->getFunction();
4827344a3780SDimitry Andric
4828344a3780SDimitry Andric assert(Caller && "Caller is nullptr");
4829344a3780SDimitry Andric
48307fa27ce4SDimitry Andric auto *CAA =
4831344a3780SDimitry Andric A.getOrCreateAAFor<AAKernelInfo>(IRPosition::function(*Caller));
48327fa27ce4SDimitry Andric if (CAA && CAA->ParallelLevels.isValidState()) {
4833344a3780SDimitry Andric // Any function that is called by `__kmpc_parallel_51` will not be
4834344a3780SDimitry Andric // folded as the parallel level in the function is updated. In order to
4835344a3780SDimitry Andric // get it right, all the analysis would depend on the implentation. That
4836344a3780SDimitry Andric // said, if in the future any change to the implementation, the analysis
4837344a3780SDimitry Andric // could be wrong. As a consequence, we are just conservative here.
4838344a3780SDimitry Andric if (Caller == Parallel51RFI.Declaration) {
4839344a3780SDimitry Andric ParallelLevels.indicatePessimisticFixpoint();
4840344a3780SDimitry Andric return true;
4841344a3780SDimitry Andric }
4842344a3780SDimitry Andric
48437fa27ce4SDimitry Andric ParallelLevels ^= CAA->ParallelLevels;
4844344a3780SDimitry Andric
4845344a3780SDimitry Andric return true;
4846344a3780SDimitry Andric }
4847344a3780SDimitry Andric
4848344a3780SDimitry Andric // We lost track of the caller of the associated function, any kernel
4849344a3780SDimitry Andric // could reach now.
4850344a3780SDimitry Andric ParallelLevels.indicatePessimisticFixpoint();
4851344a3780SDimitry Andric
4852344a3780SDimitry Andric return true;
4853344a3780SDimitry Andric };
4854344a3780SDimitry Andric
4855344a3780SDimitry Andric bool AllCallSitesKnown = true;
4856344a3780SDimitry Andric if (!A.checkForAllCallSites(PredCallSite, *this,
4857344a3780SDimitry Andric true /* RequireAllCallSites */,
4858344a3780SDimitry Andric AllCallSitesKnown))
4859344a3780SDimitry Andric ParallelLevels.indicatePessimisticFixpoint();
4860344a3780SDimitry Andric }
4861344a3780SDimitry Andric };
4862344a3780SDimitry Andric
4863344a3780SDimitry Andric /// The call site kernel info abstract attribute, basically, what can we say
4864344a3780SDimitry Andric /// about a call site with regards to the KernelInfoState. For now this simply
4865344a3780SDimitry Andric /// forwards the information from the callee.
4866344a3780SDimitry Andric struct AAKernelInfoCallSite : AAKernelInfo {
AAKernelInfoCallSite__anon7bbaa8dc0111::AAKernelInfoCallSite4867344a3780SDimitry Andric AAKernelInfoCallSite(const IRPosition &IRP, Attributor &A)
4868344a3780SDimitry Andric : AAKernelInfo(IRP, A) {}
4869344a3780SDimitry Andric
4870344a3780SDimitry Andric /// See AbstractAttribute::initialize(...).
initialize__anon7bbaa8dc0111::AAKernelInfoCallSite4871344a3780SDimitry Andric void initialize(Attributor &A) override {
4872344a3780SDimitry Andric AAKernelInfo::initialize(A);
4873344a3780SDimitry Andric
4874344a3780SDimitry Andric CallBase &CB = cast<CallBase>(getAssociatedValue());
48757fa27ce4SDimitry Andric auto *AssumptionAA = A.getAAFor<AAAssumptionInfo>(
4876c0981da4SDimitry Andric *this, IRPosition::callsite_function(CB), DepClassTy::OPTIONAL);
4877344a3780SDimitry Andric
4878344a3780SDimitry Andric // Check for SPMD-mode assumptions.
48797fa27ce4SDimitry Andric if (AssumptionAA && AssumptionAA->hasAssumption("ompx_spmd_amenable")) {
4880c0981da4SDimitry Andric indicateOptimisticFixpoint();
4881b1c73532SDimitry Andric return;
4882c0981da4SDimitry Andric }
4883344a3780SDimitry Andric
4884344a3780SDimitry Andric // First weed out calls we do not care about, that is readonly/readnone
4885344a3780SDimitry Andric // calls, intrinsics, and "no_openmp" calls. Neither of these can reach a
4886344a3780SDimitry Andric // parallel region or anything else we are looking for.
4887344a3780SDimitry Andric if (!CB.mayWriteToMemory() || isa<IntrinsicInst>(CB)) {
4888344a3780SDimitry Andric indicateOptimisticFixpoint();
4889344a3780SDimitry Andric return;
4890344a3780SDimitry Andric }
4891344a3780SDimitry Andric
4892344a3780SDimitry Andric // Next we check if we know the callee. If it is a known OpenMP function
4893344a3780SDimitry Andric // we will handle them explicitly in the switch below. If it is not, we
4894344a3780SDimitry Andric // will use an AAKernelInfo object on the callee to gather information and
4895344a3780SDimitry Andric // merge that into the current state. The latter happens in the updateImpl.
4896b1c73532SDimitry Andric auto CheckCallee = [&](Function *Callee, unsigned NumCallees) {
4897344a3780SDimitry Andric auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
4898344a3780SDimitry Andric const auto &It = OMPInfoCache.RuntimeFunctionIDMap.find(Callee);
4899344a3780SDimitry Andric if (It == OMPInfoCache.RuntimeFunctionIDMap.end()) {
4900344a3780SDimitry Andric // Unknown caller or declarations are not analyzable, we give up.
4901344a3780SDimitry Andric if (!Callee || !A.isFunctionIPOAmendable(*Callee)) {
4902344a3780SDimitry Andric
4903344a3780SDimitry Andric // Unknown callees might contain parallel regions, except if they have
4904344a3780SDimitry Andric // an appropriate assumption attached.
49057fa27ce4SDimitry Andric if (!AssumptionAA ||
49067fa27ce4SDimitry Andric !(AssumptionAA->hasAssumption("omp_no_openmp") ||
49077fa27ce4SDimitry Andric AssumptionAA->hasAssumption("omp_no_parallelism")))
4908344a3780SDimitry Andric ReachedUnknownParallelRegions.insert(&CB);
4909344a3780SDimitry Andric
4910344a3780SDimitry Andric // If SPMDCompatibilityTracker is not fixed, we need to give up on the
4911344a3780SDimitry Andric // idea we can run something unknown in SPMD-mode.
4912c0981da4SDimitry Andric if (!SPMDCompatibilityTracker.isAtFixpoint()) {
4913c0981da4SDimitry Andric SPMDCompatibilityTracker.indicatePessimisticFixpoint();
4914344a3780SDimitry Andric SPMDCompatibilityTracker.insert(&CB);
4915c0981da4SDimitry Andric }
4916344a3780SDimitry Andric
4917b1c73532SDimitry Andric // We have updated the state for this unknown call properly, there
4918b1c73532SDimitry Andric // won't be any change so we indicate a fixpoint.
4919344a3780SDimitry Andric indicateOptimisticFixpoint();
4920344a3780SDimitry Andric }
4921b1c73532SDimitry Andric // If the callee is known and can be used in IPO, we will update the
4922b1c73532SDimitry Andric // state based on the callee state in updateImpl.
4923b1c73532SDimitry Andric return;
4924b1c73532SDimitry Andric }
4925b1c73532SDimitry Andric if (NumCallees > 1) {
4926b1c73532SDimitry Andric indicatePessimisticFixpoint();
4927344a3780SDimitry Andric return;
4928344a3780SDimitry Andric }
4929344a3780SDimitry Andric
4930344a3780SDimitry Andric RuntimeFunction RF = It->getSecond();
4931344a3780SDimitry Andric switch (RF) {
4932344a3780SDimitry Andric // All the functions we know are compatible with SPMD mode.
4933344a3780SDimitry Andric case OMPRTL___kmpc_is_spmd_exec_mode:
4934c0981da4SDimitry Andric case OMPRTL___kmpc_distribute_static_fini:
4935344a3780SDimitry Andric case OMPRTL___kmpc_for_static_fini:
4936344a3780SDimitry Andric case OMPRTL___kmpc_global_thread_num:
4937344a3780SDimitry Andric case OMPRTL___kmpc_get_hardware_num_threads_in_block:
4938344a3780SDimitry Andric case OMPRTL___kmpc_get_hardware_num_blocks:
4939344a3780SDimitry Andric case OMPRTL___kmpc_single:
4940344a3780SDimitry Andric case OMPRTL___kmpc_end_single:
4941344a3780SDimitry Andric case OMPRTL___kmpc_master:
4942344a3780SDimitry Andric case OMPRTL___kmpc_end_master:
4943344a3780SDimitry Andric case OMPRTL___kmpc_barrier:
494477fc4c14SDimitry Andric case OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2:
494577fc4c14SDimitry Andric case OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2:
4946b1c73532SDimitry Andric case OMPRTL___kmpc_error:
4947b1c73532SDimitry Andric case OMPRTL___kmpc_flush:
4948b1c73532SDimitry Andric case OMPRTL___kmpc_get_hardware_thread_id_in_block:
4949b1c73532SDimitry Andric case OMPRTL___kmpc_get_warp_size:
4950b1c73532SDimitry Andric case OMPRTL_omp_get_thread_num:
4951b1c73532SDimitry Andric case OMPRTL_omp_get_num_threads:
4952b1c73532SDimitry Andric case OMPRTL_omp_get_max_threads:
4953b1c73532SDimitry Andric case OMPRTL_omp_in_parallel:
4954b1c73532SDimitry Andric case OMPRTL_omp_get_dynamic:
4955b1c73532SDimitry Andric case OMPRTL_omp_get_cancellation:
4956b1c73532SDimitry Andric case OMPRTL_omp_get_nested:
4957b1c73532SDimitry Andric case OMPRTL_omp_get_schedule:
4958b1c73532SDimitry Andric case OMPRTL_omp_get_thread_limit:
4959b1c73532SDimitry Andric case OMPRTL_omp_get_supported_active_levels:
4960b1c73532SDimitry Andric case OMPRTL_omp_get_max_active_levels:
4961b1c73532SDimitry Andric case OMPRTL_omp_get_level:
4962b1c73532SDimitry Andric case OMPRTL_omp_get_ancestor_thread_num:
4963b1c73532SDimitry Andric case OMPRTL_omp_get_team_size:
4964b1c73532SDimitry Andric case OMPRTL_omp_get_active_level:
4965b1c73532SDimitry Andric case OMPRTL_omp_in_final:
4966b1c73532SDimitry Andric case OMPRTL_omp_get_proc_bind:
4967b1c73532SDimitry Andric case OMPRTL_omp_get_num_places:
4968b1c73532SDimitry Andric case OMPRTL_omp_get_num_procs:
4969b1c73532SDimitry Andric case OMPRTL_omp_get_place_proc_ids:
4970b1c73532SDimitry Andric case OMPRTL_omp_get_place_num:
4971b1c73532SDimitry Andric case OMPRTL_omp_get_partition_num_places:
4972b1c73532SDimitry Andric case OMPRTL_omp_get_partition_place_nums:
4973b1c73532SDimitry Andric case OMPRTL_omp_get_wtime:
4974344a3780SDimitry Andric break;
4975c0981da4SDimitry Andric case OMPRTL___kmpc_distribute_static_init_4:
4976c0981da4SDimitry Andric case OMPRTL___kmpc_distribute_static_init_4u:
4977c0981da4SDimitry Andric case OMPRTL___kmpc_distribute_static_init_8:
4978c0981da4SDimitry Andric case OMPRTL___kmpc_distribute_static_init_8u:
4979344a3780SDimitry Andric case OMPRTL___kmpc_for_static_init_4:
4980344a3780SDimitry Andric case OMPRTL___kmpc_for_static_init_4u:
4981344a3780SDimitry Andric case OMPRTL___kmpc_for_static_init_8:
4982344a3780SDimitry Andric case OMPRTL___kmpc_for_static_init_8u: {
4983344a3780SDimitry Andric // Check the schedule and allow static schedule in SPMD mode.
4984344a3780SDimitry Andric unsigned ScheduleArgOpNo = 2;
4985344a3780SDimitry Andric auto *ScheduleTypeCI =
4986344a3780SDimitry Andric dyn_cast<ConstantInt>(CB.getArgOperand(ScheduleArgOpNo));
4987344a3780SDimitry Andric unsigned ScheduleTypeVal =
4988344a3780SDimitry Andric ScheduleTypeCI ? ScheduleTypeCI->getZExtValue() : 0;
4989344a3780SDimitry Andric switch (OMPScheduleType(ScheduleTypeVal)) {
4990145449b1SDimitry Andric case OMPScheduleType::UnorderedStatic:
4991145449b1SDimitry Andric case OMPScheduleType::UnorderedStaticChunked:
4992145449b1SDimitry Andric case OMPScheduleType::OrderedDistribute:
4993145449b1SDimitry Andric case OMPScheduleType::OrderedDistributeChunked:
4994344a3780SDimitry Andric break;
4995344a3780SDimitry Andric default:
4996c0981da4SDimitry Andric SPMDCompatibilityTracker.indicatePessimisticFixpoint();
4997344a3780SDimitry Andric SPMDCompatibilityTracker.insert(&CB);
4998344a3780SDimitry Andric break;
4999344a3780SDimitry Andric };
5000344a3780SDimitry Andric } break;
5001344a3780SDimitry Andric case OMPRTL___kmpc_target_init:
5002344a3780SDimitry Andric KernelInitCB = &CB;
5003344a3780SDimitry Andric break;
5004344a3780SDimitry Andric case OMPRTL___kmpc_target_deinit:
5005344a3780SDimitry Andric KernelDeinitCB = &CB;
5006344a3780SDimitry Andric break;
5007344a3780SDimitry Andric case OMPRTL___kmpc_parallel_51:
5008b1c73532SDimitry Andric if (!handleParallel51(A, CB))
5009b1c73532SDimitry Andric indicatePessimisticFixpoint();
5010b1c73532SDimitry Andric return;
5011344a3780SDimitry Andric case OMPRTL___kmpc_omp_task:
5012344a3780SDimitry Andric // We do not look into tasks right now, just give up.
501377fc4c14SDimitry Andric SPMDCompatibilityTracker.indicatePessimisticFixpoint();
5014344a3780SDimitry Andric SPMDCompatibilityTracker.insert(&CB);
5015344a3780SDimitry Andric ReachedUnknownParallelRegions.insert(&CB);
5016344a3780SDimitry Andric break;
5017344a3780SDimitry Andric case OMPRTL___kmpc_alloc_shared:
5018344a3780SDimitry Andric case OMPRTL___kmpc_free_shared:
5019344a3780SDimitry Andric // Return without setting a fixpoint, to be resolved in updateImpl.
5020344a3780SDimitry Andric return;
5021344a3780SDimitry Andric default:
5022344a3780SDimitry Andric // Unknown OpenMP runtime calls cannot be executed in SPMD-mode,
5023c0981da4SDimitry Andric // generally. However, they do not hide parallel regions.
502477fc4c14SDimitry Andric SPMDCompatibilityTracker.indicatePessimisticFixpoint();
5025344a3780SDimitry Andric SPMDCompatibilityTracker.insert(&CB);
5026344a3780SDimitry Andric break;
5027344a3780SDimitry Andric }
5028344a3780SDimitry Andric // All other OpenMP runtime calls will not reach parallel regions so they
5029b1c73532SDimitry Andric // can be safely ignored for now. Since it is a known OpenMP runtime call
5030b1c73532SDimitry Andric // we have now modeled all effects and there is no need for any update.
5031344a3780SDimitry Andric indicateOptimisticFixpoint();
5032b1c73532SDimitry Andric };
5033b1c73532SDimitry Andric
5034b1c73532SDimitry Andric const auto *AACE =
5035b1c73532SDimitry Andric A.getAAFor<AACallEdges>(*this, getIRPosition(), DepClassTy::OPTIONAL);
5036b1c73532SDimitry Andric if (!AACE || !AACE->getState().isValidState() || AACE->hasUnknownCallee()) {
5037b1c73532SDimitry Andric CheckCallee(getAssociatedFunction(), 1);
5038b1c73532SDimitry Andric return;
5039b1c73532SDimitry Andric }
5040b1c73532SDimitry Andric const auto &OptimisticEdges = AACE->getOptimisticEdges();
5041b1c73532SDimitry Andric for (auto *Callee : OptimisticEdges) {
5042b1c73532SDimitry Andric CheckCallee(Callee, OptimisticEdges.size());
5043b1c73532SDimitry Andric if (isAtFixpoint())
5044b1c73532SDimitry Andric break;
5045b1c73532SDimitry Andric }
5046344a3780SDimitry Andric }
5047344a3780SDimitry Andric
updateImpl__anon7bbaa8dc0111::AAKernelInfoCallSite5048344a3780SDimitry Andric ChangeStatus updateImpl(Attributor &A) override {
5049344a3780SDimitry Andric // TODO: Once we have call site specific value information we can provide
5050344a3780SDimitry Andric // call site specific liveness information and then it makes
5051344a3780SDimitry Andric // sense to specialize attributes for call sites arguments instead of
5052344a3780SDimitry Andric // redirecting requests to the callee argument.
5053344a3780SDimitry Andric auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
5054b1c73532SDimitry Andric KernelInfoState StateBefore = getState();
5055b1c73532SDimitry Andric
5056b1c73532SDimitry Andric auto CheckCallee = [&](Function *F, int NumCallees) {
5057344a3780SDimitry Andric const auto &It = OMPInfoCache.RuntimeFunctionIDMap.find(F);
5058344a3780SDimitry Andric
5059b1c73532SDimitry Andric // If F is not a runtime function, propagate the AAKernelInfo of the
5060b1c73532SDimitry Andric // callee.
5061344a3780SDimitry Andric if (It == OMPInfoCache.RuntimeFunctionIDMap.end()) {
5062344a3780SDimitry Andric const IRPosition &FnPos = IRPosition::function(*F);
5063b1c73532SDimitry Andric auto *FnAA =
5064b1c73532SDimitry Andric A.getAAFor<AAKernelInfo>(*this, FnPos, DepClassTy::REQUIRED);
50657fa27ce4SDimitry Andric if (!FnAA)
50667fa27ce4SDimitry Andric return indicatePessimisticFixpoint();
50677fa27ce4SDimitry Andric if (getState() == FnAA->getState())
5068344a3780SDimitry Andric return ChangeStatus::UNCHANGED;
50697fa27ce4SDimitry Andric getState() = FnAA->getState();
5070344a3780SDimitry Andric return ChangeStatus::CHANGED;
5071344a3780SDimitry Andric }
5072b1c73532SDimitry Andric if (NumCallees > 1)
5073b1c73532SDimitry Andric return indicatePessimisticFixpoint();
5074b1c73532SDimitry Andric
5075b1c73532SDimitry Andric CallBase &CB = cast<CallBase>(getAssociatedValue());
5076b1c73532SDimitry Andric if (It->getSecond() == OMPRTL___kmpc_parallel_51) {
5077b1c73532SDimitry Andric if (!handleParallel51(A, CB))
5078b1c73532SDimitry Andric return indicatePessimisticFixpoint();
5079b1c73532SDimitry Andric return StateBefore == getState() ? ChangeStatus::UNCHANGED
5080b1c73532SDimitry Andric : ChangeStatus::CHANGED;
5081b1c73532SDimitry Andric }
5082344a3780SDimitry Andric
5083344a3780SDimitry Andric // F is a runtime function that allocates or frees memory, check
5084344a3780SDimitry Andric // AAHeapToStack and AAHeapToShared.
5085b1c73532SDimitry Andric assert(
5086b1c73532SDimitry Andric (It->getSecond() == OMPRTL___kmpc_alloc_shared ||
5087344a3780SDimitry Andric It->getSecond() == OMPRTL___kmpc_free_shared) &&
5088344a3780SDimitry Andric "Expected a __kmpc_alloc_shared or __kmpc_free_shared runtime call");
5089344a3780SDimitry Andric
50907fa27ce4SDimitry Andric auto *HeapToStackAA = A.getAAFor<AAHeapToStack>(
5091344a3780SDimitry Andric *this, IRPosition::function(*CB.getCaller()), DepClassTy::OPTIONAL);
50927fa27ce4SDimitry Andric auto *HeapToSharedAA = A.getAAFor<AAHeapToShared>(
5093344a3780SDimitry Andric *this, IRPosition::function(*CB.getCaller()), DepClassTy::OPTIONAL);
5094344a3780SDimitry Andric
5095344a3780SDimitry Andric RuntimeFunction RF = It->getSecond();
5096344a3780SDimitry Andric
5097344a3780SDimitry Andric switch (RF) {
5098344a3780SDimitry Andric // If neither HeapToStack nor HeapToShared assume the call is removed,
5099344a3780SDimitry Andric // assume SPMD incompatibility.
5100344a3780SDimitry Andric case OMPRTL___kmpc_alloc_shared:
51017fa27ce4SDimitry Andric if ((!HeapToStackAA || !HeapToStackAA->isAssumedHeapToStack(CB)) &&
51027fa27ce4SDimitry Andric (!HeapToSharedAA || !HeapToSharedAA->isAssumedHeapToShared(CB)))
5103344a3780SDimitry Andric SPMDCompatibilityTracker.insert(&CB);
5104344a3780SDimitry Andric break;
5105344a3780SDimitry Andric case OMPRTL___kmpc_free_shared:
51067fa27ce4SDimitry Andric if ((!HeapToStackAA ||
51077fa27ce4SDimitry Andric !HeapToStackAA->isAssumedHeapToStackRemovedFree(CB)) &&
51087fa27ce4SDimitry Andric (!HeapToSharedAA ||
51097fa27ce4SDimitry Andric !HeapToSharedAA->isAssumedHeapToSharedRemovedFree(CB)))
5110344a3780SDimitry Andric SPMDCompatibilityTracker.insert(&CB);
5111344a3780SDimitry Andric break;
5112344a3780SDimitry Andric default:
511377fc4c14SDimitry Andric SPMDCompatibilityTracker.indicatePessimisticFixpoint();
5114344a3780SDimitry Andric SPMDCompatibilityTracker.insert(&CB);
5115344a3780SDimitry Andric }
5116b1c73532SDimitry Andric return ChangeStatus::CHANGED;
5117b1c73532SDimitry Andric };
5118b1c73532SDimitry Andric
5119b1c73532SDimitry Andric const auto *AACE =
5120b1c73532SDimitry Andric A.getAAFor<AACallEdges>(*this, getIRPosition(), DepClassTy::OPTIONAL);
5121b1c73532SDimitry Andric if (!AACE || !AACE->getState().isValidState() || AACE->hasUnknownCallee()) {
5122b1c73532SDimitry Andric if (Function *F = getAssociatedFunction())
5123b1c73532SDimitry Andric CheckCallee(F, /*NumCallees=*/1);
5124b1c73532SDimitry Andric } else {
5125b1c73532SDimitry Andric const auto &OptimisticEdges = AACE->getOptimisticEdges();
5126b1c73532SDimitry Andric for (auto *Callee : OptimisticEdges) {
5127b1c73532SDimitry Andric CheckCallee(Callee, OptimisticEdges.size());
5128b1c73532SDimitry Andric if (isAtFixpoint())
5129b1c73532SDimitry Andric break;
5130b1c73532SDimitry Andric }
5131b1c73532SDimitry Andric }
5132344a3780SDimitry Andric
5133344a3780SDimitry Andric return StateBefore == getState() ? ChangeStatus::UNCHANGED
5134344a3780SDimitry Andric : ChangeStatus::CHANGED;
5135344a3780SDimitry Andric }
5136b1c73532SDimitry Andric
5137b1c73532SDimitry Andric /// Deal with a __kmpc_parallel_51 call (\p CB). Returns true if the call was
5138b1c73532SDimitry Andric /// handled, if a problem occurred, false is returned.
handleParallel51__anon7bbaa8dc0111::AAKernelInfoCallSite5139b1c73532SDimitry Andric bool handleParallel51(Attributor &A, CallBase &CB) {
5140b1c73532SDimitry Andric const unsigned int NonWrapperFunctionArgNo = 5;
5141b1c73532SDimitry Andric const unsigned int WrapperFunctionArgNo = 6;
5142b1c73532SDimitry Andric auto ParallelRegionOpArgNo = SPMDCompatibilityTracker.isAssumed()
5143b1c73532SDimitry Andric ? NonWrapperFunctionArgNo
5144b1c73532SDimitry Andric : WrapperFunctionArgNo;
5145b1c73532SDimitry Andric
5146b1c73532SDimitry Andric auto *ParallelRegion = dyn_cast<Function>(
5147b1c73532SDimitry Andric CB.getArgOperand(ParallelRegionOpArgNo)->stripPointerCasts());
5148b1c73532SDimitry Andric if (!ParallelRegion)
5149b1c73532SDimitry Andric return false;
5150b1c73532SDimitry Andric
5151b1c73532SDimitry Andric ReachedKnownParallelRegions.insert(&CB);
5152b1c73532SDimitry Andric /// Check nested parallelism
5153b1c73532SDimitry Andric auto *FnAA = A.getAAFor<AAKernelInfo>(
5154b1c73532SDimitry Andric *this, IRPosition::function(*ParallelRegion), DepClassTy::OPTIONAL);
5155b1c73532SDimitry Andric NestedParallelism |= !FnAA || !FnAA->getState().isValidState() ||
5156b1c73532SDimitry Andric !FnAA->ReachedKnownParallelRegions.empty() ||
5157b1c73532SDimitry Andric !FnAA->ReachedKnownParallelRegions.isValidState() ||
5158b1c73532SDimitry Andric !FnAA->ReachedUnknownParallelRegions.isValidState() ||
5159b1c73532SDimitry Andric !FnAA->ReachedUnknownParallelRegions.empty();
5160b1c73532SDimitry Andric return true;
5161b1c73532SDimitry Andric }
5162344a3780SDimitry Andric };
5163344a3780SDimitry Andric
5164344a3780SDimitry Andric struct AAFoldRuntimeCall
5165344a3780SDimitry Andric : public StateWrapper<BooleanState, AbstractAttribute> {
5166344a3780SDimitry Andric using Base = StateWrapper<BooleanState, AbstractAttribute>;
5167344a3780SDimitry Andric
AAFoldRuntimeCall__anon7bbaa8dc0111::AAFoldRuntimeCall5168344a3780SDimitry Andric AAFoldRuntimeCall(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
5169344a3780SDimitry Andric
5170344a3780SDimitry Andric /// Statistics are tracked as part of manifest for now.
trackStatistics__anon7bbaa8dc0111::AAFoldRuntimeCall5171344a3780SDimitry Andric void trackStatistics() const override {}
5172344a3780SDimitry Andric
5173344a3780SDimitry Andric /// Create an abstract attribute biew for the position \p IRP.
5174344a3780SDimitry Andric static AAFoldRuntimeCall &createForPosition(const IRPosition &IRP,
5175344a3780SDimitry Andric Attributor &A);
5176344a3780SDimitry Andric
5177344a3780SDimitry Andric /// See AbstractAttribute::getName()
getName__anon7bbaa8dc0111::AAFoldRuntimeCall5178344a3780SDimitry Andric const std::string getName() const override { return "AAFoldRuntimeCall"; }
5179344a3780SDimitry Andric
5180344a3780SDimitry Andric /// See AbstractAttribute::getIdAddr()
getIdAddr__anon7bbaa8dc0111::AAFoldRuntimeCall5181344a3780SDimitry Andric const char *getIdAddr() const override { return &ID; }
5182344a3780SDimitry Andric
5183344a3780SDimitry Andric /// This function should return true if the type of the \p AA is
5184344a3780SDimitry Andric /// AAFoldRuntimeCall
classof__anon7bbaa8dc0111::AAFoldRuntimeCall5185344a3780SDimitry Andric static bool classof(const AbstractAttribute *AA) {
5186344a3780SDimitry Andric return (AA->getIdAddr() == &ID);
5187344a3780SDimitry Andric }
5188344a3780SDimitry Andric
5189344a3780SDimitry Andric static const char ID;
5190344a3780SDimitry Andric };
5191344a3780SDimitry Andric
5192344a3780SDimitry Andric struct AAFoldRuntimeCallCallSiteReturned : AAFoldRuntimeCall {
AAFoldRuntimeCallCallSiteReturned__anon7bbaa8dc0111::AAFoldRuntimeCallCallSiteReturned5193344a3780SDimitry Andric AAFoldRuntimeCallCallSiteReturned(const IRPosition &IRP, Attributor &A)
5194344a3780SDimitry Andric : AAFoldRuntimeCall(IRP, A) {}
5195344a3780SDimitry Andric
5196344a3780SDimitry Andric /// See AbstractAttribute::getAsStr()
getAsStr__anon7bbaa8dc0111::AAFoldRuntimeCallCallSiteReturned51977fa27ce4SDimitry Andric const std::string getAsStr(Attributor *) const override {
5198344a3780SDimitry Andric if (!isValidState())
5199344a3780SDimitry Andric return "<invalid>";
5200344a3780SDimitry Andric
5201344a3780SDimitry Andric std::string Str("simplified value: ");
5202344a3780SDimitry Andric
5203145449b1SDimitry Andric if (!SimplifiedValue)
5204344a3780SDimitry Andric return Str + std::string("none");
5205344a3780SDimitry Andric
5206e3b55780SDimitry Andric if (!*SimplifiedValue)
5207344a3780SDimitry Andric return Str + std::string("nullptr");
5208344a3780SDimitry Andric
5209e3b55780SDimitry Andric if (ConstantInt *CI = dyn_cast<ConstantInt>(*SimplifiedValue))
5210344a3780SDimitry Andric return Str + std::to_string(CI->getSExtValue());
5211344a3780SDimitry Andric
5212344a3780SDimitry Andric return Str + std::string("unknown");
5213344a3780SDimitry Andric }
5214344a3780SDimitry Andric
initialize__anon7bbaa8dc0111::AAFoldRuntimeCallCallSiteReturned5215344a3780SDimitry Andric void initialize(Attributor &A) override {
5216c0981da4SDimitry Andric if (DisableOpenMPOptFolding)
5217c0981da4SDimitry Andric indicatePessimisticFixpoint();
5218c0981da4SDimitry Andric
5219344a3780SDimitry Andric Function *Callee = getAssociatedFunction();
5220344a3780SDimitry Andric
5221344a3780SDimitry Andric auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
5222344a3780SDimitry Andric const auto &It = OMPInfoCache.RuntimeFunctionIDMap.find(Callee);
5223344a3780SDimitry Andric assert(It != OMPInfoCache.RuntimeFunctionIDMap.end() &&
5224344a3780SDimitry Andric "Expected a known OpenMP runtime function");
5225344a3780SDimitry Andric
5226344a3780SDimitry Andric RFKind = It->getSecond();
5227344a3780SDimitry Andric
5228344a3780SDimitry Andric CallBase &CB = cast<CallBase>(getAssociatedValue());
5229344a3780SDimitry Andric A.registerSimplificationCallback(
5230344a3780SDimitry Andric IRPosition::callsite_returned(CB),
5231344a3780SDimitry Andric [&](const IRPosition &IRP, const AbstractAttribute *AA,
5232e3b55780SDimitry Andric bool &UsedAssumedInformation) -> std::optional<Value *> {
5233145449b1SDimitry Andric assert((isValidState() ||
5234e3b55780SDimitry Andric (SimplifiedValue && *SimplifiedValue == nullptr)) &&
5235344a3780SDimitry Andric "Unexpected invalid state!");
5236344a3780SDimitry Andric
5237344a3780SDimitry Andric if (!isAtFixpoint()) {
5238344a3780SDimitry Andric UsedAssumedInformation = true;
5239344a3780SDimitry Andric if (AA)
5240344a3780SDimitry Andric A.recordDependence(*this, *AA, DepClassTy::OPTIONAL);
5241344a3780SDimitry Andric }
5242344a3780SDimitry Andric return SimplifiedValue;
5243344a3780SDimitry Andric });
5244344a3780SDimitry Andric }
5245344a3780SDimitry Andric
updateImpl__anon7bbaa8dc0111::AAFoldRuntimeCallCallSiteReturned5246344a3780SDimitry Andric ChangeStatus updateImpl(Attributor &A) override {
5247344a3780SDimitry Andric ChangeStatus Changed = ChangeStatus::UNCHANGED;
5248344a3780SDimitry Andric switch (RFKind) {
5249344a3780SDimitry Andric case OMPRTL___kmpc_is_spmd_exec_mode:
5250344a3780SDimitry Andric Changed |= foldIsSPMDExecMode(A);
5251344a3780SDimitry Andric break;
5252344a3780SDimitry Andric case OMPRTL___kmpc_parallel_level:
5253344a3780SDimitry Andric Changed |= foldParallelLevel(A);
5254344a3780SDimitry Andric break;
5255344a3780SDimitry Andric case OMPRTL___kmpc_get_hardware_num_threads_in_block:
5256344a3780SDimitry Andric Changed = Changed | foldKernelFnAttribute(A, "omp_target_thread_limit");
5257344a3780SDimitry Andric break;
5258344a3780SDimitry Andric case OMPRTL___kmpc_get_hardware_num_blocks:
5259344a3780SDimitry Andric Changed = Changed | foldKernelFnAttribute(A, "omp_target_num_teams");
5260344a3780SDimitry Andric break;
5261344a3780SDimitry Andric default:
5262344a3780SDimitry Andric llvm_unreachable("Unhandled OpenMP runtime function!");
5263344a3780SDimitry Andric }
5264344a3780SDimitry Andric
5265344a3780SDimitry Andric return Changed;
5266344a3780SDimitry Andric }
5267344a3780SDimitry Andric
manifest__anon7bbaa8dc0111::AAFoldRuntimeCallCallSiteReturned5268344a3780SDimitry Andric ChangeStatus manifest(Attributor &A) override {
5269344a3780SDimitry Andric ChangeStatus Changed = ChangeStatus::UNCHANGED;
5270344a3780SDimitry Andric
5271145449b1SDimitry Andric if (SimplifiedValue && *SimplifiedValue) {
5272c0981da4SDimitry Andric Instruction &I = *getCtxI();
5273145449b1SDimitry Andric A.changeAfterManifest(IRPosition::inst(I), **SimplifiedValue);
5274c0981da4SDimitry Andric A.deleteAfterManifest(I);
5275344a3780SDimitry Andric
5276c0981da4SDimitry Andric CallBase *CB = dyn_cast<CallBase>(&I);
5277c0981da4SDimitry Andric auto Remark = [&](OptimizationRemark OR) {
5278c0981da4SDimitry Andric if (auto *C = dyn_cast<ConstantInt>(*SimplifiedValue))
5279c0981da4SDimitry Andric return OR << "Replacing OpenMP runtime call "
5280c0981da4SDimitry Andric << CB->getCalledFunction()->getName() << " with "
5281c0981da4SDimitry Andric << ore::NV("FoldedValue", C->getZExtValue()) << ".";
5282c0981da4SDimitry Andric return OR << "Replacing OpenMP runtime call "
5283c0981da4SDimitry Andric << CB->getCalledFunction()->getName() << ".";
5284c0981da4SDimitry Andric };
5285c0981da4SDimitry Andric
5286c0981da4SDimitry Andric if (CB && EnableVerboseRemarks)
5287c0981da4SDimitry Andric A.emitRemark<OptimizationRemark>(CB, "OMP180", Remark);
5288c0981da4SDimitry Andric
5289c0981da4SDimitry Andric LLVM_DEBUG(dbgs() << TAG << "Replacing runtime call: " << I << " with "
5290344a3780SDimitry Andric << **SimplifiedValue << "\n");
5291344a3780SDimitry Andric
5292344a3780SDimitry Andric Changed = ChangeStatus::CHANGED;
5293344a3780SDimitry Andric }
5294344a3780SDimitry Andric
5295344a3780SDimitry Andric return Changed;
5296344a3780SDimitry Andric }
5297344a3780SDimitry Andric
indicatePessimisticFixpoint__anon7bbaa8dc0111::AAFoldRuntimeCallCallSiteReturned5298344a3780SDimitry Andric ChangeStatus indicatePessimisticFixpoint() override {
5299344a3780SDimitry Andric SimplifiedValue = nullptr;
5300344a3780SDimitry Andric return AAFoldRuntimeCall::indicatePessimisticFixpoint();
5301344a3780SDimitry Andric }
5302344a3780SDimitry Andric
5303344a3780SDimitry Andric private:
5304344a3780SDimitry Andric /// Fold __kmpc_is_spmd_exec_mode into a constant if possible.
foldIsSPMDExecMode__anon7bbaa8dc0111::AAFoldRuntimeCallCallSiteReturned5305344a3780SDimitry Andric ChangeStatus foldIsSPMDExecMode(Attributor &A) {
5306e3b55780SDimitry Andric std::optional<Value *> SimplifiedValueBefore = SimplifiedValue;
5307344a3780SDimitry Andric
5308344a3780SDimitry Andric unsigned AssumedSPMDCount = 0, KnownSPMDCount = 0;
5309344a3780SDimitry Andric unsigned AssumedNonSPMDCount = 0, KnownNonSPMDCount = 0;
53107fa27ce4SDimitry Andric auto *CallerKernelInfoAA = A.getAAFor<AAKernelInfo>(
5311344a3780SDimitry Andric *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
5312344a3780SDimitry Andric
53137fa27ce4SDimitry Andric if (!CallerKernelInfoAA ||
53147fa27ce4SDimitry Andric !CallerKernelInfoAA->ReachingKernelEntries.isValidState())
5315344a3780SDimitry Andric return indicatePessimisticFixpoint();
5316344a3780SDimitry Andric
53177fa27ce4SDimitry Andric for (Kernel K : CallerKernelInfoAA->ReachingKernelEntries) {
53187fa27ce4SDimitry Andric auto *AA = A.getAAFor<AAKernelInfo>(*this, IRPosition::function(*K),
5319344a3780SDimitry Andric DepClassTy::REQUIRED);
5320344a3780SDimitry Andric
53217fa27ce4SDimitry Andric if (!AA || !AA->isValidState()) {
5322344a3780SDimitry Andric SimplifiedValue = nullptr;
5323344a3780SDimitry Andric return indicatePessimisticFixpoint();
5324344a3780SDimitry Andric }
5325344a3780SDimitry Andric
53267fa27ce4SDimitry Andric if (AA->SPMDCompatibilityTracker.isAssumed()) {
53277fa27ce4SDimitry Andric if (AA->SPMDCompatibilityTracker.isAtFixpoint())
5328344a3780SDimitry Andric ++KnownSPMDCount;
5329344a3780SDimitry Andric else
5330344a3780SDimitry Andric ++AssumedSPMDCount;
5331344a3780SDimitry Andric } else {
53327fa27ce4SDimitry Andric if (AA->SPMDCompatibilityTracker.isAtFixpoint())
5333344a3780SDimitry Andric ++KnownNonSPMDCount;
5334344a3780SDimitry Andric else
5335344a3780SDimitry Andric ++AssumedNonSPMDCount;
5336344a3780SDimitry Andric }
5337344a3780SDimitry Andric }
5338344a3780SDimitry Andric
5339344a3780SDimitry Andric if ((AssumedSPMDCount + KnownSPMDCount) &&
5340344a3780SDimitry Andric (AssumedNonSPMDCount + KnownNonSPMDCount))
5341344a3780SDimitry Andric return indicatePessimisticFixpoint();
5342344a3780SDimitry Andric
5343344a3780SDimitry Andric auto &Ctx = getAnchorValue().getContext();
5344344a3780SDimitry Andric if (KnownSPMDCount || AssumedSPMDCount) {
5345344a3780SDimitry Andric assert(KnownNonSPMDCount == 0 && AssumedNonSPMDCount == 0 &&
5346344a3780SDimitry Andric "Expected only SPMD kernels!");
5347344a3780SDimitry Andric // All reaching kernels are in SPMD mode. Update all function calls to
5348344a3780SDimitry Andric // __kmpc_is_spmd_exec_mode to 1.
5349344a3780SDimitry Andric SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), true);
5350344a3780SDimitry Andric } else if (KnownNonSPMDCount || AssumedNonSPMDCount) {
5351344a3780SDimitry Andric assert(KnownSPMDCount == 0 && AssumedSPMDCount == 0 &&
5352344a3780SDimitry Andric "Expected only non-SPMD kernels!");
5353344a3780SDimitry Andric // All reaching kernels are in non-SPMD mode. Update all function
5354344a3780SDimitry Andric // calls to __kmpc_is_spmd_exec_mode to 0.
5355344a3780SDimitry Andric SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), false);
5356344a3780SDimitry Andric } else {
5357344a3780SDimitry Andric // We have empty reaching kernels, therefore we cannot tell if the
5358344a3780SDimitry Andric // associated call site can be folded. At this moment, SimplifiedValue
5359344a3780SDimitry Andric // must be none.
5360145449b1SDimitry Andric assert(!SimplifiedValue && "SimplifiedValue should be none");
5361344a3780SDimitry Andric }
5362344a3780SDimitry Andric
5363344a3780SDimitry Andric return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED
5364344a3780SDimitry Andric : ChangeStatus::CHANGED;
5365344a3780SDimitry Andric }
5366344a3780SDimitry Andric
5367344a3780SDimitry Andric /// Fold __kmpc_parallel_level into a constant if possible.
foldParallelLevel__anon7bbaa8dc0111::AAFoldRuntimeCallCallSiteReturned5368344a3780SDimitry Andric ChangeStatus foldParallelLevel(Attributor &A) {
5369e3b55780SDimitry Andric std::optional<Value *> SimplifiedValueBefore = SimplifiedValue;
5370344a3780SDimitry Andric
53717fa27ce4SDimitry Andric auto *CallerKernelInfoAA = A.getAAFor<AAKernelInfo>(
5372344a3780SDimitry Andric *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
5373344a3780SDimitry Andric
53747fa27ce4SDimitry Andric if (!CallerKernelInfoAA ||
53757fa27ce4SDimitry Andric !CallerKernelInfoAA->ParallelLevels.isValidState())
5376344a3780SDimitry Andric return indicatePessimisticFixpoint();
5377344a3780SDimitry Andric
53787fa27ce4SDimitry Andric if (!CallerKernelInfoAA->ReachingKernelEntries.isValidState())
5379344a3780SDimitry Andric return indicatePessimisticFixpoint();
5380344a3780SDimitry Andric
53817fa27ce4SDimitry Andric if (CallerKernelInfoAA->ReachingKernelEntries.empty()) {
5382145449b1SDimitry Andric assert(!SimplifiedValue &&
5383344a3780SDimitry Andric "SimplifiedValue should keep none at this point");
5384344a3780SDimitry Andric return ChangeStatus::UNCHANGED;
5385344a3780SDimitry Andric }
5386344a3780SDimitry Andric
5387344a3780SDimitry Andric unsigned AssumedSPMDCount = 0, KnownSPMDCount = 0;
5388344a3780SDimitry Andric unsigned AssumedNonSPMDCount = 0, KnownNonSPMDCount = 0;
53897fa27ce4SDimitry Andric for (Kernel K : CallerKernelInfoAA->ReachingKernelEntries) {
53907fa27ce4SDimitry Andric auto *AA = A.getAAFor<AAKernelInfo>(*this, IRPosition::function(*K),
5391344a3780SDimitry Andric DepClassTy::REQUIRED);
53927fa27ce4SDimitry Andric if (!AA || !AA->SPMDCompatibilityTracker.isValidState())
5393344a3780SDimitry Andric return indicatePessimisticFixpoint();
5394344a3780SDimitry Andric
53957fa27ce4SDimitry Andric if (AA->SPMDCompatibilityTracker.isAssumed()) {
53967fa27ce4SDimitry Andric if (AA->SPMDCompatibilityTracker.isAtFixpoint())
5397344a3780SDimitry Andric ++KnownSPMDCount;
5398344a3780SDimitry Andric else
5399344a3780SDimitry Andric ++AssumedSPMDCount;
5400344a3780SDimitry Andric } else {
54017fa27ce4SDimitry Andric if (AA->SPMDCompatibilityTracker.isAtFixpoint())
5402344a3780SDimitry Andric ++KnownNonSPMDCount;
5403344a3780SDimitry Andric else
5404344a3780SDimitry Andric ++AssumedNonSPMDCount;
5405344a3780SDimitry Andric }
5406344a3780SDimitry Andric }
5407344a3780SDimitry Andric
5408344a3780SDimitry Andric if ((AssumedSPMDCount + KnownSPMDCount) &&
5409344a3780SDimitry Andric (AssumedNonSPMDCount + KnownNonSPMDCount))
5410344a3780SDimitry Andric return indicatePessimisticFixpoint();
5411344a3780SDimitry Andric
5412344a3780SDimitry Andric auto &Ctx = getAnchorValue().getContext();
5413344a3780SDimitry Andric // If the caller can only be reached by SPMD kernel entries, the parallel
5414344a3780SDimitry Andric // level is 1. Similarly, if the caller can only be reached by non-SPMD
5415344a3780SDimitry Andric // kernel entries, it is 0.
5416344a3780SDimitry Andric if (AssumedSPMDCount || KnownSPMDCount) {
5417344a3780SDimitry Andric assert(KnownNonSPMDCount == 0 && AssumedNonSPMDCount == 0 &&
5418344a3780SDimitry Andric "Expected only SPMD kernels!");
5419344a3780SDimitry Andric SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), 1);
5420344a3780SDimitry Andric } else {
5421344a3780SDimitry Andric assert(KnownSPMDCount == 0 && AssumedSPMDCount == 0 &&
5422344a3780SDimitry Andric "Expected only non-SPMD kernels!");
5423344a3780SDimitry Andric SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), 0);
5424344a3780SDimitry Andric }
5425344a3780SDimitry Andric return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED
5426344a3780SDimitry Andric : ChangeStatus::CHANGED;
5427344a3780SDimitry Andric }
5428344a3780SDimitry Andric
foldKernelFnAttribute__anon7bbaa8dc0111::AAFoldRuntimeCallCallSiteReturned5429344a3780SDimitry Andric ChangeStatus foldKernelFnAttribute(Attributor &A, llvm::StringRef Attr) {
5430344a3780SDimitry Andric // Specialize only if all the calls agree with the attribute constant value
5431344a3780SDimitry Andric int32_t CurrentAttrValue = -1;
5432e3b55780SDimitry Andric std::optional<Value *> SimplifiedValueBefore = SimplifiedValue;
5433344a3780SDimitry Andric
54347fa27ce4SDimitry Andric auto *CallerKernelInfoAA = A.getAAFor<AAKernelInfo>(
5435344a3780SDimitry Andric *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
5436344a3780SDimitry Andric
54377fa27ce4SDimitry Andric if (!CallerKernelInfoAA ||
54387fa27ce4SDimitry Andric !CallerKernelInfoAA->ReachingKernelEntries.isValidState())
5439344a3780SDimitry Andric return indicatePessimisticFixpoint();
5440344a3780SDimitry Andric
5441344a3780SDimitry Andric // Iterate over the kernels that reach this function
54427fa27ce4SDimitry Andric for (Kernel K : CallerKernelInfoAA->ReachingKernelEntries) {
5443e3b55780SDimitry Andric int32_t NextAttrVal = K->getFnAttributeAsParsedInteger(Attr, -1);
5444344a3780SDimitry Andric
5445344a3780SDimitry Andric if (NextAttrVal == -1 ||
5446344a3780SDimitry Andric (CurrentAttrValue != -1 && CurrentAttrValue != NextAttrVal))
5447344a3780SDimitry Andric return indicatePessimisticFixpoint();
5448344a3780SDimitry Andric CurrentAttrValue = NextAttrVal;
5449344a3780SDimitry Andric }
5450344a3780SDimitry Andric
5451344a3780SDimitry Andric if (CurrentAttrValue != -1) {
5452344a3780SDimitry Andric auto &Ctx = getAnchorValue().getContext();
5453344a3780SDimitry Andric SimplifiedValue =
5454344a3780SDimitry Andric ConstantInt::get(Type::getInt32Ty(Ctx), CurrentAttrValue);
5455344a3780SDimitry Andric }
5456344a3780SDimitry Andric return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED
5457344a3780SDimitry Andric : ChangeStatus::CHANGED;
5458344a3780SDimitry Andric }
5459344a3780SDimitry Andric
5460344a3780SDimitry Andric /// An optional value the associated value is assumed to fold to. That is, we
5461344a3780SDimitry Andric /// assume the associated value (which is a call) can be replaced by this
5462344a3780SDimitry Andric /// simplified value.
5463e3b55780SDimitry Andric std::optional<Value *> SimplifiedValue;
5464344a3780SDimitry Andric
5465344a3780SDimitry Andric /// The runtime function kind of the callee of the associated call site.
5466344a3780SDimitry Andric RuntimeFunction RFKind;
5467344a3780SDimitry Andric };
5468344a3780SDimitry Andric
5469cfca06d7SDimitry Andric } // namespace
5470cfca06d7SDimitry Andric
5471344a3780SDimitry Andric /// Register folding callsite
registerFoldRuntimeCall(RuntimeFunction RF)5472344a3780SDimitry Andric void OpenMPOpt::registerFoldRuntimeCall(RuntimeFunction RF) {
5473344a3780SDimitry Andric auto &RFI = OMPInfoCache.RFIs[RF];
5474344a3780SDimitry Andric RFI.foreachUse(SCC, [&](Use &U, Function &F) {
5475344a3780SDimitry Andric CallInst *CI = OpenMPOpt::getCallIfRegularCall(U, &RFI);
5476344a3780SDimitry Andric if (!CI)
5477344a3780SDimitry Andric return false;
5478344a3780SDimitry Andric A.getOrCreateAAFor<AAFoldRuntimeCall>(
5479344a3780SDimitry Andric IRPosition::callsite_returned(*CI), /* QueryingAA */ nullptr,
5480344a3780SDimitry Andric DepClassTy::NONE, /* ForceUpdate */ false,
5481344a3780SDimitry Andric /* UpdateAfterInit */ false);
5482344a3780SDimitry Andric return false;
5483344a3780SDimitry Andric });
5484344a3780SDimitry Andric }
5485344a3780SDimitry Andric
registerAAs(bool IsModulePass)5486344a3780SDimitry Andric void OpenMPOpt::registerAAs(bool IsModulePass) {
5487344a3780SDimitry Andric if (SCC.empty())
5488344a3780SDimitry Andric return;
5489145449b1SDimitry Andric
5490344a3780SDimitry Andric if (IsModulePass) {
5491344a3780SDimitry Andric // Ensure we create the AAKernelInfo AAs first and without triggering an
5492344a3780SDimitry Andric // update. This will make sure we register all value simplification
5493344a3780SDimitry Andric // callbacks before any other AA has the chance to create an AAValueSimplify
5494344a3780SDimitry Andric // or similar.
5495145449b1SDimitry Andric auto CreateKernelInfoCB = [&](Use &, Function &Kernel) {
5496344a3780SDimitry Andric A.getOrCreateAAFor<AAKernelInfo>(
5497145449b1SDimitry Andric IRPosition::function(Kernel), /* QueryingAA */ nullptr,
5498344a3780SDimitry Andric DepClassTy::NONE, /* ForceUpdate */ false,
5499344a3780SDimitry Andric /* UpdateAfterInit */ false);
5500145449b1SDimitry Andric return false;
5501145449b1SDimitry Andric };
5502145449b1SDimitry Andric OMPInformationCache::RuntimeFunctionInfo &InitRFI =
5503145449b1SDimitry Andric OMPInfoCache.RFIs[OMPRTL___kmpc_target_init];
5504145449b1SDimitry Andric InitRFI.foreachUse(SCC, CreateKernelInfoCB);
5505344a3780SDimitry Andric
5506344a3780SDimitry Andric registerFoldRuntimeCall(OMPRTL___kmpc_is_spmd_exec_mode);
5507344a3780SDimitry Andric registerFoldRuntimeCall(OMPRTL___kmpc_parallel_level);
5508344a3780SDimitry Andric registerFoldRuntimeCall(OMPRTL___kmpc_get_hardware_num_threads_in_block);
5509344a3780SDimitry Andric registerFoldRuntimeCall(OMPRTL___kmpc_get_hardware_num_blocks);
5510344a3780SDimitry Andric }
5511344a3780SDimitry Andric
5512344a3780SDimitry Andric // Create CallSite AA for all Getters.
5513e3b55780SDimitry Andric if (DeduceICVValues) {
5514344a3780SDimitry Andric for (int Idx = 0; Idx < OMPInfoCache.ICVs.size() - 1; ++Idx) {
5515344a3780SDimitry Andric auto ICVInfo = OMPInfoCache.ICVs[static_cast<InternalControlVar>(Idx)];
5516344a3780SDimitry Andric
5517344a3780SDimitry Andric auto &GetterRFI = OMPInfoCache.RFIs[ICVInfo.Getter];
5518344a3780SDimitry Andric
5519344a3780SDimitry Andric auto CreateAA = [&](Use &U, Function &Caller) {
5520344a3780SDimitry Andric CallInst *CI = OpenMPOpt::getCallIfRegularCall(U, &GetterRFI);
5521344a3780SDimitry Andric if (!CI)
5522344a3780SDimitry Andric return false;
5523344a3780SDimitry Andric
5524344a3780SDimitry Andric auto &CB = cast<CallBase>(*CI);
5525344a3780SDimitry Andric
5526344a3780SDimitry Andric IRPosition CBPos = IRPosition::callsite_function(CB);
5527344a3780SDimitry Andric A.getOrCreateAAFor<AAICVTracker>(CBPos);
5528344a3780SDimitry Andric return false;
5529344a3780SDimitry Andric };
5530344a3780SDimitry Andric
5531344a3780SDimitry Andric GetterRFI.foreachUse(SCC, CreateAA);
5532344a3780SDimitry Andric }
5533e3b55780SDimitry Andric }
5534344a3780SDimitry Andric
5535344a3780SDimitry Andric // Create an ExecutionDomain AA for every function and a HeapToStack AA for
5536344a3780SDimitry Andric // every function if there is a device kernel.
5537344a3780SDimitry Andric if (!isOpenMPDevice(M))
5538344a3780SDimitry Andric return;
5539344a3780SDimitry Andric
5540344a3780SDimitry Andric for (auto *F : SCC) {
5541344a3780SDimitry Andric if (F->isDeclaration())
5542344a3780SDimitry Andric continue;
5543344a3780SDimitry Andric
5544e3b55780SDimitry Andric // We look at internal functions only on-demand but if any use is not a
5545e3b55780SDimitry Andric // direct call or outside the current set of analyzed functions, we have
5546e3b55780SDimitry Andric // to do it eagerly.
5547e3b55780SDimitry Andric if (F->hasLocalLinkage()) {
5548e3b55780SDimitry Andric if (llvm::all_of(F->uses(), [this](const Use &U) {
5549e3b55780SDimitry Andric const auto *CB = dyn_cast<CallBase>(U.getUser());
5550e3b55780SDimitry Andric return CB && CB->isCallee(&U) &&
5551e3b55780SDimitry Andric A.isRunOn(const_cast<Function *>(CB->getCaller()));
5552e3b55780SDimitry Andric }))
5553e3b55780SDimitry Andric continue;
5554e3b55780SDimitry Andric }
5555e3b55780SDimitry Andric registerAAsForFunction(A, *F);
5556e3b55780SDimitry Andric }
5557e3b55780SDimitry Andric }
5558344a3780SDimitry Andric
registerAAsForFunction(Attributor & A,const Function & F)5559e3b55780SDimitry Andric void OpenMPOpt::registerAAsForFunction(Attributor &A, const Function &F) {
5560e3b55780SDimitry Andric if (!DisableOpenMPOptDeglobalization)
5561e3b55780SDimitry Andric A.getOrCreateAAFor<AAHeapToShared>(IRPosition::function(F));
5562e3b55780SDimitry Andric A.getOrCreateAAFor<AAExecutionDomain>(IRPosition::function(F));
5563e3b55780SDimitry Andric if (!DisableOpenMPOptDeglobalization)
5564e3b55780SDimitry Andric A.getOrCreateAAFor<AAHeapToStack>(IRPosition::function(F));
55657fa27ce4SDimitry Andric if (F.hasFnAttribute(Attribute::Convergent))
55667fa27ce4SDimitry Andric A.getOrCreateAAFor<AANonConvergent>(IRPosition::function(F));
5567e3b55780SDimitry Andric
5568e3b55780SDimitry Andric for (auto &I : instructions(F)) {
5569344a3780SDimitry Andric if (auto *LI = dyn_cast<LoadInst>(&I)) {
5570344a3780SDimitry Andric bool UsedAssumedInformation = false;
5571344a3780SDimitry Andric A.getAssumedSimplified(IRPosition::value(*LI), /* AA */ nullptr,
55724b4fe385SDimitry Andric UsedAssumedInformation, AA::Interprocedural);
5573e3b55780SDimitry Andric continue;
5574e3b55780SDimitry Andric }
5575b1c73532SDimitry Andric if (auto *CI = dyn_cast<CallBase>(&I)) {
5576b1c73532SDimitry Andric if (CI->isIndirectCall())
5577b1c73532SDimitry Andric A.getOrCreateAAFor<AAIndirectCallInfo>(
5578b1c73532SDimitry Andric IRPosition::callsite_function(*CI));
5579b1c73532SDimitry Andric }
5580e3b55780SDimitry Andric if (auto *SI = dyn_cast<StoreInst>(&I)) {
55816f8fc217SDimitry Andric A.getOrCreateAAFor<AAIsDead>(IRPosition::value(*SI));
5582e3b55780SDimitry Andric continue;
5583e3b55780SDimitry Andric }
55847fa27ce4SDimitry Andric if (auto *FI = dyn_cast<FenceInst>(&I)) {
55857fa27ce4SDimitry Andric A.getOrCreateAAFor<AAIsDead>(IRPosition::value(*FI));
55867fa27ce4SDimitry Andric continue;
55877fa27ce4SDimitry Andric }
5588e3b55780SDimitry Andric if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
5589e3b55780SDimitry Andric if (II->getIntrinsicID() == Intrinsic::assume) {
5590e3b55780SDimitry Andric A.getOrCreateAAFor<AAPotentialValues>(
5591e3b55780SDimitry Andric IRPosition::value(*II->getArgOperand(0)));
5592e3b55780SDimitry Andric continue;
5593344a3780SDimitry Andric }
5594344a3780SDimitry Andric }
5595344a3780SDimitry Andric }
5596344a3780SDimitry Andric }
5597344a3780SDimitry Andric
// Definitions of the per-attribute ID tags. Identity checks compare the
// *addresses* of these members (see getIdAddr()/classof() above), so the
// stored value 0 itself is irrelevant.
const char AAICVTracker::ID = 0;
const char AAKernelInfo::ID = 0;
const char AAExecutionDomain::ID = 0;
const char AAHeapToShared::ID = 0;
const char AAFoldRuntimeCall::ID = 0;
5603cfca06d7SDimitry Andric
createForPosition(const IRPosition & IRP,Attributor & A)5604cfca06d7SDimitry Andric AAICVTracker &AAICVTracker::createForPosition(const IRPosition &IRP,
5605cfca06d7SDimitry Andric Attributor &A) {
5606cfca06d7SDimitry Andric AAICVTracker *AA = nullptr;
5607cfca06d7SDimitry Andric switch (IRP.getPositionKind()) {
5608cfca06d7SDimitry Andric case IRPosition::IRP_INVALID:
5609cfca06d7SDimitry Andric case IRPosition::IRP_FLOAT:
5610cfca06d7SDimitry Andric case IRPosition::IRP_ARGUMENT:
5611cfca06d7SDimitry Andric case IRPosition::IRP_CALL_SITE_ARGUMENT:
5612cfca06d7SDimitry Andric llvm_unreachable("ICVTracker can only be created for function position!");
5613b60736ecSDimitry Andric case IRPosition::IRP_RETURNED:
5614b60736ecSDimitry Andric AA = new (A.Allocator) AAICVTrackerFunctionReturned(IRP, A);
5615b60736ecSDimitry Andric break;
5616b60736ecSDimitry Andric case IRPosition::IRP_CALL_SITE_RETURNED:
5617b60736ecSDimitry Andric AA = new (A.Allocator) AAICVTrackerCallSiteReturned(IRP, A);
5618b60736ecSDimitry Andric break;
5619b60736ecSDimitry Andric case IRPosition::IRP_CALL_SITE:
5620b60736ecSDimitry Andric AA = new (A.Allocator) AAICVTrackerCallSite(IRP, A);
5621b60736ecSDimitry Andric break;
5622cfca06d7SDimitry Andric case IRPosition::IRP_FUNCTION:
5623cfca06d7SDimitry Andric AA = new (A.Allocator) AAICVTrackerFunction(IRP, A);
5624cfca06d7SDimitry Andric break;
5625cfca06d7SDimitry Andric }
5626cfca06d7SDimitry Andric
5627cfca06d7SDimitry Andric return *AA;
5628cfca06d7SDimitry Andric }
5629cfca06d7SDimitry Andric
createForPosition(const IRPosition & IRP,Attributor & A)5630344a3780SDimitry Andric AAExecutionDomain &AAExecutionDomain::createForPosition(const IRPosition &IRP,
5631344a3780SDimitry Andric Attributor &A) {
5632344a3780SDimitry Andric AAExecutionDomainFunction *AA = nullptr;
5633344a3780SDimitry Andric switch (IRP.getPositionKind()) {
5634344a3780SDimitry Andric case IRPosition::IRP_INVALID:
5635344a3780SDimitry Andric case IRPosition::IRP_FLOAT:
5636344a3780SDimitry Andric case IRPosition::IRP_ARGUMENT:
5637344a3780SDimitry Andric case IRPosition::IRP_CALL_SITE_ARGUMENT:
5638344a3780SDimitry Andric case IRPosition::IRP_RETURNED:
5639344a3780SDimitry Andric case IRPosition::IRP_CALL_SITE_RETURNED:
5640344a3780SDimitry Andric case IRPosition::IRP_CALL_SITE:
5641344a3780SDimitry Andric llvm_unreachable(
5642344a3780SDimitry Andric "AAExecutionDomain can only be created for function position!");
5643344a3780SDimitry Andric case IRPosition::IRP_FUNCTION:
5644344a3780SDimitry Andric AA = new (A.Allocator) AAExecutionDomainFunction(IRP, A);
5645344a3780SDimitry Andric break;
5646344a3780SDimitry Andric }
5647344a3780SDimitry Andric
5648344a3780SDimitry Andric return *AA;
5649344a3780SDimitry Andric }
5650344a3780SDimitry Andric
createForPosition(const IRPosition & IRP,Attributor & A)5651344a3780SDimitry Andric AAHeapToShared &AAHeapToShared::createForPosition(const IRPosition &IRP,
5652344a3780SDimitry Andric Attributor &A) {
5653344a3780SDimitry Andric AAHeapToSharedFunction *AA = nullptr;
5654344a3780SDimitry Andric switch (IRP.getPositionKind()) {
5655344a3780SDimitry Andric case IRPosition::IRP_INVALID:
5656344a3780SDimitry Andric case IRPosition::IRP_FLOAT:
5657344a3780SDimitry Andric case IRPosition::IRP_ARGUMENT:
5658344a3780SDimitry Andric case IRPosition::IRP_CALL_SITE_ARGUMENT:
5659344a3780SDimitry Andric case IRPosition::IRP_RETURNED:
5660344a3780SDimitry Andric case IRPosition::IRP_CALL_SITE_RETURNED:
5661344a3780SDimitry Andric case IRPosition::IRP_CALL_SITE:
5662344a3780SDimitry Andric llvm_unreachable(
5663344a3780SDimitry Andric "AAHeapToShared can only be created for function position!");
5664344a3780SDimitry Andric case IRPosition::IRP_FUNCTION:
5665344a3780SDimitry Andric AA = new (A.Allocator) AAHeapToSharedFunction(IRP, A);
5666344a3780SDimitry Andric break;
5667344a3780SDimitry Andric }
5668344a3780SDimitry Andric
5669344a3780SDimitry Andric return *AA;
5670344a3780SDimitry Andric }
5671344a3780SDimitry Andric
createForPosition(const IRPosition & IRP,Attributor & A)5672344a3780SDimitry Andric AAKernelInfo &AAKernelInfo::createForPosition(const IRPosition &IRP,
5673344a3780SDimitry Andric Attributor &A) {
5674344a3780SDimitry Andric AAKernelInfo *AA = nullptr;
5675344a3780SDimitry Andric switch (IRP.getPositionKind()) {
5676344a3780SDimitry Andric case IRPosition::IRP_INVALID:
5677344a3780SDimitry Andric case IRPosition::IRP_FLOAT:
5678344a3780SDimitry Andric case IRPosition::IRP_ARGUMENT:
5679344a3780SDimitry Andric case IRPosition::IRP_RETURNED:
5680344a3780SDimitry Andric case IRPosition::IRP_CALL_SITE_RETURNED:
5681344a3780SDimitry Andric case IRPosition::IRP_CALL_SITE_ARGUMENT:
5682344a3780SDimitry Andric llvm_unreachable("KernelInfo can only be created for function position!");
5683344a3780SDimitry Andric case IRPosition::IRP_CALL_SITE:
5684344a3780SDimitry Andric AA = new (A.Allocator) AAKernelInfoCallSite(IRP, A);
5685344a3780SDimitry Andric break;
5686344a3780SDimitry Andric case IRPosition::IRP_FUNCTION:
5687344a3780SDimitry Andric AA = new (A.Allocator) AAKernelInfoFunction(IRP, A);
5688344a3780SDimitry Andric break;
5689344a3780SDimitry Andric }
5690344a3780SDimitry Andric
5691344a3780SDimitry Andric return *AA;
5692344a3780SDimitry Andric }
5693344a3780SDimitry Andric
createForPosition(const IRPosition & IRP,Attributor & A)5694344a3780SDimitry Andric AAFoldRuntimeCall &AAFoldRuntimeCall::createForPosition(const IRPosition &IRP,
5695344a3780SDimitry Andric Attributor &A) {
5696344a3780SDimitry Andric AAFoldRuntimeCall *AA = nullptr;
5697344a3780SDimitry Andric switch (IRP.getPositionKind()) {
5698344a3780SDimitry Andric case IRPosition::IRP_INVALID:
5699344a3780SDimitry Andric case IRPosition::IRP_FLOAT:
5700344a3780SDimitry Andric case IRPosition::IRP_ARGUMENT:
5701344a3780SDimitry Andric case IRPosition::IRP_RETURNED:
5702344a3780SDimitry Andric case IRPosition::IRP_FUNCTION:
5703344a3780SDimitry Andric case IRPosition::IRP_CALL_SITE:
5704344a3780SDimitry Andric case IRPosition::IRP_CALL_SITE_ARGUMENT:
5705344a3780SDimitry Andric llvm_unreachable("KernelInfo can only be created for call site position!");
5706344a3780SDimitry Andric case IRPosition::IRP_CALL_SITE_RETURNED:
5707344a3780SDimitry Andric AA = new (A.Allocator) AAFoldRuntimeCallCallSiteReturned(IRP, A);
5708344a3780SDimitry Andric break;
5709344a3780SDimitry Andric }
5710344a3780SDimitry Andric
5711344a3780SDimitry Andric return *AA;
5712344a3780SDimitry Andric }
5713344a3780SDimitry Andric
/// Module-level OpenMP optimization driver.
///
/// Runs the Attributor-based OpenMP optimizations over every non-internalized
/// function definition in \p M. On device (target) modules, eligible functions
/// are first internalized so interprocedural passes can see every call edge.
PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
  // Bail out cheaply when the module has no OpenMP metadata, or when the
  // optimizations are explicitly disabled.
  if (!containsOpenMP(M))
    return PreservedAnalyses::all();
  if (DisableOpenMPOptimizations)
    return PreservedAnalyses::all();

  FunctionAnalysisManager &FAM =
      AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
  KernelSet Kernels = getDeviceKernels(M);

  if (PrintModuleBeforeOptimizations)
    LLVM_DEBUG(dbgs() << TAG << "Module before OpenMPOpt Module Pass:\n" << M);

  // A function counts as "called" if it is a device kernel entry point or has
  // any user other than a BlockAddress (which is not a real call edge).
  auto IsCalled = [&](Function &F) {
    if (Kernels.contains(&F))
      return true;
    for (const User *U : F.users())
      if (!isa<BlockAddress>(U))
        return true;
    return false;
  };

  // Remark OMP140: informs the user that a function could not be internalized
  // and may therefore block some interprocedural optimizations.
  auto EmitRemark = [&](Function &F) {
    auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
    ORE.emit([&]() {
      OptimizationRemarkAnalysis ORA(DEBUG_TYPE, "OMP140", &F);
      return ORA << "Could not internalize function. "
                 << "Some optimizations may not be possible. [OMP140]";
    });
  };

  bool Changed = false;

  // Create internal copies of each function if this is a kernel Module. This
  // allows interprocedural passes to see every call edge.
  DenseMap<Function *, Function *> InternalizedMap;
  if (isOpenMPDevice(M)) {
    SmallPtrSet<Function *, 16> InternalizeFns;
    for (Function &F : M)
      if (!F.isDeclaration() && !Kernels.contains(&F) && IsCalled(F) &&
          !DisableInternalization) {
        if (Attributor::isInternalizable(F)) {
          InternalizeFns.insert(&F);
        } else if (!F.hasLocalLinkage() && !F.hasFnAttribute(Attribute::Cold)) {
          // Already-local or cold functions are not worth a remark.
          EmitRemark(F);
        }
      }

    Changed |=
        Attributor::internalizeFunctions(InternalizeFns, InternalizedMap);
  }

  // Look at every function in the Module unless it was internalized.
  SetVector<Function *> Functions;
  SmallVector<Function *, 16> SCC;
  for (Function &F : M)
    if (!F.isDeclaration() && !InternalizedMap.lookup(&F)) {
      SCC.push_back(&F);
      Functions.insert(&F);
    }

  // Even with nothing left to optimize, internalization above may already
  // have changed the module.
  if (SCC.empty())
    return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();

  AnalysisGetter AG(FAM);

  auto OREGetter = [&FAM](Function *F) -> OptimizationRemarkEmitter & {
    return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F);
  };

  BumpPtrAllocator Allocator;
  CallGraphUpdater CGUpdater;

  // NOTE(review): PostLink is also set during the ThinLTO *pre*-link phase —
  // presumably intentional, but worth confirming against the LTO pipeline
  // setup before changing.
  bool PostLink = LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink ||
                  LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink;
  OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ nullptr, PostLink);

  // Device modules honor the user-configurable iteration cap; host modules
  // use a fixed budget of 32 fixpoint iterations.
  unsigned MaxFixpointIterations =
      (isOpenMPDevice(M)) ? SetFixpointIterations : 32;

  AttributorConfig AC(CGUpdater);
  AC.DefaultInitializeLiveInternals = false;
  AC.IsModulePass = true;
  AC.RewriteSignatures = false;
  AC.MaxFixpointIterations = MaxFixpointIterations;
  AC.OREGetter = OREGetter;
  AC.PassName = DEBUG_TYPE;
  AC.InitializationCallback = OpenMPOpt::registerAAsForFunction;
  // Only functions carrying the "kernel" attribute are IPO-amendable.
  AC.IPOAmendableCB = [](const Function &F) {
    return F.hasFnAttribute("kernel");
  };

  Attributor A(Functions, InfoCache, AC);

  OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
  // true == run in module-pass mode.
  Changed |= OMPOpt.run(true);

  // Optionally inline device functions for potentially better performance.
  if (AlwaysInlineDeviceFunctions && isOpenMPDevice(M))
    for (Function &F : M)
      if (!F.isDeclaration() && !Kernels.contains(&F) &&
          !F.hasFnAttribute(Attribute::NoInline))
        F.addFnAttr(Attribute::AlwaysInline);

  if (PrintModuleAfterOptimizations)
    LLVM_DEBUG(dbgs() << TAG << "Module after OpenMPOpt Module Pass:\n" << M);

  if (Changed)
    return PreservedAnalyses::none();

  return PreservedAnalyses::all();
}
5826344a3780SDimitry Andric
/// CGSCC-level OpenMP optimization driver.
///
/// Runs the Attributor-based OpenMP optimizations over the functions of the
/// current SCC \p C, updating the lazy call graph \p CG through \p UR.
PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C,
                                          CGSCCAnalysisManager &AM,
                                          LazyCallGraph &CG,
                                          CGSCCUpdateResult &UR) {
  // Bail out cheaply when the enclosing module has no OpenMP metadata, or
  // when the optimizations are explicitly disabled.
  if (!containsOpenMP(*C.begin()->getFunction().getParent()))
    return PreservedAnalyses::all();
  if (DisableOpenMPOptimizations)
    return PreservedAnalyses::all();

  SmallVector<Function *, 16> SCC;
  // If there are kernels in the module, we have to run on all SCC's.
  for (LazyCallGraph::Node &N : C) {
    Function *Fn = &N.getFunction();
    SCC.push_back(Fn);
  }

  if (SCC.empty())
    return PreservedAnalyses::all();

  Module &M = *C.begin()->getFunction().getParent();

  if (PrintModuleBeforeOptimizations)
    LLVM_DEBUG(dbgs() << TAG << "Module before OpenMPOpt CGSCC Pass:\n" << M);

  // NOTE(review): Kernels is not referenced below in this function; the call
  // does update the kernel statistics counters, though — confirm before
  // removing.
  KernelSet Kernels = getDeviceKernels(M);

  FunctionAnalysisManager &FAM =
      AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();

  AnalysisGetter AG(FAM);

  auto OREGetter = [&FAM](Function *F) -> OptimizationRemarkEmitter & {
    return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F);
  };

  BumpPtrAllocator Allocator;
  CallGraphUpdater CGUpdater;
  CGUpdater.initialize(CG, C, AM, UR);

  // NOTE(review): PostLink also covers the ThinLTO *pre*-link phase; this
  // mirrors the module pass — confirm intent before changing either.
  bool PostLink = LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink ||
                  LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink;
  SetVector<Function *> Functions(SCC.begin(), SCC.end());
  OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator,
                                /*CGSCC*/ &Functions, PostLink);

  // Device modules honor the user-configurable iteration cap; host modules
  // use a fixed budget of 32 fixpoint iterations.
  unsigned MaxFixpointIterations =
      (isOpenMPDevice(M)) ? SetFixpointIterations : 32;

  AttributorConfig AC(CGUpdater);
  AC.DefaultInitializeLiveInternals = false;
  AC.IsModulePass = false;
  AC.RewriteSignatures = false;
  AC.MaxFixpointIterations = MaxFixpointIterations;
  AC.OREGetter = OREGetter;
  AC.PassName = DEBUG_TYPE;
  AC.InitializationCallback = OpenMPOpt::registerAAsForFunction;

  Attributor A(Functions, InfoCache, AC);

  OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
  // false == run in CGSCC mode.
  bool Changed = OMPOpt.run(false);

  if (PrintModuleAfterOptimizations)
    LLVM_DEBUG(dbgs() << TAG << "Module after OpenMPOpt CGSCC Pass:\n" << M);

  if (Changed)
    return PreservedAnalyses::none();

  return PreservedAnalyses::all();
}
5897cfca06d7SDimitry Andric
isOpenMPKernel(Function & Fn)5898b1c73532SDimitry Andric bool llvm::omp::isOpenMPKernel(Function &Fn) {
5899b1c73532SDimitry Andric return Fn.hasFnAttribute("kernel");
5900b1c73532SDimitry Andric }
59017fa27ce4SDimitry Andric
getDeviceKernels(Module & M)5902344a3780SDimitry Andric KernelSet llvm::omp::getDeviceKernels(Module &M) {
5903344a3780SDimitry Andric // TODO: Create a more cross-platform way of determining device kernels.
5904e3b55780SDimitry Andric NamedMDNode *MD = M.getNamedMetadata("nvvm.annotations");
5905344a3780SDimitry Andric KernelSet Kernels;
5906344a3780SDimitry Andric
5907cfca06d7SDimitry Andric if (!MD)
5908344a3780SDimitry Andric return Kernels;
5909cfca06d7SDimitry Andric
5910cfca06d7SDimitry Andric for (auto *Op : MD->operands()) {
5911cfca06d7SDimitry Andric if (Op->getNumOperands() < 2)
5912cfca06d7SDimitry Andric continue;
5913cfca06d7SDimitry Andric MDString *KindID = dyn_cast<MDString>(Op->getOperand(1));
5914cfca06d7SDimitry Andric if (!KindID || KindID->getString() != "kernel")
5915cfca06d7SDimitry Andric continue;
5916cfca06d7SDimitry Andric
5917cfca06d7SDimitry Andric Function *KernelFn =
5918cfca06d7SDimitry Andric mdconst::dyn_extract_or_null<Function>(Op->getOperand(0));
5919cfca06d7SDimitry Andric if (!KernelFn)
5920cfca06d7SDimitry Andric continue;
5921cfca06d7SDimitry Andric
5922b1c73532SDimitry Andric // We are only interested in OpenMP target regions. Others, such as kernels
5923b1c73532SDimitry Andric // generated by CUDA but linked together, are not interesting to this pass.
5924b1c73532SDimitry Andric if (isOpenMPKernel(*KernelFn)) {
5925cfca06d7SDimitry Andric ++NumOpenMPTargetRegionKernels;
5926cfca06d7SDimitry Andric Kernels.insert(KernelFn);
5927b1c73532SDimitry Andric } else
5928b1c73532SDimitry Andric ++NumNonOpenMPTargetRegionKernels;
5929cfca06d7SDimitry Andric }
5930344a3780SDimitry Andric
5931344a3780SDimitry Andric return Kernels;
5932cfca06d7SDimitry Andric }
5933cfca06d7SDimitry Andric
containsOpenMP(Module & M)5934344a3780SDimitry Andric bool llvm::omp::containsOpenMP(Module &M) {
5935344a3780SDimitry Andric Metadata *MD = M.getModuleFlag("openmp");
5936344a3780SDimitry Andric if (!MD)
5937344a3780SDimitry Andric return false;
5938cfca06d7SDimitry Andric
5939cfca06d7SDimitry Andric return true;
5940cfca06d7SDimitry Andric }
5941cfca06d7SDimitry Andric
isOpenMPDevice(Module & M)5942344a3780SDimitry Andric bool llvm::omp::isOpenMPDevice(Module &M) {
5943344a3780SDimitry Andric Metadata *MD = M.getModuleFlag("openmp-device");
5944344a3780SDimitry Andric if (!MD)
5945344a3780SDimitry Andric return false;
5946344a3780SDimitry Andric
5947344a3780SDimitry Andric return true;
5948cfca06d7SDimitry Andric }
5949