1b60736ecSDimitry Andric //===-- SIProgramInfo.cpp ----------------------------------------------===//
2b60736ecSDimitry Andric //
3b60736ecSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4b60736ecSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5b60736ecSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6b60736ecSDimitry Andric //
7b60736ecSDimitry Andric //===----------------------------------------------------------------------===//
8b60736ecSDimitry Andric //
9b60736ecSDimitry Andric /// \file
10b60736ecSDimitry Andric ///
11b60736ecSDimitry Andric /// The SIProgramInfo tracks resource usage and hardware flags for kernels and
12b60736ecSDimitry Andric /// entry functions.
13b60736ecSDimitry Andric //
14b60736ecSDimitry Andric //===----------------------------------------------------------------------===//
15b60736ecSDimitry Andric //
16b60736ecSDimitry Andric
17b60736ecSDimitry Andric #include "SIProgramInfo.h"
18312c0ed1SDimitry Andric #include "GCNSubtarget.h"
19b60736ecSDimitry Andric #include "SIDefines.h"
20b60736ecSDimitry Andric #include "Utils/AMDGPUBaseInfo.h"
21ac9a064cSDimitry Andric #include "llvm/MC/MCExpr.h"
22b60736ecSDimitry Andric
23b60736ecSDimitry Andric using namespace llvm;
24b60736ecSDimitry Andric
reset(const MachineFunction & MF)25ac9a064cSDimitry Andric void SIProgramInfo::reset(const MachineFunction &MF) {
26ac9a064cSDimitry Andric MCContext &Ctx = MF.getContext();
27ac9a064cSDimitry Andric
28ac9a064cSDimitry Andric const MCExpr *ZeroExpr = MCConstantExpr::create(0, Ctx);
29ac9a064cSDimitry Andric
30ac9a064cSDimitry Andric VGPRBlocks = ZeroExpr;
31ac9a064cSDimitry Andric SGPRBlocks = ZeroExpr;
32ac9a064cSDimitry Andric Priority = 0;
33ac9a064cSDimitry Andric FloatMode = 0;
34ac9a064cSDimitry Andric Priv = 0;
35ac9a064cSDimitry Andric DX10Clamp = 0;
36ac9a064cSDimitry Andric DebugMode = 0;
37ac9a064cSDimitry Andric IEEEMode = 0;
38ac9a064cSDimitry Andric WgpMode = 0;
39ac9a064cSDimitry Andric MemOrdered = 0;
40ac9a064cSDimitry Andric RrWgMode = 0;
41ac9a064cSDimitry Andric ScratchSize = ZeroExpr;
42ac9a064cSDimitry Andric
43ac9a064cSDimitry Andric LDSBlocks = 0;
44ac9a064cSDimitry Andric ScratchBlocks = ZeroExpr;
45ac9a064cSDimitry Andric
46ac9a064cSDimitry Andric ScratchEnable = ZeroExpr;
47ac9a064cSDimitry Andric UserSGPR = 0;
48ac9a064cSDimitry Andric TrapHandlerEnable = 0;
49ac9a064cSDimitry Andric TGIdXEnable = 0;
50ac9a064cSDimitry Andric TGIdYEnable = 0;
51ac9a064cSDimitry Andric TGIdZEnable = 0;
52ac9a064cSDimitry Andric TGSizeEnable = 0;
53ac9a064cSDimitry Andric TIdIGCompCount = 0;
54ac9a064cSDimitry Andric EXCPEnMSB = 0;
55ac9a064cSDimitry Andric LdsSize = 0;
56ac9a064cSDimitry Andric EXCPEnable = 0;
57ac9a064cSDimitry Andric
58ac9a064cSDimitry Andric ComputePGMRSrc3GFX90A = ZeroExpr;
59ac9a064cSDimitry Andric
60ac9a064cSDimitry Andric NumVGPR = ZeroExpr;
61ac9a064cSDimitry Andric NumArchVGPR = ZeroExpr;
62ac9a064cSDimitry Andric NumAccVGPR = ZeroExpr;
63ac9a064cSDimitry Andric AccumOffset = ZeroExpr;
64ac9a064cSDimitry Andric TgSplit = 0;
65ac9a064cSDimitry Andric NumSGPR = ZeroExpr;
66ac9a064cSDimitry Andric SGPRSpill = 0;
67ac9a064cSDimitry Andric VGPRSpill = 0;
68ac9a064cSDimitry Andric LDSSize = 0;
69ac9a064cSDimitry Andric FlatUsed = ZeroExpr;
70ac9a064cSDimitry Andric
71ac9a064cSDimitry Andric NumSGPRsForWavesPerEU = ZeroExpr;
72ac9a064cSDimitry Andric NumVGPRsForWavesPerEU = ZeroExpr;
73ac9a064cSDimitry Andric Occupancy = ZeroExpr;
74ac9a064cSDimitry Andric DynamicCallStack = ZeroExpr;
75ac9a064cSDimitry Andric VCCUsed = ZeroExpr;
76ac9a064cSDimitry Andric }
77ac9a064cSDimitry Andric
getComputePGMRSrc1Reg(const SIProgramInfo & ProgInfo,const GCNSubtarget & ST)78ac9a064cSDimitry Andric static uint64_t getComputePGMRSrc1Reg(const SIProgramInfo &ProgInfo,
79ac9a064cSDimitry Andric const GCNSubtarget &ST) {
80ac9a064cSDimitry Andric uint64_t Reg = S_00B848_PRIORITY(ProgInfo.Priority) |
81ac9a064cSDimitry Andric S_00B848_FLOAT_MODE(ProgInfo.FloatMode) |
82ac9a064cSDimitry Andric S_00B848_PRIV(ProgInfo.Priv) |
83ac9a064cSDimitry Andric S_00B848_DEBUG_MODE(ProgInfo.DebugMode) |
84ac9a064cSDimitry Andric S_00B848_WGP_MODE(ProgInfo.WgpMode) |
85ac9a064cSDimitry Andric S_00B848_MEM_ORDERED(ProgInfo.MemOrdered);
86312c0ed1SDimitry Andric
87312c0ed1SDimitry Andric if (ST.hasDX10ClampMode())
88ac9a064cSDimitry Andric Reg |= S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp);
89312c0ed1SDimitry Andric
90312c0ed1SDimitry Andric if (ST.hasIEEEMode())
91ac9a064cSDimitry Andric Reg |= S_00B848_IEEE_MODE(ProgInfo.IEEEMode);
92312c0ed1SDimitry Andric
93312c0ed1SDimitry Andric if (ST.hasRrWGMode())
94ac9a064cSDimitry Andric Reg |= S_00B848_RR_WG_MODE(ProgInfo.RrWgMode);
95312c0ed1SDimitry Andric
96312c0ed1SDimitry Andric return Reg;
97b60736ecSDimitry Andric }
98b60736ecSDimitry Andric
getPGMRSrc1Reg(const SIProgramInfo & ProgInfo,CallingConv::ID CC,const GCNSubtarget & ST)99ac9a064cSDimitry Andric static uint64_t getPGMRSrc1Reg(const SIProgramInfo &ProgInfo,
100ac9a064cSDimitry Andric CallingConv::ID CC, const GCNSubtarget &ST) {
101ac9a064cSDimitry Andric uint64_t Reg = S_00B848_PRIORITY(ProgInfo.Priority) |
102ac9a064cSDimitry Andric S_00B848_FLOAT_MODE(ProgInfo.FloatMode) |
103ac9a064cSDimitry Andric S_00B848_PRIV(ProgInfo.Priv) |
104ac9a064cSDimitry Andric S_00B848_DEBUG_MODE(ProgInfo.DebugMode);
105312c0ed1SDimitry Andric
106312c0ed1SDimitry Andric if (ST.hasDX10ClampMode())
107ac9a064cSDimitry Andric Reg |= S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp);
108312c0ed1SDimitry Andric
109312c0ed1SDimitry Andric if (ST.hasIEEEMode())
110ac9a064cSDimitry Andric Reg |= S_00B848_IEEE_MODE(ProgInfo.IEEEMode);
111312c0ed1SDimitry Andric
112312c0ed1SDimitry Andric if (ST.hasRrWGMode())
113ac9a064cSDimitry Andric Reg |= S_00B848_RR_WG_MODE(ProgInfo.RrWgMode);
114312c0ed1SDimitry Andric
115b60736ecSDimitry Andric switch (CC) {
116b60736ecSDimitry Andric case CallingConv::AMDGPU_PS:
117ac9a064cSDimitry Andric Reg |= S_00B028_MEM_ORDERED(ProgInfo.MemOrdered);
118b60736ecSDimitry Andric break;
119b60736ecSDimitry Andric case CallingConv::AMDGPU_VS:
120ac9a064cSDimitry Andric Reg |= S_00B128_MEM_ORDERED(ProgInfo.MemOrdered);
121b60736ecSDimitry Andric break;
122b60736ecSDimitry Andric case CallingConv::AMDGPU_GS:
123ac9a064cSDimitry Andric Reg |= S_00B228_WGP_MODE(ProgInfo.WgpMode) |
124ac9a064cSDimitry Andric S_00B228_MEM_ORDERED(ProgInfo.MemOrdered);
125b60736ecSDimitry Andric break;
126b60736ecSDimitry Andric case CallingConv::AMDGPU_HS:
127ac9a064cSDimitry Andric Reg |= S_00B428_WGP_MODE(ProgInfo.WgpMode) |
128ac9a064cSDimitry Andric S_00B428_MEM_ORDERED(ProgInfo.MemOrdered);
129b60736ecSDimitry Andric break;
130b60736ecSDimitry Andric default:
131b60736ecSDimitry Andric break;
132b60736ecSDimitry Andric }
133b60736ecSDimitry Andric return Reg;
134b60736ecSDimitry Andric }
1357fa27ce4SDimitry Andric
getComputePGMRSrc2Reg(const SIProgramInfo & ProgInfo)136ac9a064cSDimitry Andric static uint64_t getComputePGMRSrc2Reg(const SIProgramInfo &ProgInfo) {
137ac9a064cSDimitry Andric uint64_t Reg = S_00B84C_USER_SGPR(ProgInfo.UserSGPR) |
138ac9a064cSDimitry Andric S_00B84C_TRAP_HANDLER(ProgInfo.TrapHandlerEnable) |
139ac9a064cSDimitry Andric S_00B84C_TGID_X_EN(ProgInfo.TGIdXEnable) |
140ac9a064cSDimitry Andric S_00B84C_TGID_Y_EN(ProgInfo.TGIdYEnable) |
141ac9a064cSDimitry Andric S_00B84C_TGID_Z_EN(ProgInfo.TGIdZEnable) |
142ac9a064cSDimitry Andric S_00B84C_TG_SIZE_EN(ProgInfo.TGSizeEnable) |
143ac9a064cSDimitry Andric S_00B84C_TIDIG_COMP_CNT(ProgInfo.TIdIGCompCount) |
144ac9a064cSDimitry Andric S_00B84C_EXCP_EN_MSB(ProgInfo.EXCPEnMSB) |
145ac9a064cSDimitry Andric S_00B84C_LDS_SIZE(ProgInfo.LdsSize) |
146ac9a064cSDimitry Andric S_00B84C_EXCP_EN(ProgInfo.EXCPEnable);
1477fa27ce4SDimitry Andric
1487fa27ce4SDimitry Andric return Reg;
1497fa27ce4SDimitry Andric }
1507fa27ce4SDimitry Andric
/// Build the expression `(Val & Mask) << Shift`.
///
/// A \p Mask of 0 means "do not mask" (no AND node is emitted), and a
/// \p Shift of 0 emits no shift node, so `MaskShift(Val, 0, 0, Ctx)` returns
/// \p Val unchanged.
///
/// \param Val   expression to mask and shift.
/// \param Mask  bit mask applied before shifting; 0 disables masking.
/// \param Shift left-shift amount; 0 disables shifting.
/// \param Ctx   context used to create the new expression nodes.
/// \returns the resulting expression.
static const MCExpr *MaskShift(const MCExpr *Val, uint32_t Mask, uint32_t Shift,
                               MCContext &Ctx) {
  const MCExpr *Result = Val;

  if (Mask != 0)
    Result =
        MCBinaryExpr::createAnd(Result, MCConstantExpr::create(Mask, Ctx), Ctx);

  if (Shift != 0)
    Result = MCBinaryExpr::createShl(Result,
                                     MCConstantExpr::create(Shift, Ctx), Ctx);

  return Result;
}
1637fa27ce4SDimitry Andric
getComputePGMRSrc1(const GCNSubtarget & ST,MCContext & Ctx) const164ac9a064cSDimitry Andric const MCExpr *SIProgramInfo::getComputePGMRSrc1(const GCNSubtarget &ST,
165ac9a064cSDimitry Andric MCContext &Ctx) const {
166ac9a064cSDimitry Andric uint64_t Reg = getComputePGMRSrc1Reg(*this, ST);
167ac9a064cSDimitry Andric const MCExpr *RegExpr = MCConstantExpr::create(Reg, Ctx);
168ac9a064cSDimitry Andric const MCExpr *Res = MCBinaryExpr::createOr(
169ac9a064cSDimitry Andric MaskShift(VGPRBlocks, /*Mask=*/0x3F, /*Shift=*/0, Ctx),
170ac9a064cSDimitry Andric MaskShift(SGPRBlocks, /*Mask=*/0xF, /*Shift=*/6, Ctx), Ctx);
171ac9a064cSDimitry Andric return MCBinaryExpr::createOr(RegExpr, Res, Ctx);
172ac9a064cSDimitry Andric }
173ac9a064cSDimitry Andric
getPGMRSrc1(CallingConv::ID CC,const GCNSubtarget & ST,MCContext & Ctx) const174ac9a064cSDimitry Andric const MCExpr *SIProgramInfo::getPGMRSrc1(CallingConv::ID CC,
175ac9a064cSDimitry Andric const GCNSubtarget &ST,
176ac9a064cSDimitry Andric MCContext &Ctx) const {
177ac9a064cSDimitry Andric if (AMDGPU::isCompute(CC)) {
178ac9a064cSDimitry Andric return getComputePGMRSrc1(ST, Ctx);
179ac9a064cSDimitry Andric }
180ac9a064cSDimitry Andric
181ac9a064cSDimitry Andric uint64_t Reg = getPGMRSrc1Reg(*this, CC, ST);
182ac9a064cSDimitry Andric const MCExpr *RegExpr = MCConstantExpr::create(Reg, Ctx);
183ac9a064cSDimitry Andric const MCExpr *Res = MCBinaryExpr::createOr(
184ac9a064cSDimitry Andric MaskShift(VGPRBlocks, /*Mask=*/0x3F, /*Shift=*/0, Ctx),
185ac9a064cSDimitry Andric MaskShift(SGPRBlocks, /*Mask=*/0xF, /*Shift=*/6, Ctx), Ctx);
186ac9a064cSDimitry Andric return MCBinaryExpr::createOr(RegExpr, Res, Ctx);
187ac9a064cSDimitry Andric }
188ac9a064cSDimitry Andric
getComputePGMRSrc2(MCContext & Ctx) const189ac9a064cSDimitry Andric const MCExpr *SIProgramInfo::getComputePGMRSrc2(MCContext &Ctx) const {
190ac9a064cSDimitry Andric uint64_t Reg = getComputePGMRSrc2Reg(*this);
191ac9a064cSDimitry Andric const MCExpr *RegExpr = MCConstantExpr::create(Reg, Ctx);
192ac9a064cSDimitry Andric return MCBinaryExpr::createOr(ScratchEnable, RegExpr, Ctx);
193ac9a064cSDimitry Andric }
194ac9a064cSDimitry Andric
getPGMRSrc2(CallingConv::ID CC,MCContext & Ctx) const195ac9a064cSDimitry Andric const MCExpr *SIProgramInfo::getPGMRSrc2(CallingConv::ID CC,
196ac9a064cSDimitry Andric MCContext &Ctx) const {
197ac9a064cSDimitry Andric if (AMDGPU::isCompute(CC))
198ac9a064cSDimitry Andric return getComputePGMRSrc2(Ctx);
199ac9a064cSDimitry Andric
200ac9a064cSDimitry Andric return MCConstantExpr::create(0, Ctx);
2017fa27ce4SDimitry Andric }
202