/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_ring.h"
#include "vi.h"
#include "vi_structs.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"

#include "ivsrcid/ivsrcid_vislands30.h"

#define GFX8_NUM_GFX_RINGS	1
#define GFX8_MEC_HPD_SIZE	4096

#define TOPAZ_GB_ADDR_CONFIG_GOLDEN	0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN	0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN	0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN	0x22011003

#define ARRAY_MODE(x)		((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)		((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)		((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)	((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)		((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)		((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)		((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)	((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)		((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK	0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK	0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK	0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK	0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK	0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK	0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD	1
#define CLE_BPM_SERDES_CMD	0

/* BPM Register Address */
enum {
	BPM_REG_CGLS_EN = 0,	/* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,	/* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,	/* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,	/* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,	/* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

#define RLC_FormatDirectRegListLength	14

MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
MODULE_FIRMWARE("amdgpu/vegam_me.bin");
MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");

static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};

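/*
 * The "golden" tables below are {register, AND mask, OR value} triplets,
 * applied by amdgpu_device_program_register_sequence(): on this family an
 * AND mask of 0xffffffff writes the value directly, otherwise the masked
 * bits are cleared and the OR value applied on top of the current value.
 */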
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_vegam_a11[] =
{
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 vegam_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};

static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};

static const char * const sq_edc_source_names[] = {
	"SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
	"SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
	"SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
	"SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
	"SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
	"SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
	"SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);

#define CG_ACLK_CNTL__ACLK_DIVIDER_MASK		0x0000007fL
#define CG_ACLK_CNTL__ACLK_DIVIDER__SHIFT	0x00000000L

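/*
 * Program the per-ASIC golden register sequences: clockgating init, the
 * golden settings and the common config table. Polaris10 additionally has
 * its ACLK divider programmed and, on a few known board variants, a pair
 * of quirk I2C writes issued via the ATOM BIOS I2C channel.
 */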
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	uint32_t data;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_device_program_register_sequence(adev,
							iceland_mgcg_cgcg_init,
							ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_iceland_a11,
							ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_device_program_register_sequence(adev,
							iceland_golden_common_all,
							ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_device_program_register_sequence(adev,
							fiji_mgcg_cgcg_init,
							ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_fiji_a10,
							ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_device_program_register_sequence(adev,
							fiji_golden_common_all,
							ARRAY_SIZE(fiji_golden_common_all));
		break;
	case CHIP_TONGA:
		amdgpu_device_program_register_sequence(adev,
							tonga_mgcg_cgcg_init,
							ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_tonga_a11,
							ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_device_program_register_sequence(adev,
							tonga_golden_common_all,
							ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_VEGAM:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_vegam_a11,
							ARRAY_SIZE(golden_settings_vegam_a11));
		amdgpu_device_program_register_sequence(adev,
							vegam_golden_common_all,
							ARRAY_SIZE(vegam_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris11_a11,
							ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris11_golden_common_all,
							ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris10_a11,
							ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris10_golden_common_all,
							ARRAY_SIZE(polaris10_golden_common_all));
		data = RREG32_SMC(ixCG_ACLK_CNTL);
		data &= ~CG_ACLK_CNTL__ACLK_DIVIDER_MASK;
		data |= 0x18 << CG_ACLK_CNTL__ACLK_DIVIDER__SHIFT;
		WREG32_SMC(ixCG_ACLK_CNTL, data);
		if ((adev->pdev->device == 0x67DF) && (adev->pdev->revision == 0xc7) &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1680))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_device_program_register_sequence(adev,
							cz_mgcg_cgcg_init,
							ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_settings_a11,
							ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_common_all,
							ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_device_program_register_sequence(adev,
							stoney_mgcg_cgcg_init,
							ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_settings_a11,
							ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_common_all,
							ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}

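/*
 * Basic ring sanity test: seed SCRATCH_REG0 with 0xCAFEDEAD, emit a
 * SET_UCONFIG_REG packet that writes 0xDEADBEEF to it, then poll until
 * the value lands or adev->usec_timeout microseconds elapse.
 */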
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	WREG32(mmSCRATCH_REG0, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		return r;

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, mmSCRATCH_REG0 - PACKET3_SET_UCONFIG_REG_START);
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(mmSCRATCH_REG0);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

	return r;
}

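/*
 * Indirect buffer test: point a WRITE_DATA packet at a writeback slot
 * pre-filled with 0xCAFEDEAD, schedule it as an IB and wait on the fence;
 * the slot must read back 0xDEADBEEF for the test to pass.
 */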
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;

	unsigned int index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));

	r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
	if (r)
		goto err1;

	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err2:
	amdgpu_ib_free(&ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}

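/* Drop every CP/RLC firmware reference taken by gfx_v8_0_init_microcode(). */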
static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
	amdgpu_ucode_release(&adev->gfx.pfp_fw);
	amdgpu_ucode_release(&adev->gfx.me_fw);
	amdgpu_ucode_release(&adev->gfx.ce_fw);
	amdgpu_ucode_release(&adev->gfx.rlc_fw);
	amdgpu_ucode_release(&adev->gfx.mec_fw);
	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ))
		amdgpu_ucode_release(&adev->gfx.mec2_fw);

	kfree(adev->gfx.rlc.register_list_format);
}

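/*
 * Fetch the CP (PFP/ME/CE/MEC/MEC2) and RLC firmware images for the
 * detected ASIC. Polaris parts first try the newer "_2" firmware names
 * and fall back to the original files if those are absent. MEC2 firmware
 * is optional where it is attempted, and not used at all on Stoney and
 * Topaz.
 */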
static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL, i;

	DRM_DEBUG("\n");

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		chip_name = "topaz";
		break;
	case CHIP_TONGA:
		chip_name = "tonga";
		break;
	case CHIP_CARRIZO:
		chip_name = "carrizo";
		break;
	case CHIP_FIJI:
		chip_name = "fiji";
		break;
	case CHIP_STONEY:
		chip_name = "stoney";
		break;
	case CHIP_POLARIS10:
		chip_name = "polaris10";
		break;
	case CHIP_POLARIS11:
		chip_name = "polaris11";
		break;
	case CHIP_POLARIS12:
		chip_name = "polaris12";
		break;
	case CHIP_VEGAM:
		chip_name = "vegam";
		break;
	default:
		BUG();
	}

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
					   AMDGPU_UCODE_OPTIONAL,
					   "amdgpu/%s_pfp_2.bin", chip_name);
		if (err == -ENODEV) {
			err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
						   AMDGPU_UCODE_REQUIRED,
						   "amdgpu/%s_pfp.bin", chip_name);
		}
	} else {
		err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
					   AMDGPU_UCODE_REQUIRED,
					   "amdgpu/%s_pfp.bin", chip_name);
	}
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
					   AMDGPU_UCODE_OPTIONAL,
					   "amdgpu/%s_me_2.bin", chip_name);
		if (err == -ENODEV) {
			err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
						   AMDGPU_UCODE_REQUIRED,
						   "amdgpu/%s_me.bin", chip_name);
		}
	} else {
		err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
					   AMDGPU_UCODE_REQUIRED,
					   "amdgpu/%s_me.bin", chip_name);
	}
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
					   AMDGPU_UCODE_OPTIONAL,
					   "amdgpu/%s_ce_2.bin", chip_name);
		if (err == -ENODEV) {
			err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
						   AMDGPU_UCODE_REQUIRED,
						   "amdgpu/%s_ce.bin", chip_name);
		}
	} else {
		err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
					   AMDGPU_UCODE_REQUIRED,
					   "amdgpu/%s_ce.bin", chip_name);
	}
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	/*
	 * Support for MCBP/Virtualization in combination with chained IBs was
	 * formally released with feature version #46.
	 */
	if (adev->gfx.ce_feature_version >= 46 &&
	    adev->gfx.pfp_feature_version >= 46) {
		adev->virt.chained_ib_support = true;
		DRM_INFO("Chained IB support enabled!\n");
	} else
		adev->virt.chained_ib_support = false;

	err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
				   AMDGPU_UCODE_REQUIRED,
				   "amdgpu/%s_rlc.bin", chip_name);
	if (err)
		goto out;
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);

	adev->gfx.rlc.save_and_restore_offset =
		le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
		le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
		le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
		le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
		le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
		le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
		le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
		le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
		le32_to_cpu(rlc_hdr->reg_list_size_bytes);

	adev->gfx.rlc.register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
			adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);

	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
					   AMDGPU_UCODE_OPTIONAL,
					   "amdgpu/%s_mec_2.bin", chip_name);
		if (err == -ENODEV) {
			err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
						   AMDGPU_UCODE_REQUIRED,
						   "amdgpu/%s_mec.bin", chip_name);
		}
	} else {
		err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
					   AMDGPU_UCODE_REQUIRED,
					   "amdgpu/%s_mec.bin", chip_name);
	}
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ)) {
		if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
			err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
						   AMDGPU_UCODE_OPTIONAL,
						   "amdgpu/%s_mec2_2.bin", chip_name);
			if (err == -ENODEV) {
				err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
							   AMDGPU_UCODE_REQUIRED,
							   "amdgpu/%s_mec2.bin", chip_name);
			}
		} else {
			err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
						   AMDGPU_UCODE_REQUIRED,
						   "amdgpu/%s_mec2.bin", chip_name);
		}
		if (!err) {
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
				adev->gfx.mec2_fw->data;
			adev->gfx.mec2_fw_version =
				le32_to_cpu(cp_hdr->header.ucode_version);
			adev->gfx.mec2_feature_version =
				le32_to_cpu(cp_hdr->ucode_feature_version);
		} else {
			err = 0;
			adev->gfx.mec2_fw = NULL;
		}
	}

	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
	info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
	info->fw = adev->gfx.pfp_fw;
	header = (const struct common_firmware_header *)info->fw->data;
	adev->firmware.fw_size +=
		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
	info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
	info->fw = adev->gfx.me_fw;
	header = (const struct common_firmware_header *)info->fw->data;
	adev->firmware.fw_size +=
		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
	info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
	info->fw = adev->gfx.ce_fw;
	header = (const struct common_firmware_header *)info->fw->data;
	adev->firmware.fw_size +=
		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
	info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
	info->fw = adev->gfx.rlc_fw;
	header = (const struct common_firmware_header *)info->fw->data;
	adev->firmware.fw_size +=
		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
	info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
	info->fw = adev->gfx.mec_fw;
	header = (const struct common_firmware_header *)info->fw->data;
	adev->firmware.fw_size +=
		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

	/* we also need to account for the MEC jump table (JT) */
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->firmware.fw_size +=
		ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);

	if (amdgpu_sriov_vf(adev)) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
		info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
		info->fw = adev->gfx.mec_fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
	}

	if (adev->gfx.mec2_fw) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
		info->fw = adev->gfx.mec2_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
	}

out:
	if (err) {
		dev_err(adev->dev, "gfx8: Failed to load gfx firmware for %s\n", chip_name);
		amdgpu_ucode_release(&adev->gfx.pfp_fw);
		amdgpu_ucode_release(&adev->gfx.me_fw);
		amdgpu_ucode_release(&adev->gfx.ce_fw);
		amdgpu_ucode_release(&adev->gfx.rlc_fw);
		amdgpu_ucode_release(&adev->gfx.mec_fw);
		amdgpu_ucode_release(&adev->gfx.mec2_fw);
	}
	return err;
}

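/*
 * Fill the clear-state buffer (CSB): the standard preamble, the parsed
 * clear-state data, then PA_SC_RASTER_CONFIG/_1 taken from the first
 * SE/SH's harvested RB configuration.
 */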
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	count = amdgpu_gfx_csb_preamble_start(buffer);
	count = amdgpu_gfx_csb_data_parser(adev, buffer, count);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	amdgpu_gfx_csb_preamble_end(buffer, count);
}

static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev)
{
	if (adev->asic_type == CHIP_CARRIZO)
		return 5;
	else
		return 4;
}

static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* init clear state block */
		r = amdgpu_gfx_rlc_init_csb(adev);
		if (r)
			return r;
	}

	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		r = amdgpu_gfx_rlc_init_cpt(adev);
		if (r)
			return r;
	}

	/* init spm vmid with 0xf */
	if (adev->gfx.rlc.funcs->update_spm_vmid)
		adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf);

	return 0;
}

static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
}

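/*
 * Allocate and zero the MEC's HPD EOP buffer: one GFX8_MEC_HPD_SIZE slot
 * per acquired compute ring, placed in VRAM or GTT.
 */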
static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	size_t mec_hpd_size;

	bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);

	mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
	if (mec_hpd_size) {
		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
					      AMDGPU_GEM_DOMAIN_VRAM |
					      AMDGPU_GEM_DOMAIN_GTT,
					      &adev->gfx.mec.hpd_eop_obj,
					      &adev->gfx.mec.hpd_eop_gpu_addr,
					      (void **)&hpd);
		if (r) {
			dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
			return r;
		}

		memset(hpd, 0, mec_hpd_size);

		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
	}

	return 0;
}

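/*
 * Hand-assembled GCN3 shaders (raw instruction dwords) used by the EDC
 * workaround below: the first initializes VGPRs, the second SGPRs, each
 * finishing with s_barrier (0xbf8a0000) and s_endpgm (0xbf810000).
 */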
1321 static const u32 vgpr_init_compute_shader[] =
1322 {
1323 0x7e000209, 0x7e020208,
1324 0x7e040207, 0x7e060206,
1325 0x7e080205, 0x7e0a0204,
1326 0x7e0c0203, 0x7e0e0202,
1327 0x7e100201, 0x7e120200,
1328 0x7e140209, 0x7e160208,
1329 0x7e180207, 0x7e1a0206,
1330 0x7e1c0205, 0x7e1e0204,
1331 0x7e200203, 0x7e220202,
1332 0x7e240201, 0x7e260200,
1333 0x7e280209, 0x7e2a0208,
1334 0x7e2c0207, 0x7e2e0206,
1335 0x7e300205, 0x7e320204,
1336 0x7e340203, 0x7e360202,
1337 0x7e380201, 0x7e3a0200,
1338 0x7e3c0209, 0x7e3e0208,
1339 0x7e400207, 0x7e420206,
1340 0x7e440205, 0x7e460204,
1341 0x7e480203, 0x7e4a0202,
1342 0x7e4c0201, 0x7e4e0200,
1343 0x7e500209, 0x7e520208,
1344 0x7e540207, 0x7e560206,
1345 0x7e580205, 0x7e5a0204,
1346 0x7e5c0203, 0x7e5e0202,
1347 0x7e600201, 0x7e620200,
1348 0x7e640209, 0x7e660208,
1349 0x7e680207, 0x7e6a0206,
1350 0x7e6c0205, 0x7e6e0204,
1351 0x7e700203, 0x7e720202,
1352 0x7e740201, 0x7e760200,
1353 0x7e780209, 0x7e7a0208,
1354 0x7e7c0207, 0x7e7e0206,
1355 0xbf8a0000, 0xbf810000,
1356 };
1357
1358 static const u32 sgpr_init_compute_shader[] =
1359 {
1360 0xbe8a0100, 0xbe8c0102,
1361 0xbe8e0104, 0xbe900106,
1362 0xbe920108, 0xbe940100,
1363 0xbe960102, 0xbe980104,
1364 0xbe9a0106, 0xbe9c0108,
1365 0xbe9e0100, 0xbea00102,
1366 0xbea20104, 0xbea40106,
1367 0xbea60108, 0xbea80100,
1368 0xbeaa0102, 0xbeac0104,
1369 0xbeae0106, 0xbeb00108,
1370 0xbeb20100, 0xbeb40102,
1371 0xbeb60104, 0xbeb80106,
1372 0xbeba0108, 0xbebc0100,
1373 0xbebe0102, 0xbec00104,
1374 0xbec20106, 0xbec40108,
1375 0xbec60100, 0xbec80102,
1376 0xbee60004, 0xbee70005,
1377 0xbeea0006, 0xbeeb0007,
1378 0xbee80008, 0xbee90009,
1379 0xbefc0000, 0xbf8a0000,
1380 0xbf810000, 0x00000000,
1381 };
1382
1383 static const u32 vgpr_init_regs[] =
1384 {
1385 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1386 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1387 mmCOMPUTE_NUM_THREAD_X, 256*4,
1388 mmCOMPUTE_NUM_THREAD_Y, 1,
1389 mmCOMPUTE_NUM_THREAD_Z, 1,
1390 mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
1391 mmCOMPUTE_PGM_RSRC2, 20,
1392 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1393 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1394 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1395 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1396 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1397 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1398 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1399 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1400 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1401 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1402 };
1403
1404 static const u32 sgpr1_init_regs[] =
1405 {
1406 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1407 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1408 mmCOMPUTE_NUM_THREAD_X, 256*5,
1409 mmCOMPUTE_NUM_THREAD_Y, 1,
1410 mmCOMPUTE_NUM_THREAD_Z, 1,
1411 mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1412 mmCOMPUTE_PGM_RSRC2, 20,
1413 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1414 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1415 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1416 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1417 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1418 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1419 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1420 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1421 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1422 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1423 };
1424
1425 static const u32 sgpr2_init_regs[] =
1426 {
1427 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1428 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1429 mmCOMPUTE_NUM_THREAD_X, 256*5,
1430 mmCOMPUTE_NUM_THREAD_Y, 1,
1431 mmCOMPUTE_NUM_THREAD_Z, 1,
1432 mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1433 mmCOMPUTE_PGM_RSRC2, 20,
1434 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1435 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1436 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1437 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1438 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1439 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1440 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1441 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1442 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1443 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1444 };

static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};

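/*
 * GPR init workaround (Carrizo only): dispatch throwaway compute
 * shaders that write every VGPR and SGPR once before enabling the
 * EDC/ECC counting modes below.  The assumption here is that a
 * never-written register file has undefined parity and would otherwise
 * latch spurious SEC/DED counts.
 */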
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* bail if the compute ring is not ready */
	if (!ring->sched.ready)
		return 0;

	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);

	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size = ALIGN(total_size, 256);
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);
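
	/*
	 * Sizing sanity check for the sums above: each register pair costs
	 * 3 dwords (SET_SH_REG header + offset + value), the PGM_LO/HI
	 * write costs 4, DISPATCH_DIRECT costs 5 and the EVENT_WRITE flush
	 * costs 2, hence ((N / 2) * 3 + 4 + 5 + 2) dwords at 4 bytes each
	 * per dispatch.
	 */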

	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, total_size,
			  AMDGPU_IB_POOL_DIRECT, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
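
	/*
	 * EVENT_TYPE(7) is CS_PARTIAL_FLUSH, i.e. wait for the dispatch
	 * above to drain before the next one starts.  The same
	 * SET_SH_REG -> PGM_LO/HI -> DISPATCH_DIRECT -> flush sequence
	 * repeats verbatim for the two SGPR passes below.
	 */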

	/* SGPR1 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR2 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = dma_fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);

	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	amdgpu_ib_free(&ib, NULL);
	dma_fence_put(f);

	return r;
}

static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	u32 mc_arb_ramcfg;
	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
	u32 tmp;
	int ret;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_cu_per_sh = 6;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_FIJI:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 16;
		adev->gfx.config.max_cu_per_sh = 16;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 4;
		adev->gfx.config.max_texture_channel_caches = 16;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS10:
	case CHIP_VEGAM:
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_TONGA:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 8;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 8;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_CARRIZO:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_STONEY:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 1;
		adev->gfx.config.max_cu_per_sh = 3;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 16;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		adev->gfx.config.max_shader_engines = 2;
		adev->gfx.config.max_tile_pipes = 4;
		adev->gfx.config.max_cu_per_sh = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 4;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

	adev->gfx.config.num_banks = REG_GET_FIELD(mc_arb_ramcfg,
				MC_ARB_RAMCFG, NOOFBANK);
	adev->gfx.config.num_ranks = REG_GET_FIELD(mc_arb_ramcfg,
				MC_ARB_RAMCFG, NOOFRANKS);

	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
	adev->gfx.config.mem_max_burst_length_bytes = 256;
	if (adev->flags & AMD_IS_APU) {
		/* Get memory bank mapping mode. */
		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		/* Validate settings in case only one DIMM is installed. */
		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
			dimm00_addr_map = 0;
		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
			dimm01_addr_map = 0;
		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
			dimm10_addr_map = 0;
		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
			dimm11_addr_map = 0;

		/* If DIMM addr map is 8GB, row size should be 2KB. Otherwise 1KB. */
		/* If row size(DIMM1) != row size(DIMM0), use the larger one. */
		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
			adev->gfx.config.mem_row_size_in_kb = 2;
		else
			adev->gfx.config.mem_row_size_in_kb = 1;
	} else {
		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
		if (adev->gfx.config.mem_row_size_in_kb > 4)
			adev->gfx.config.mem_row_size_in_kb = 4;
	}
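
	/*
	 * Worked example for the dGPU branch above: row size in bytes is
	 * 4 * 2^(8 + NOOFCOLS), so NOOFCOLS = 0/1/2 gives 1/2/4 KB and
	 * anything wider is clamped to the 4 KB maximum that the ROW_SIZE
	 * fixup below can encode.
	 */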

	adev->gfx.config.shader_engine_tile_size = 32;
	adev->gfx.config.num_gpus = 1;
	adev->gfx.config.multi_gpu_tile_size = 64;

	/* fix up row size */
	switch (adev->gfx.config.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
		break;
	case 2:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
		break;
	case 4:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
		break;
	}
	adev->gfx.config.gb_addr_config = gb_addr_config;

	return 0;
}

static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
				       int mec, int pipe, int queue)
{
	int r;
	unsigned irq_type;
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
	unsigned int hw_prio;

	/* mec0 is me1 */
	ring->me = mec + 1;
	ring->pipe = pipe;
	ring->queue = queue;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id;
	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
				+ (ring_id * GFX8_MEC_HPD_SIZE);
	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);

	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
		+ ring->pipe;
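
	/*
	 * The arithmetic above relies on the compute EOP interrupt sources
	 * being laid out per (me, pipe) - MEC1 pipes 0-3, then MEC2 pipes
	 * 0-3 - consecutively from AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP;
	 * the queue within a pipe is identified later from the interrupt
	 * vector data rather than from the source id.
	 */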

	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
			AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
			     hw_prio, NULL);
	if (r)
		return r;

	return 0;
}

static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);

static int gfx_v8_0_sw_init(struct amdgpu_ip_block *ip_block)
{
	int i, j, k, r, ring_id;
	int xcc_id = 0;
	struct amdgpu_ring *ring;
	struct amdgpu_device *adev = ip_block->adev;

	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_CARRIZO:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		adev->gfx.mec.num_mec = 2;
		break;
	case CHIP_TOPAZ:
	case CHIP_STONEY:
	default:
		adev->gfx.mec.num_mec = 1;
		break;
	}

	adev->gfx.mec.num_pipe_per_mec = 4;
	adev->gfx.mec.num_queue_per_pipe = 8;
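
	/*
	 * Worst case this describes 2 MECs * 4 pipes * 8 queues = 64
	 * hardware compute queues; the loop further down only creates
	 * rings for the subset that amdgpu_gfx_is_mec_queue_enabled()
	 * reports as usable.
	 */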

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	/* Add CP EDC/ECC irq */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
			      &adev->gfx.cp_ecc_error_irq);
	if (r)
		return r;

	/* SQ interrupts. */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
			      &adev->gfx.sq_irq);
	if (r) {
		DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
		return r;
	}

	INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = adev->gfx.rlc.funcs->init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = adev->doorbell_index.gfx_ring0;
		}

		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
				     AMDGPU_RING_PRIO_DEFAULT, NULL);
		if (r)
			return r;
	}

	/* set up the compute queues - allocate horizontally across pipes */
	ring_id = 0;
	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
				if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
								     k, j))
					continue;

				r = gfx_v8_0_compute_ring_init(adev,
							       ring_id,
							       i, k, j);
				if (r)
					return r;

				ring_id++;
			}
		}
	}
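
	/*
	 * The pipe index is the innermost loop on purpose: the first few
	 * rings land on distinct pipes (and thus distinct hardware queue
	 * slots) before any pipe gets a second queue, which should spread
	 * load better when only a handful of compute rings are ever used.
	 */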

	r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE, 0);
	if (r) {
		DRM_ERROR("Failed to init KIQ BOs!\n");
		return r;
	}

	r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
	if (r)
		return r;

	/* create MQD for all compute queues as well as KIQ for SRIOV case */
	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation), 0);
	if (r)
		return r;

	adev->gfx.ce_ram_size = 0x8000;

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}

static int gfx_v8_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	int i;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

	amdgpu_gfx_mqd_sw_fini(adev, 0);
	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
	amdgpu_gfx_kiq_fini(adev, 0);

	gfx_v8_0_mec_fini(adev);
	amdgpu_gfx_rlc_fini(adev);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
			      &adev->gfx.rlc.clear_state_gpu_addr,
			      (void **)&adev->gfx.rlc.cs_ptr);
	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
				      &adev->gfx.rlc.cp_table_gpu_addr,
				      (void **)&adev->gfx.rlc.cp_table_ptr);
	}
	gfx_v8_0_free_microcode(adev);

	return 0;
}

static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
{
	uint32_t *modearray, *mod2array;
	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
	u32 reg_offset;

	modearray = adev->gfx.config.tile_mode_array;
	mod2array = adev->gfx.config.macrotile_mode_array;

	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
		modearray[reg_offset] = 0;

	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
		mod2array[reg_offset] = 0;
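
	/*
	 * modearray/mod2array shadow the GB_TILE_MODE0-31 and
	 * GB_MACROTILE_MODE0-15 register banks.  Each per-ASIC case below
	 * fills in golden values and writes them out, skipping the few
	 * indices (7, and on some parts 12/17/23) that are left reserved
	 * as zero.
	 */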

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P2));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
			    reg_offset != 23)
				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_FIJI:
	case CHIP_VEGAM:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_TONGA:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS10:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2884 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2885 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2886 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2887 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2888 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2889 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2890 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2891 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2892 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2893 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2894 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2895 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2896 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2897 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2898 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2899 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2900 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2901 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2902 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2903 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2904 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2905 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2906 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2907 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2908 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2909 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2910 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2911 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2912 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2913 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2914 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2915 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2916 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2917 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2918 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2919 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2920 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2921 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2922 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2923 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2924 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2925 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2926 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2927 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2928 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2929 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2930 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2931 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2932 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2933 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2934 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2935 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2936 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2937 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2938 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2939 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2940 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2941 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2942 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2943 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2944 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2945 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2946 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2947 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2948 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2949 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2950 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2951 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2952 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2953
2954 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2955 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2956 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2957 NUM_BANKS(ADDR_SURF_16_BANK));
2958
2959 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2960 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2961 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2962 NUM_BANKS(ADDR_SURF_16_BANK));
2963
2964 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2965 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2966 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2967 NUM_BANKS(ADDR_SURF_16_BANK));
2968
2969 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2970 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2971 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2972 NUM_BANKS(ADDR_SURF_16_BANK));
2973
2974 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2975 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2976 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2977 NUM_BANKS(ADDR_SURF_16_BANK));
2978
2979 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2980 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2981 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2982 NUM_BANKS(ADDR_SURF_16_BANK));
2983
2984 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2985 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2986 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2987 NUM_BANKS(ADDR_SURF_16_BANK));
2988
2989 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2990 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2991 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2992 NUM_BANKS(ADDR_SURF_16_BANK));
2993
2994 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2995 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2996 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2997 NUM_BANKS(ADDR_SURF_16_BANK));
2998
2999 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3000 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3001 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3002 NUM_BANKS(ADDR_SURF_16_BANK));
3003
3004 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3005 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3006 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3007 NUM_BANKS(ADDR_SURF_16_BANK));
3008
3009 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3010 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3011 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3012 NUM_BANKS(ADDR_SURF_8_BANK));
3013
3014 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3015 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3016 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3017 NUM_BANKS(ADDR_SURF_4_BANK));
3018
3019 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3020 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3021 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3022 NUM_BANKS(ADDR_SURF_4_BANK));
3023
3024 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3025 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3026
3027 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3028 if (reg_offset != 7)
3029 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3030
3031 break;
3032 case CHIP_STONEY:
3033 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3034 PIPE_CONFIG(ADDR_SURF_P2) |
3035 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3036 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3037 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3038 PIPE_CONFIG(ADDR_SURF_P2) |
3039 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3040 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3041 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3042 PIPE_CONFIG(ADDR_SURF_P2) |
3043 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3044 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3045 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3046 PIPE_CONFIG(ADDR_SURF_P2) |
3047 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3048 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3049 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3050 PIPE_CONFIG(ADDR_SURF_P2) |
3051 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3052 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3053 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3054 PIPE_CONFIG(ADDR_SURF_P2) |
3055 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3056 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3057 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3058 PIPE_CONFIG(ADDR_SURF_P2) |
3059 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3060 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3061 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3062 PIPE_CONFIG(ADDR_SURF_P2));
3063 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3064 PIPE_CONFIG(ADDR_SURF_P2) |
3065 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3066 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3067 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3068 PIPE_CONFIG(ADDR_SURF_P2) |
3069 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3070 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3071 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3072 PIPE_CONFIG(ADDR_SURF_P2) |
3073 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3074 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3075 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3076 PIPE_CONFIG(ADDR_SURF_P2) |
3077 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3078 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3079 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3080 PIPE_CONFIG(ADDR_SURF_P2) |
3081 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3082 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3083 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3084 PIPE_CONFIG(ADDR_SURF_P2) |
3085 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3086 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3087 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3088 PIPE_CONFIG(ADDR_SURF_P2) |
3089 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3090 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3091 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3092 PIPE_CONFIG(ADDR_SURF_P2) |
3093 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3094 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3095 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3096 PIPE_CONFIG(ADDR_SURF_P2) |
3097 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3098 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3099 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3100 PIPE_CONFIG(ADDR_SURF_P2) |
3101 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3102 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3103 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3104 PIPE_CONFIG(ADDR_SURF_P2) |
3105 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3106 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3107 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3108 PIPE_CONFIG(ADDR_SURF_P2) |
3109 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3110 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3111 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3112 PIPE_CONFIG(ADDR_SURF_P2) |
3113 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3114 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3115 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3116 PIPE_CONFIG(ADDR_SURF_P2) |
3117 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3118 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3119 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3120 PIPE_CONFIG(ADDR_SURF_P2) |
3121 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3122 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3123 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3124 PIPE_CONFIG(ADDR_SURF_P2) |
3125 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3126 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3127 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3128 PIPE_CONFIG(ADDR_SURF_P2) |
3129 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3130 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3131 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3132 PIPE_CONFIG(ADDR_SURF_P2) |
3133 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3134 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3135
3136 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3137 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3138 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3139 NUM_BANKS(ADDR_SURF_8_BANK));
3140 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3141 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3142 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3143 NUM_BANKS(ADDR_SURF_8_BANK));
3144 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3145 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3146 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3147 NUM_BANKS(ADDR_SURF_8_BANK));
3148 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3149 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3150 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3151 NUM_BANKS(ADDR_SURF_8_BANK));
3152 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3153 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3154 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3155 NUM_BANKS(ADDR_SURF_8_BANK));
3156 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3157 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3158 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3159 NUM_BANKS(ADDR_SURF_8_BANK));
3160 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3161 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3162 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3163 NUM_BANKS(ADDR_SURF_8_BANK));
3164 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3165 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3166 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3167 NUM_BANKS(ADDR_SURF_16_BANK));
3168 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3169 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3170 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3171 NUM_BANKS(ADDR_SURF_16_BANK));
3172 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3173 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3174 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3175 NUM_BANKS(ADDR_SURF_16_BANK));
3176 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3177 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3178 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3179 NUM_BANKS(ADDR_SURF_16_BANK));
3180 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3181 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3182 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3183 NUM_BANKS(ADDR_SURF_16_BANK));
3184 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3185 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3186 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3187 NUM_BANKS(ADDR_SURF_16_BANK));
3188 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3189 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3190 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3191 NUM_BANKS(ADDR_SURF_8_BANK));
3192
3193 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3194 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3195 reg_offset != 23)
3196 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3197
3198 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3199 if (reg_offset != 7)
3200 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3201
3202 break;
3203 default:
3204 dev_warn(adev->dev,
3205 			 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init(), falling through to CHIP_CARRIZO\n",
3206 adev->asic_type);
3207 fallthrough;
3208
3209 case CHIP_CARRIZO:
3210 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3211 PIPE_CONFIG(ADDR_SURF_P2) |
3212 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3213 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3214 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3215 PIPE_CONFIG(ADDR_SURF_P2) |
3216 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3217 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3218 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3219 PIPE_CONFIG(ADDR_SURF_P2) |
3220 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3221 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3222 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3223 PIPE_CONFIG(ADDR_SURF_P2) |
3224 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3225 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3226 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3227 PIPE_CONFIG(ADDR_SURF_P2) |
3228 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3229 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3230 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3231 PIPE_CONFIG(ADDR_SURF_P2) |
3232 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3233 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3234 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3235 PIPE_CONFIG(ADDR_SURF_P2) |
3236 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3237 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3238 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3239 PIPE_CONFIG(ADDR_SURF_P2));
3240 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3241 PIPE_CONFIG(ADDR_SURF_P2) |
3242 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3243 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3244 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3245 PIPE_CONFIG(ADDR_SURF_P2) |
3246 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3247 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3248 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3249 PIPE_CONFIG(ADDR_SURF_P2) |
3250 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3251 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3252 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3253 PIPE_CONFIG(ADDR_SURF_P2) |
3254 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3255 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3256 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3257 PIPE_CONFIG(ADDR_SURF_P2) |
3258 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3259 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3260 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3261 PIPE_CONFIG(ADDR_SURF_P2) |
3262 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3263 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3264 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3265 PIPE_CONFIG(ADDR_SURF_P2) |
3266 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3267 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3268 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3269 PIPE_CONFIG(ADDR_SURF_P2) |
3270 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3271 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3272 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3273 PIPE_CONFIG(ADDR_SURF_P2) |
3274 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3275 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3276 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3277 PIPE_CONFIG(ADDR_SURF_P2) |
3278 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3279 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3280 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3281 PIPE_CONFIG(ADDR_SURF_P2) |
3282 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3283 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3284 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3285 PIPE_CONFIG(ADDR_SURF_P2) |
3286 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3287 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3288 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3289 PIPE_CONFIG(ADDR_SURF_P2) |
3290 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3291 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3292 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3293 PIPE_CONFIG(ADDR_SURF_P2) |
3294 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3295 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3296 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3297 PIPE_CONFIG(ADDR_SURF_P2) |
3298 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3299 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3300 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3301 PIPE_CONFIG(ADDR_SURF_P2) |
3302 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3303 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3304 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3305 PIPE_CONFIG(ADDR_SURF_P2) |
3306 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3307 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3308 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3309 PIPE_CONFIG(ADDR_SURF_P2) |
3310 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3311 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3312
3313 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3314 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3315 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3316 NUM_BANKS(ADDR_SURF_8_BANK));
3317 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3318 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3319 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3320 NUM_BANKS(ADDR_SURF_8_BANK));
3321 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3322 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3323 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3324 NUM_BANKS(ADDR_SURF_8_BANK));
3325 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3326 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3327 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3328 NUM_BANKS(ADDR_SURF_8_BANK));
3329 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3330 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3331 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3332 NUM_BANKS(ADDR_SURF_8_BANK));
3333 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3334 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3335 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3336 NUM_BANKS(ADDR_SURF_8_BANK));
3337 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3338 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3339 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3340 NUM_BANKS(ADDR_SURF_8_BANK));
3341 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3342 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3343 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3344 NUM_BANKS(ADDR_SURF_16_BANK));
3345 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3346 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3347 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3348 NUM_BANKS(ADDR_SURF_16_BANK));
3349 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3350 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3351 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3352 NUM_BANKS(ADDR_SURF_16_BANK));
3353 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3354 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3355 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3356 NUM_BANKS(ADDR_SURF_16_BANK));
3357 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3358 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3359 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3360 NUM_BANKS(ADDR_SURF_16_BANK));
3361 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3362 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3363 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3364 NUM_BANKS(ADDR_SURF_16_BANK));
3365 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3366 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3367 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3368 NUM_BANKS(ADDR_SURF_8_BANK));
3369
3370 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3371 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3372 reg_offset != 23)
3373 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3374
3375 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3376 if (reg_offset != 7)
3377 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3378
3379 break;
3380 }
3381 }
3382
3383 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3384 u32 se_num, u32 sh_num, u32 instance,
3385 int xcc_id)
3386 {
3387 u32 data;
3388
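	/* 0xffffffff selects broadcast mode; any other value indexes a single instance/SE/SH */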
3389 if (instance == 0xffffffff)
3390 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3391 else
3392 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3393
3394 if (se_num == 0xffffffff)
3395 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3396 else
3397 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3398
3399 if (sh_num == 0xffffffff)
3400 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3401 else
3402 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3403
3404 WREG32(mmGRBM_GFX_INDEX, data);
3405 }
3406
3407 static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
3408 u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
3409 {
3410 vi_srbm_select(adev, me, pipe, q, vm);
3411 }
3412
3413 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3414 {
3415 u32 data, mask;
3416
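	/* merge the hardware- and user-disabled RB masks, then invert to get the active RBs */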
3417 data = RREG32(mmCC_RB_BACKEND_DISABLE) |
3418 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3419
3420 data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3421
3422 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3423 adev->gfx.config.max_sh_per_se);
3424
3425 return (~data) & mask;
3426 }
3427
3428 static void
3429 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3430 {
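	/* per-ASIC defaults for PA_SC_RASTER_CONFIG/PA_SC_RASTER_CONFIG_1 on a fully enabled part */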
3431 switch (adev->asic_type) {
3432 case CHIP_FIJI:
3433 case CHIP_VEGAM:
3434 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3435 RB_XSEL2(1) | PKR_MAP(2) |
3436 PKR_XSEL(1) | PKR_YSEL(1) |
3437 SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3438 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3439 SE_PAIR_YSEL(2);
3440 break;
3441 case CHIP_TONGA:
3442 case CHIP_POLARIS10:
3443 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3444 SE_XSEL(1) | SE_YSEL(1);
3445 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3446 SE_PAIR_YSEL(2);
3447 break;
3448 case CHIP_TOPAZ:
3449 case CHIP_CARRIZO:
3450 *rconf |= RB_MAP_PKR0(2);
3451 *rconf1 |= 0x0;
3452 break;
3453 case CHIP_POLARIS11:
3454 case CHIP_POLARIS12:
3455 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3456 SE_XSEL(1) | SE_YSEL(1);
3457 *rconf1 |= 0x0;
3458 break;
3459 case CHIP_STONEY:
3460 *rconf |= 0x0;
3461 *rconf1 |= 0x0;
3462 break;
3463 default:
3464 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3465 break;
3466 }
3467 }
3468
3469 static void
3470 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3471 u32 raster_config, u32 raster_config_1,
3472 unsigned rb_mask, unsigned num_rb)
3473 {
3474 unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3475 unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3476 unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3477 unsigned rb_per_se = num_rb / num_se;
3478 unsigned se_mask[4];
3479 unsigned se;
3480
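
	/* carve the global RB mask into a contiguous enable mask per shader engine */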
3481 se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3482 se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3483 se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3484 se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3485
3486 WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3487 WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3488 WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3489
3490 if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3491 (!se_mask[2] && !se_mask[3]))) {
3492 raster_config_1 &= ~SE_PAIR_MAP_MASK;
3493
3494 if (!se_mask[0] && !se_mask[1]) {
3495 raster_config_1 |=
3496 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3497 } else {
3498 raster_config_1 |=
3499 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3500 }
3501 }
3502
3503 for (se = 0; se < num_se; se++) {
3504 unsigned raster_config_se = raster_config;
3505 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3506 unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3507 int idx = (se / 2) * 2;
3508
3509 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3510 raster_config_se &= ~SE_MAP_MASK;
3511
3512 if (!se_mask[idx]) {
3513 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3514 } else {
3515 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3516 }
3517 }
3518
3519 pkr0_mask &= rb_mask;
3520 pkr1_mask &= rb_mask;
3521 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3522 raster_config_se &= ~PKR_MAP_MASK;
3523
3524 if (!pkr0_mask) {
3525 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3526 } else {
3527 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3528 }
3529 }
3530
3531 if (rb_per_se >= 2) {
3532 unsigned rb0_mask = 1 << (se * rb_per_se);
3533 unsigned rb1_mask = rb0_mask << 1;
3534
3535 rb0_mask &= rb_mask;
3536 rb1_mask &= rb_mask;
3537 if (!rb0_mask || !rb1_mask) {
3538 raster_config_se &= ~RB_MAP_PKR0_MASK;
3539
3540 if (!rb0_mask) {
3541 raster_config_se |=
3542 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3543 } else {
3544 raster_config_se |=
3545 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3546 }
3547 }
3548
3549 if (rb_per_se > 2) {
3550 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3551 rb1_mask = rb0_mask << 1;
3552 rb0_mask &= rb_mask;
3553 rb1_mask &= rb_mask;
3554 if (!rb0_mask || !rb1_mask) {
3555 raster_config_se &= ~RB_MAP_PKR1_MASK;
3556
3557 if (!rb0_mask) {
3558 raster_config_se |=
3559 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3560 } else {
3561 raster_config_se |=
3562 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3563 }
3564 }
3565 }
3566 }
3567
3568 /* GRBM_GFX_INDEX has a different offset on VI */
3569 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff, 0);
3570 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3571 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3572 }
3573
3574 /* GRBM_GFX_INDEX has a different offset on VI */
3575 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
3576 }
3577
3578 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3579 {
3580 int i, j;
3581 u32 data;
3582 u32 raster_config = 0, raster_config_1 = 0;
3583 u32 active_rbs = 0;
3584 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3585 adev->gfx.config.max_sh_per_se;
3586 unsigned num_rb_pipes;
3587
3588 mutex_lock(&adev->grbm_idx_mutex);
3589 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3590 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3591 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0);
3592 data = gfx_v8_0_get_rb_active_bitmap(adev);
3593 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3594 rb_bitmap_width_per_sh);
3595 }
3596 }
3597 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
3598
3599 adev->gfx.config.backend_enable_mask = active_rbs;
3600 adev->gfx.config.num_rbs = hweight32(active_rbs);
3601
3602 num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3603 adev->gfx.config.max_shader_engines, 16);
3604
3605 gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3606
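	/* keep the default raster config unless some RBs are harvested; in that
	 * case compute a config that routes work around the disabled RBs
	 */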
3607 if (!adev->gfx.config.backend_enable_mask ||
3608 adev->gfx.config.num_rbs >= num_rb_pipes) {
3609 WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3610 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3611 } else {
3612 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3613 adev->gfx.config.backend_enable_mask,
3614 num_rb_pipes);
3615 }
3616
3617 /* cache the values for userspace */
3618 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3619 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3620 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0);
3621 adev->gfx.config.rb_config[i][j].rb_backend_disable =
3622 RREG32(mmCC_RB_BACKEND_DISABLE);
3623 adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3624 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3625 adev->gfx.config.rb_config[i][j].raster_config =
3626 RREG32(mmPA_SC_RASTER_CONFIG);
3627 adev->gfx.config.rb_config[i][j].raster_config_1 =
3628 RREG32(mmPA_SC_RASTER_CONFIG_1);
3629 }
3630 }
3631 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
3632 mutex_unlock(&adev->grbm_idx_mutex);
3633 }
3634
3635 #define DEFAULT_SH_MEM_BASES (0x6000)
3636 /**
3637  * gfx_v8_0_init_compute_vmid - initialize the compute VMIDs
3638  *
3639  * @adev: amdgpu_device pointer
3640  *
3641  * Initialize the SH_MEM registers (memory apertures) for the compute VMIDs.
3642 *
3643 */
3644 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3645 {
3646 int i;
3647 uint32_t sh_mem_config;
3648 uint32_t sh_mem_bases;
3649
3650 /*
3651 * Configure apertures:
3652 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
3653 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
3654 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
3655 */
3656 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3657
3658 sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3659 SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3660 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3661 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3662 MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3663 SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3664
3665 mutex_lock(&adev->srbm_mutex);
3666 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
3667 vi_srbm_select(adev, 0, 0, 0, i);
3668 /* CP and shaders */
3669 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3670 WREG32(mmSH_MEM_APE1_BASE, 1);
3671 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3672 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3673 }
3674 vi_srbm_select(adev, 0, 0, 0, 0);
3675 mutex_unlock(&adev->srbm_mutex);
3676
3677 /* Initialize all compute VMIDs to have no GDS, GWS, or OA
3678 access. These should be enabled by FW for target VMIDs. */
3679 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
3680 WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
3681 WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
3682 WREG32(amdgpu_gds_reg_offset[i].gws, 0);
3683 WREG32(amdgpu_gds_reg_offset[i].oa, 0);
3684 }
3685 }
3686
3687 static void gfx_v8_0_init_gds_vmid(struct amdgpu_device *adev)
3688 {
3689 int vmid;
3690
3691 /*
3692 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
3693 * access. Compute VMIDs should be enabled by FW for target VMIDs,
3694 * the driver can enable them for graphics. VMID0 should maintain
3695 * access so that HWS firmware can save/restore entries.
3696 */
3697 for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
3698 WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0);
3699 WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0);
3700 WREG32(amdgpu_gds_reg_offset[vmid].gws, 0);
3701 WREG32(amdgpu_gds_reg_offset[vmid].oa, 0);
3702 }
3703 }
3704
3705 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3706 {
3707 switch (adev->asic_type) {
3708 default:
3709 adev->gfx.config.double_offchip_lds_buf = 1;
3710 break;
3711 case CHIP_CARRIZO:
3712 case CHIP_STONEY:
3713 adev->gfx.config.double_offchip_lds_buf = 0;
3714 break;
3715 }
3716 }
3717
3718 static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
3719 {
3720 u32 tmp, sh_static_mem_cfg;
3721 int i;
3722
3723 WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3724 WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3725 WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3726 WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3727
3728 gfx_v8_0_tiling_mode_table_init(adev);
3729 gfx_v8_0_setup_rb(adev);
3730 gfx_v8_0_get_cu_info(adev);
3731 gfx_v8_0_config_init(adev);
3732
3733 /* XXX SH_MEM regs */
3734 /* where to put LDS, scratch, GPUVM in FSA64 space */
3735 sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3736 SWIZZLE_ENABLE, 1);
3737 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3738 ELEMENT_SIZE, 1);
3739 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3740 INDEX_STRIDE, 3);
3741 WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
3742
3743 mutex_lock(&adev->srbm_mutex);
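	/* VMID 0 is used by the kernel driver and gets an uncached default MTYPE
	 * and a zero aperture base; the remaining VMIDs default to noncached and
	 * point SH_MEM_BASES at the shared aperture
	 */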
3744 for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
3745 vi_srbm_select(adev, 0, 0, 0, i);
3746 /* CP and shaders */
3747 if (i == 0) {
3748 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3749 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3750 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3751 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3752 WREG32(mmSH_MEM_CONFIG, tmp);
3753 WREG32(mmSH_MEM_BASES, 0);
3754 } else {
3755 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3756 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3757 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3758 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3759 WREG32(mmSH_MEM_CONFIG, tmp);
3760 tmp = adev->gmc.shared_aperture_start >> 48;
3761 WREG32(mmSH_MEM_BASES, tmp);
3762 }
3763
3764 WREG32(mmSH_MEM_APE1_BASE, 1);
3765 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3766 }
3767 vi_srbm_select(adev, 0, 0, 0, 0);
3768 mutex_unlock(&adev->srbm_mutex);
3769
3770 gfx_v8_0_init_compute_vmid(adev);
3771 gfx_v8_0_init_gds_vmid(adev);
3772
3773 mutex_lock(&adev->grbm_idx_mutex);
3774 /*
3775 	 * make sure the following register writes are broadcast
3776 	 * to all shaders
3777 */
3778 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
3779
3780 WREG32(mmPA_SC_FIFO_SIZE,
3781 (adev->gfx.config.sc_prim_fifo_size_frontend <<
3782 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3783 (adev->gfx.config.sc_prim_fifo_size_backend <<
3784 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3785 (adev->gfx.config.sc_hiz_tile_fifo_size <<
3786 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3787 (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3788 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3789
3790 tmp = RREG32(mmSPI_ARB_PRIORITY);
3791 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3792 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3793 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3794 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3795 WREG32(mmSPI_ARB_PRIORITY, tmp);
3796
3797 mutex_unlock(&adev->grbm_idx_mutex);
3799 }
3800
3801 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3802 {
3803 u32 i, j, k;
3804 u32 mask;
3805
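	/* poll each SE/SH until the per-CU SERDES masters report idle */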
3806 mutex_lock(&adev->grbm_idx_mutex);
3807 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3808 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3809 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0);
3810 for (k = 0; k < adev->usec_timeout; k++) {
3811 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3812 break;
3813 udelay(1);
3814 }
3815 if (k == adev->usec_timeout) {
3816 gfx_v8_0_select_se_sh(adev, 0xffffffff,
3817 0xffffffff, 0xffffffff, 0);
3818 mutex_unlock(&adev->grbm_idx_mutex);
3819 				DRM_INFO("Timeout waiting for RLC serdes %u,%u\n",
3820 i, j);
3821 return;
3822 }
3823 }
3824 }
3825 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
3826 mutex_unlock(&adev->grbm_idx_mutex);
3827
3828 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3829 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3830 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3831 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3832 for (k = 0; k < adev->usec_timeout; k++) {
3833 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3834 break;
3835 udelay(1);
3836 }
3837 }
3838
3839 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3840 bool enable)
3841 {
3842 u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3843
3844 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3845 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3846 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3847 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3848
3849 WREG32(mmCP_INT_CNTL_RING0, tmp);
3850 }
3851
3852 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3853 {
3854 adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
3855 	/* program the clear state indirect buffer (csib) address and size */
3856 WREG32(mmRLC_CSIB_ADDR_HI,
3857 adev->gfx.rlc.clear_state_gpu_addr >> 32);
3858 WREG32(mmRLC_CSIB_ADDR_LO,
3859 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3860 WREG32(mmRLC_CSIB_LENGTH,
3861 adev->gfx.rlc.clear_state_size);
3862 }
3863
3864 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3865 int ind_offset,
3866 int list_size,
3867 int *unique_indices,
3868 int *indices_count,
3869 int max_indices,
3870 int *ind_start_offsets,
3871 int *offset_count,
3872 int max_offset)
3873 {
3874 int indices;
3875 bool new_entry = true;
3876
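	/*
	 * The format list is a series of 0xFFFFFFFF-terminated entries. Record
	 * where each entry starts and compact the indirect register indices
	 * (every third dword of an entry) into a small table of unique values,
	 * rewriting the list to reference table slots instead.
	 */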
3877 for (; ind_offset < list_size; ind_offset++) {
3878
3879 if (new_entry) {
3880 new_entry = false;
3881 ind_start_offsets[*offset_count] = ind_offset;
3882 *offset_count = *offset_count + 1;
3883 BUG_ON(*offset_count >= max_offset);
3884 }
3885
3886 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3887 new_entry = true;
3888 continue;
3889 }
3890
3891 ind_offset += 2;
3892
3893 		/* look for a matching index */
3894 for (indices = 0;
3895 indices < *indices_count;
3896 indices++) {
3897 if (unique_indices[indices] ==
3898 register_list_format[ind_offset])
3899 break;
3900 }
3901
3902 if (indices >= *indices_count) {
3903 unique_indices[*indices_count] =
3904 register_list_format[ind_offset];
3905 indices = *indices_count;
3906 *indices_count = *indices_count + 1;
3907 BUG_ON(*indices_count >= max_indices);
3908 }
3909
3910 register_list_format[ind_offset] = indices;
3911 }
3912 }
3913
3914 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3915 {
3916 int i, temp, data;
3917 int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3918 int indices_count = 0;
3919 int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3920 int offset_count = 0;
3921
3922 int list_size;
3923 unsigned int *register_list_format =
3924 kmemdup(adev->gfx.rlc.register_list_format,
3925 adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3926 if (!register_list_format)
3927 return -ENOMEM;
3928
3929 gfx_v8_0_parse_ind_reg_list(register_list_format,
3930 RLC_FormatDirectRegListLength,
3931 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3932 unique_indices,
3933 &indices_count,
3934 ARRAY_SIZE(unique_indices),
3935 indirect_start_offsets,
3936 &offset_count,
3937 ARRAY_SIZE(indirect_start_offsets));
3938
3939 /* save and restore list */
3940 WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3941
3942 WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3943 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3944 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3945
3946 /* indirect list */
3947 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3948 for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3949 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3950
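	/* the save/restore list holds register/value pairs, so the RLC is told half the dword count */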
3951 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3952 list_size = list_size >> 1;
3953 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3954 WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3955
3956 	/* starting offsets */
3957 WREG32(mmRLC_GPM_SCRATCH_ADDR,
3958 adev->gfx.rlc.starting_offsets_start);
3959 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
3960 WREG32(mmRLC_GPM_SCRATCH_DATA,
3961 indirect_start_offsets[i]);
3962
3963 /* unique indices */
3964 temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3965 data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3966 for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
3967 if (unique_indices[i] != 0) {
3968 WREG32(temp + i, unique_indices[i] & 0x3FFFF);
3969 WREG32(data + i, unique_indices[i] >> 20);
3970 }
3971 }
3972 kfree(register_list_format);
3973
3974 return 0;
3975 }
3976
3977 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
3978 {
3979 WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
3980 }
3981
3982 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
3983 {
3984 uint32_t data;
3985
3986 WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
3987
3988 data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
3989 data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
3990 data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
3991 data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
3992 WREG32(mmRLC_PG_DELAY, data);
3993
3994 WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
3995 WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
3997 }
3998
3999 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4000 bool enable)
4001 {
4002 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4003 }
4004
4005 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4006 bool enable)
4007 {
4008 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
4009 }
4010
4011 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4012 {
4013 WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4014 }
4015
4016 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4017 {
4018 if ((adev->asic_type == CHIP_CARRIZO) ||
4019 (adev->asic_type == CHIP_STONEY)) {
4020 gfx_v8_0_init_csb(adev);
4021 gfx_v8_0_init_save_restore_list(adev);
4022 gfx_v8_0_enable_save_restore_machine(adev);
4023 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4024 gfx_v8_0_init_power_gating(adev);
4025 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4026 } else if ((adev->asic_type == CHIP_POLARIS11) ||
4027 (adev->asic_type == CHIP_POLARIS12) ||
4028 (adev->asic_type == CHIP_VEGAM)) {
4029 gfx_v8_0_init_csb(adev);
4030 gfx_v8_0_init_save_restore_list(adev);
4031 gfx_v8_0_enable_save_restore_machine(adev);
4032 gfx_v8_0_init_power_gating(adev);
4033 }
4035 }
4036
4037 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4038 {
4039 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4040
4041 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4042 gfx_v8_0_wait_for_rlc_serdes(adev);
4043 }
4044
4045 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4046 {
4047 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4048 udelay(50);
4049
4050 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4051 udelay(50);
4052 }
4053
4054 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4055 {
4056 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4057
4058 	/* on APUs such as Carrizo, the CP interrupt is enabled after the CP is initialized */
4059 if (!(adev->flags & AMD_IS_APU))
4060 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4061
4062 udelay(50);
4063 }
4064
4065 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4066 {
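	/* under SR-IOV the host owns the RLC, so only the clear state buffer needs (re)programming */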
4067 if (amdgpu_sriov_vf(adev)) {
4068 gfx_v8_0_init_csb(adev);
4069 return 0;
4070 }
4071
4072 adev->gfx.rlc.funcs->stop(adev);
4073 adev->gfx.rlc.funcs->reset(adev);
4074 gfx_v8_0_init_pg(adev);
4075 adev->gfx.rlc.funcs->start(adev);
4076
4077 return 0;
4078 }
4079
4080 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4081 {
4082 u32 tmp = RREG32(mmCP_ME_CNTL);
4083
4084 if (enable) {
4085 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4086 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4087 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4088 } else {
4089 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4090 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4091 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4092 }
4093 WREG32(mmCP_ME_CNTL, tmp);
4094 udelay(50);
4095 }
4096
4097 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4098 {
4099 u32 count = 0;
4100 const struct cs_section_def *sect = NULL;
4101 const struct cs_extent_def *ext = NULL;
4102
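	/* tally the PM4 dwords of the clear state sequence; this mirrors the
	 * clear-state packets emitted by gfx_v8_0_cp_gfx_start()
	 */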
4103 /* begin clear state */
4104 count += 2;
4105 /* context control state */
4106 count += 3;
4107
4108 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4109 for (ext = sect->section; ext->extent != NULL; ++ext) {
4110 if (sect->id == SECT_CONTEXT)
4111 count += 2 + ext->reg_count;
4112 else
4113 return 0;
4114 }
4115 }
4116 /* pa_sc_raster_config/pa_sc_raster_config1 */
4117 count += 4;
4118 /* end clear state */
4119 count += 2;
4120 /* clear state */
4121 count += 2;
4122
4123 return count;
4124 }
4125
4126 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4127 {
4128 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4129 const struct cs_section_def *sect = NULL;
4130 const struct cs_extent_def *ext = NULL;
4131 int r, i;
4132
4133 /* init the CP */
4134 WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4135 WREG32(mmCP_ENDIAN_SWAP, 0);
4136 WREG32(mmCP_DEVICE_ID, 1);
4137
4138 gfx_v8_0_cp_gfx_enable(adev, true);
4139
4140 r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4141 if (r) {
4142 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4143 return r;
4144 }
4145
4146 /* clear state buffer */
4147 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4148 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4149
4150 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4151 amdgpu_ring_write(ring, 0x80000000);
4152 amdgpu_ring_write(ring, 0x80000000);
4153
4154 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4155 for (ext = sect->section; ext->extent != NULL; ++ext) {
4156 if (sect->id == SECT_CONTEXT) {
4157 amdgpu_ring_write(ring,
4158 PACKET3(PACKET3_SET_CONTEXT_REG,
4159 ext->reg_count));
4160 amdgpu_ring_write(ring,
4161 ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4162 for (i = 0; i < ext->reg_count; i++)
4163 amdgpu_ring_write(ring, ext->extent[i]);
4164 }
4165 }
4166 }
4167
4168 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4169 amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4170 amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
4171 amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);
4172
4173 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4174 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4175
4176 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4177 amdgpu_ring_write(ring, 0);
4178
4179 /* init the CE partitions */
4180 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4181 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4182 amdgpu_ring_write(ring, 0x8000);
4183 amdgpu_ring_write(ring, 0x8000);
4184
4185 amdgpu_ring_commit(ring);
4186
4187 return 0;
4188 }

4189 static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4190 {
4191 u32 tmp;
4192 /* no gfx doorbells on iceland */
4193 if (adev->asic_type == CHIP_TOPAZ)
4194 return;
4195
4196 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4197
4198 if (ring->use_doorbell) {
4199 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4200 DOORBELL_OFFSET, ring->doorbell_index);
4201 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4202 DOORBELL_HIT, 0);
4203 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4204 DOORBELL_EN, 1);
4205 } else {
4206 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4207 }
4208
4209 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4210
4211 if (adev->flags & AMD_IS_APU)
4212 return;
4213
4214 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4215 DOORBELL_RANGE_LOWER,
4216 adev->doorbell_index.gfx_ring0);
4217 WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4218
4219 WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4220 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4221 }
4222
4223 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4224 {
4225 struct amdgpu_ring *ring;
4226 u32 tmp;
4227 u32 rb_bufsz;
4228 u64 rb_addr, rptr_addr, wptr_gpu_addr;
4229
4230 /* Set the write pointer delay */
4231 WREG32(mmCP_RB_WPTR_DELAY, 0);
4232
4233 /* set the RB to use vmid 0 */
4234 WREG32(mmCP_RB_VMID, 0);
4235
4236 /* Set ring buffer size */
4237 ring = &adev->gfx.gfx_ring[0];
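	/* RB_BUFSZ and RB_BLKSZ are log2 values expressed in units of 8 bytes */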
4238 rb_bufsz = order_base_2(ring->ring_size / 8);
4239 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4240 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4241 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4242 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4243 #ifdef __BIG_ENDIAN
4244 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4245 #endif
4246 WREG32(mmCP_RB0_CNTL, tmp);
4247
4248 /* Initialize the ring buffer's read and write pointers */
4249 WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4250 ring->wptr = 0;
4251 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4252
4253 /* set the wb address whether it's enabled or not */
4254 rptr_addr = ring->rptr_gpu_addr;
4255 WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4256 WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4257
4258 wptr_gpu_addr = ring->wptr_gpu_addr;
4259 WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4260 WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4261 mdelay(1);
4262 WREG32(mmCP_RB0_CNTL, tmp);
4263
4264 rb_addr = ring->gpu_addr >> 8;
4265 WREG32(mmCP_RB0_BASE, rb_addr);
4266 WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4267
4268 gfx_v8_0_set_cpg_door_bell(adev, ring);
4269 /* start the ring */
4270 amdgpu_ring_clear_ring(ring);
4271 gfx_v8_0_cp_gfx_start(adev);
4272
4273 return 0;
4274 }
4275
4276 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4277 {
4278 if (enable) {
4279 WREG32(mmCP_MEC_CNTL, 0);
4280 } else {
4281 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4282 adev->gfx.kiq[0].ring.sched.ready = false;
4283 }
4284 udelay(50);
4285 }
4286
4287 /* KIQ functions */
4288 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4289 {
4290 uint32_t tmp;
4291 struct amdgpu_device *adev = ring->adev;
4292
4293 /* tell RLC which is KIQ queue */
4294 tmp = RREG32(mmRLC_CP_SCHEDULERS);
4295 tmp &= 0xffffff00;
4296 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4297 WREG32(mmRLC_CP_SCHEDULERS, tmp | 0x80);
4298 }
4299
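/*
 * The KIQ (Kernel Interface Queue) is a privileged compute queue through
 * which the driver manages the user-visible compute queues (KCQs).
 * gfx_v8_0_kiq_kcq_enable() first advertises the usable queue slots with a
 * SET_RESOURCES packet (8 dwords including the PACKET3 header) and then
 * maps each KCQ with a MAP_QUEUES packet (7 dwords); this appears to be
 * what the "(8 * num_compute_rings) + 8" ring allocation below accounts
 * for, with each packet budgeted at 8 dwords.
 */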
4300 static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4301 {
4302 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
4303 uint64_t queue_mask = 0;
4304 int r, i;
4305
4306 for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
4307 if (!test_bit(i, adev->gfx.mec_bitmap[0].queue_bitmap))
4308 continue;
4309
4310 /* This situation may be hit in the future if a new HW
4311 * generation exposes more than 64 queues. If so, the
4312 * definition of queue_mask needs updating */
4313 if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
4314 DRM_ERROR("Invalid KCQ enabled: %d\n", i);
4315 break;
4316 }
4317
4318 queue_mask |= (1ull << i);
4319 }
4320
4321 r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
4322 if (r) {
4323 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4324 return r;
4325 }
4326 /* set resources */
4327 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4328 amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
4329 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
4330 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
4331 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
4332 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
4333 amdgpu_ring_write(kiq_ring, 0); /* oac mask */
4334 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
4335 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4336 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4337 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4338 uint64_t wptr_addr = ring->wptr_gpu_addr;
4339
4340 /* map queues */
4341 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4342 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
4343 amdgpu_ring_write(kiq_ring,
4344 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
4345 amdgpu_ring_write(kiq_ring,
4346 PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
4347 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
4348 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
4349 PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
4350 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4351 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4352 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4353 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4354 }
4355
4356 amdgpu_ring_commit(kiq_ring);
4357
4358 return 0;
4359 }
4360
4361 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4362 {
4363 int i, r = 0;
4364
4365 if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4366 WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4367 for (i = 0; i < adev->usec_timeout; i++) {
4368 if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4369 break;
4370 udelay(1);
4371 }
4372 if (i == adev->usec_timeout)
4373 r = -ETIMEDOUT;
4374 }
4375 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4376 WREG32(mmCP_HQD_PQ_RPTR, 0);
4377 WREG32(mmCP_HQD_PQ_WPTR, 0);
4378
4379 return r;
4380 }
4381
4382 static void gfx_v8_0_mqd_set_priority(struct amdgpu_ring *ring, struct vi_mqd *mqd)
4383 {
4384 struct amdgpu_device *adev = ring->adev;
4385
4386 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4387 if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
4388 mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
4389 mqd->cp_hqd_queue_priority =
4390 AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
4391 }
4392 }
4393 }
4394
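/*
 * An MQD (Memory Queue Descriptor) is the in-memory image of one hardware
 * queue's HQD register state.  gfx_v8_0_mqd_init() fills the vi_mqd struct
 * with the ring's addresses, sizes and doorbell setup; the image is then
 * either written straight into the HQD registers (gfx_v8_0_mqd_commit(),
 * used for the KIQ) or handed to the CP firmware via a MAP_QUEUES packet
 * for the regular compute queues.
 */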
4395 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4396 {
4397 struct amdgpu_device *adev = ring->adev;
4398 struct vi_mqd *mqd = ring->mqd_ptr;
4399 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4400 uint32_t tmp;
4401
4402 mqd->header = 0xC0310800;
4403 mqd->compute_pipelinestat_enable = 0x00000001;
4404 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4405 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4406 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4407 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4408 mqd->compute_misc_reserved = 0x00000003;
4409 mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
4410 + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4411 mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
4412 + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4413 eop_base_addr = ring->eop_gpu_addr >> 8;
4414 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4415 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4416
4417 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4418 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4419 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4420 (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
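/*
 * Worked example: GFX8_MEC_HPD_SIZE is 4096 bytes, i.e. 1024 dwords, so
 * EOP_SIZE = order_base_2(4096 / 4) - 1 = 10 - 1 = 9 and the hardware
 * decodes 2^(9+1) = 1024 dwords -- exactly the allocated EOP buffer.
 */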
4421
4422 mqd->cp_hqd_eop_control = tmp;
4423
4424 /* enable doorbell? */
4425 tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4426 CP_HQD_PQ_DOORBELL_CONTROL,
4427 DOORBELL_EN,
4428 ring->use_doorbell ? 1 : 0);
4429
4430 mqd->cp_hqd_pq_doorbell_control = tmp;
4431
4432 /* set the pointer to the MQD */
4433 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4434 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4435
4436 /* set MQD vmid to 0 */
4437 tmp = RREG32(mmCP_MQD_CONTROL);
4438 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4439 mqd->cp_mqd_control = tmp;
4440
4441 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4442 hqd_gpu_addr = ring->gpu_addr >> 8;
4443 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4444 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4445
4446 /* set up the HQD, this is similar to CP_RB0_CNTL */
4447 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4448 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4449 (order_base_2(ring->ring_size / 4) - 1));
4450 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4451 (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
4452 #ifdef __BIG_ENDIAN
4453 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4454 #endif
4455 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4456 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4457 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4458 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4459 mqd->cp_hqd_pq_control = tmp;
4460
4461 /* set the wb address whether it's enabled or not */
4462 wb_gpu_addr = ring->rptr_gpu_addr;
4463 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4464 mqd->cp_hqd_pq_rptr_report_addr_hi =
4465 upper_32_bits(wb_gpu_addr) & 0xffff;
4466
4467 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4468 wb_gpu_addr = ring->wptr_gpu_addr;
4469 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4470 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4471
4472 tmp = 0;
4473 /* enable the doorbell if requested */
4474 if (ring->use_doorbell) {
4475 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4476 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4477 DOORBELL_OFFSET, ring->doorbell_index);
4478
4479 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4480 DOORBELL_EN, 1);
4481 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4482 DOORBELL_SOURCE, 0);
4483 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4484 DOORBELL_HIT, 0);
4485 }
4486
4487 mqd->cp_hqd_pq_doorbell_control = tmp;
4488
4489 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4490 ring->wptr = 0;
4491 mqd->cp_hqd_pq_wptr = ring->wptr;
4492 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4493
4494 /* set the vmid for the queue */
4495 mqd->cp_hqd_vmid = 0;
4496
4497 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4498 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4499 mqd->cp_hqd_persistent_state = tmp;
4500
4501 /* set MTYPE */
4502 tmp = RREG32(mmCP_HQD_IB_CONTROL);
4503 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4504 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
4505 mqd->cp_hqd_ib_control = tmp;
4506
4507 tmp = RREG32(mmCP_HQD_IQ_TIMER);
4508 tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
4509 mqd->cp_hqd_iq_timer = tmp;
4510
4511 tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
4512 tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
4513 mqd->cp_hqd_ctx_save_control = tmp;
4514
4515 /* defaults */
4516 mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
4517 mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
4518 mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
4519 mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
4520 mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
4521 mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
4522 mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
4523 mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
4524 mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
4525 mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
4526 mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
4527 mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
4528
4529 /* set static priority for a queue/ring */
4530 gfx_v8_0_mqd_set_priority(ring, mqd);
4531 mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
4532
4533 /* the map_queues packet doesn't need to activate the queue,
4534 * so only the KIQ needs to set this field.
4535 */
4536 if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
4537 mqd->cp_hqd_active = 1;
4538
4539 return 0;
4540 }
4541
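/*
 * gfx_v8_0_mqd_commit() relies on struct vi_mqd mirroring the HQD register
 * file: treating the struct as a dword array starting at cp_mqd_base_addr_lo,
 * register mmX is found at mqd_data[mmX - mmCP_MQD_BASE_ADDR].  The final
 * loop ends at mmCP_HQD_ACTIVE so the queue only goes live once everything
 * else has been programmed.
 */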
4542 static int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
4543 struct vi_mqd *mqd)
4544 {
4545 uint32_t mqd_reg;
4546 uint32_t *mqd_data;
4547
4548 /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
4549 mqd_data = &mqd->cp_mqd_base_addr_lo;
4550
4551 /* disable wptr polling */
4552 WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
4553
4554 /* program all HQD registers */
4555 for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
4556 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4557
4558 /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
4559 * This is safe since EOP RPTR==WPTR for any inactive HQD
4560 * on ASICs that do not support context-save.
4561 * EOP writes/reads can start anywhere in the ring.
4562 */
4563 if (adev->asic_type != CHIP_TONGA) {
4564 WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
4565 WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
4566 WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
4567 }
4568
4569 for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
4570 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4571
4572 /* activate the HQD */
4573 for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
4574 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4575
4576 return 0;
4577 }
4578
4579 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4580 {
4581 struct amdgpu_device *adev = ring->adev;
4582 struct vi_mqd *mqd = ring->mqd_ptr;
4583
4584 gfx_v8_0_kiq_setting(ring);
4585
4586 if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
4587 /* reset MQD to a clean status */
4588 if (adev->gfx.kiq[0].mqd_backup)
4589 memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(struct vi_mqd_allocation));
4590
4591 /* reset ring buffer */
4592 ring->wptr = 0;
4593 amdgpu_ring_clear_ring(ring);
4594 mutex_lock(&adev->srbm_mutex);
4595 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4596 gfx_v8_0_mqd_commit(adev, mqd);
4597 vi_srbm_select(adev, 0, 0, 0, 0);
4598 mutex_unlock(&adev->srbm_mutex);
4599 } else {
4600 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4601 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4602 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4603 if (amdgpu_sriov_vf(adev) && adev->in_suspend)
4604 amdgpu_ring_clear_ring(ring);
4605 mutex_lock(&adev->srbm_mutex);
4606 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4607 gfx_v8_0_mqd_init(ring);
4608 gfx_v8_0_mqd_commit(adev, mqd);
4609 vi_srbm_select(adev, 0, 0, 0, 0);
4610 mutex_unlock(&adev->srbm_mutex);
4611
4612 if (adev->gfx.kiq[0].mqd_backup)
4613 memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(struct vi_mqd_allocation));
4614 }
4615
4616 return 0;
4617 }
4618
4619 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4620 {
4621 struct amdgpu_device *adev = ring->adev;
4622 struct vi_mqd *mqd = ring->mqd_ptr;
4623 int mqd_idx = ring - &adev->gfx.compute_ring[0];
4624
4625 if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
4626 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4627 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4628 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4629 mutex_lock(&adev->srbm_mutex);
4630 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4631 gfx_v8_0_mqd_init(ring);
4632 vi_srbm_select(adev, 0, 0, 0, 0);
4633 mutex_unlock(&adev->srbm_mutex);
4634
4635 if (adev->gfx.mec.mqd_backup[mqd_idx])
4636 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4637 } else {
4638 /* restore MQD to a clean status */
4639 if (adev->gfx.mec.mqd_backup[mqd_idx])
4640 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4641 /* reset ring buffer */
4642 ring->wptr = 0;
4643 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
4644 amdgpu_ring_clear_ring(ring);
4645 }
4646 return 0;
4647 }
4648
4649 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
4650 {
4651 if (adev->asic_type > CHIP_TONGA) {
4652 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, adev->doorbell_index.kiq << 2);
4653 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, adev->doorbell_index.mec_ring7 << 2);
4654 }
4655 /* enable doorbells */
4656 WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4657 }
4658
4659 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4660 {
4661 gfx_v8_0_kiq_init_queue(&adev->gfx.kiq[0].ring);
4662 return 0;
4663 }
4664
4665 static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
4666 {
4667 int i, r;
4668
4669 gfx_v8_0_cp_compute_enable(adev, true);
4670
4671 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4672 r = gfx_v8_0_kcq_init_queue(&adev->gfx.compute_ring[i]);
4673 if (r)
4674 return r;
4675 }
4676
4677 gfx_v8_0_set_mec_doorbell_range(adev);
4678
4679 return gfx_v8_0_kiq_kcq_enable(adev);
4680 }
4681
4682 static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev)
4683 {
4684 int r, i;
4685 struct amdgpu_ring *ring;
4686
4687 /* collect all the ring_tests here, gfx, kiq, compute */
4688 ring = &adev->gfx.gfx_ring[0];
4689 r = amdgpu_ring_test_helper(ring);
4690 if (r)
4691 return r;
4692
4693 ring = &adev->gfx.kiq[0].ring;
4694 r = amdgpu_ring_test_helper(ring);
4695 if (r)
4696 return r;
4697
4698 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4699 ring = &adev->gfx.compute_ring[i];
4700 amdgpu_ring_test_helper(ring);
4701 }
4702
4703 return 0;
4704 }
4705
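/*
 * Bring-up order matters in gfx_v8_0_cp_resume(): the KIQ must be running
 * before the KCQs, because gfx_v8_0_kcq_resume() maps the compute queues by
 * submitting packets on the KIQ ring.  The GFX ring is restarted in between,
 * and all rings are smoke-tested before GUI idle interrupts are re-enabled.
 */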
4706 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4707 {
4708 int r;
4709
4710 if (!(adev->flags & AMD_IS_APU))
4711 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4712
4713 r = gfx_v8_0_kiq_resume(adev);
4714 if (r)
4715 return r;
4716
4717 r = gfx_v8_0_cp_gfx_resume(adev);
4718 if (r)
4719 return r;
4720
4721 r = gfx_v8_0_kcq_resume(adev);
4722 if (r)
4723 return r;
4724
4725 r = gfx_v8_0_cp_test_all_rings(adev);
4726 if (r)
4727 return r;
4728
4729 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4730
4731 return 0;
4732 }
4733
4734 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
4735 {
4736 gfx_v8_0_cp_gfx_enable(adev, enable);
4737 gfx_v8_0_cp_compute_enable(adev, enable);
4738 }
4739
4740 static int gfx_v8_0_hw_init(struct amdgpu_ip_block *ip_block)
4741 {
4742 int r;
4743 struct amdgpu_device *adev = ip_block->adev;
4744
4745 gfx_v8_0_init_golden_registers(adev);
4746 gfx_v8_0_constants_init(adev);
4747
4748 r = adev->gfx.rlc.funcs->resume(adev);
4749 if (r)
4750 return r;
4751
4752 r = gfx_v8_0_cp_resume(adev);
4753
4754 return r;
4755 }
4756
4757 static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
4758 {
4759 int r, i;
4760 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
4761
4762 r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
4763 if (r)
4764 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4765
4766 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4767 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4768
4769 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
4770 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
4771 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
4772 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
4773 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
4774 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
4775 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
4776 amdgpu_ring_write(kiq_ring, 0);
4777 amdgpu_ring_write(kiq_ring, 0);
4778 amdgpu_ring_write(kiq_ring, 0);
4779 }
4780 /* Submit unmap queue packet */
4781 amdgpu_ring_commit(kiq_ring);
4782 /*
4783 * The ring test does a basic scratch register change check.  Run it
4784 * here to ensure that the UNMAP_QUEUES packets submitted above have
4785 * been processed before returning.
4786 */
4787 r = amdgpu_ring_test_helper(kiq_ring);
4788 if (r)
4789 DRM_ERROR("KCQ disable failed\n");
4790
4791 return r;
4792 }
4793
4794 static bool gfx_v8_0_is_idle(struct amdgpu_ip_block *ip_block)
4795 {
4796 struct amdgpu_device *adev = ip_block->adev;
4797
4798 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
4799 || RREG32(mmGRBM_STATUS2) != 0x8)
4800 return false;
4801 else
4802 return true;
4803 }
4804
4805 static bool gfx_v8_0_rlc_is_idle(void *handle)
4806 {
4807 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4808
4809 if (RREG32(mmGRBM_STATUS2) != 0x8)
4810 return false;
4811 else
4812 return true;
4813 }
4814
4815 static int gfx_v8_0_wait_for_rlc_idle(void *handle)
4816 {
4817 unsigned int i;
4818 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4819
4820 for (i = 0; i < adev->usec_timeout; i++) {
4821 if (gfx_v8_0_rlc_is_idle(handle))
4822 return 0;
4823
4824 udelay(1);
4825 }
4826 return -ETIMEDOUT;
4827 }
4828
4829 static int gfx_v8_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
4830 {
4831 unsigned int i;
4832 struct amdgpu_device *adev = ip_block->adev;
4833
4834 for (i = 0; i < adev->usec_timeout; i++) {
4835 if (gfx_v8_0_is_idle(ip_block))
4836 return 0;
4837
4838 udelay(1);
4839 }
4840 return -ETIMEDOUT;
4841 }
4842
4843 static int gfx_v8_0_hw_fini(struct amdgpu_ip_block *ip_block)
4844 {
4845 struct amdgpu_device *adev = ip_block->adev;
4846
4847 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4848 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4849
4850 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
4851
4852 amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);
4853
4854 /* disable KCQ so the CPC doesn't touch memory that is no longer valid */
4855 gfx_v8_0_kcq_disable(adev);
4856
4857 if (amdgpu_sriov_vf(adev)) {
4858 pr_debug("For SRIOV client, shouldn't do anything.\n");
4859 return 0;
4860 }
4861
4862 amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4863 if (!gfx_v8_0_wait_for_idle(ip_block))
4864 gfx_v8_0_cp_enable(adev, false);
4865 else
4866 pr_err("cp is busy, skip halt cp\n");
4867 if (!gfx_v8_0_wait_for_rlc_idle(adev))
4868 adev->gfx.rlc.funcs->stop(adev);
4869 else
4870 pr_err("rlc is busy, skip halt rlc\n");
4871 amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
4872
4873 return 0;
4874 }
4875
4876 static int gfx_v8_0_suspend(struct amdgpu_ip_block *ip_block)
4877 {
4878 return gfx_v8_0_hw_fini(ip_block);
4879 }
4880
4881 static int gfx_v8_0_resume(struct amdgpu_ip_block *ip_block)
4882 {
4883 return gfx_v8_0_hw_init(ip_block);
4884 }
4885
4886 static bool gfx_v8_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
4887 {
4888 struct amdgpu_device *adev = ip_block->adev;
4889 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4890 u32 tmp;
4891
4892 /* GRBM_STATUS */
4893 tmp = RREG32(mmGRBM_STATUS);
4894 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4895 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4896 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4897 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4898 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4899 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
4900 GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4901 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4902 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4903 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4904 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4905 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4906 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4907 }
4908
4909 /* GRBM_STATUS2 */
4910 tmp = RREG32(mmGRBM_STATUS2);
4911 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4912 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4913 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4914
4915 if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
4916 REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
4917 REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
4918 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4919 SOFT_RESET_CPF, 1);
4920 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4921 SOFT_RESET_CPC, 1);
4922 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4923 SOFT_RESET_CPG, 1);
4924 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
4925 SOFT_RESET_GRBM, 1);
4926 }
4927
4928 /* SRBM_STATUS */
4929 tmp = RREG32(mmSRBM_STATUS);
4930 if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
4931 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4932 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4933 if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
4934 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4935 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
4936
4937 if (grbm_soft_reset || srbm_soft_reset) {
4938 adev->gfx.grbm_soft_reset = grbm_soft_reset;
4939 adev->gfx.srbm_soft_reset = srbm_soft_reset;
4940 return true;
4941 } else {
4942 adev->gfx.grbm_soft_reset = 0;
4943 adev->gfx.srbm_soft_reset = 0;
4944 return false;
4945 }
4946 }
4947
4948 static int gfx_v8_0_pre_soft_reset(struct amdgpu_ip_block *ip_block)
4949 {
4950 struct amdgpu_device *adev = ip_block->adev;
4951 u32 grbm_soft_reset = 0;
4952
4953 if ((!adev->gfx.grbm_soft_reset) &&
4954 (!adev->gfx.srbm_soft_reset))
4955 return 0;
4956
4957 grbm_soft_reset = adev->gfx.grbm_soft_reset;
4958
4959 /* stop the rlc */
4960 adev->gfx.rlc.funcs->stop(adev);
4961
4962 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
4963 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
4964 /* Disable GFX parsing/prefetching */
4965 gfx_v8_0_cp_gfx_enable(adev, false);
4966
4967 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
4968 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
4969 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
4970 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
4971 int i;
4972
4973 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4974 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4975
4976 mutex_lock(&adev->srbm_mutex);
4977 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4978 gfx_v8_0_deactivate_hqd(adev, 2);
4979 vi_srbm_select(adev, 0, 0, 0, 0);
4980 mutex_unlock(&adev->srbm_mutex);
4981 }
4982 /* Disable MEC parsing/prefetching */
4983 gfx_v8_0_cp_compute_enable(adev, false);
4984 }
4985
4986 return 0;
4987 }
4988
4989 static int gfx_v8_0_soft_reset(struct amdgpu_ip_block *ip_block)
4990 {
4991 struct amdgpu_device *adev = ip_block->adev;
4992 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4993 u32 tmp;
4994
4995 if ((!adev->gfx.grbm_soft_reset) &&
4996 (!adev->gfx.srbm_soft_reset))
4997 return 0;
4998
4999 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5000 srbm_soft_reset = adev->gfx.srbm_soft_reset;
5001
5002 if (grbm_soft_reset || srbm_soft_reset) {
5003 tmp = RREG32(mmGMCON_DEBUG);
5004 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5005 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5006 WREG32(mmGMCON_DEBUG, tmp);
5007 udelay(50);
5008 }
5009
5010 if (grbm_soft_reset) {
5011 tmp = RREG32(mmGRBM_SOFT_RESET);
5012 tmp |= grbm_soft_reset;
5013 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5014 WREG32(mmGRBM_SOFT_RESET, tmp);
5015 tmp = RREG32(mmGRBM_SOFT_RESET);
5016
5017 udelay(50);
5018
5019 tmp &= ~grbm_soft_reset;
5020 WREG32(mmGRBM_SOFT_RESET, tmp);
5021 tmp = RREG32(mmGRBM_SOFT_RESET);
5022 }
5023
5024 if (srbm_soft_reset) {
5025 tmp = RREG32(mmSRBM_SOFT_RESET);
5026 tmp |= srbm_soft_reset;
5027 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5028 WREG32(mmSRBM_SOFT_RESET, tmp);
5029 tmp = RREG32(mmSRBM_SOFT_RESET);
5030
5031 udelay(50);
5032
5033 tmp &= ~srbm_soft_reset;
5034 WREG32(mmSRBM_SOFT_RESET, tmp);
5035 tmp = RREG32(mmSRBM_SOFT_RESET);
5036 }
5037
5038 if (grbm_soft_reset || srbm_soft_reset) {
5039 tmp = RREG32(mmGMCON_DEBUG);
5040 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5041 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5042 WREG32(mmGMCON_DEBUG, tmp);
5043 }
5044
5045 /* Wait a little for things to settle down */
5046 udelay(50);
5047
5048 return 0;
5049 }
5050
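/*
 * The soft reset below follows the usual knock sequence: stall the GFX
 * memory controller clients (GMCON_DEBUG GFX_STALL/GFX_CLEAR), assert the
 * per-block reset bits, wait ~50us, deassert, then release the stall.  The
 * extra RREG32 after each WREG32 is a posting read to make sure the write
 * has landed before the delay starts.
 */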
5051 static int gfx_v8_0_post_soft_reset(struct amdgpu_ip_block *ip_block)
5052 {
5053 struct amdgpu_device *adev = ip_block->adev;
5054 u32 grbm_soft_reset = 0;
5055
5056 if ((!adev->gfx.grbm_soft_reset) &&
5057 (!adev->gfx.srbm_soft_reset))
5058 return 0;
5059
5060 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5061
5062 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5063 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5064 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5065 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5066 int i;
5067
5068 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5069 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5070
5071 mutex_lock(&adev->srbm_mutex);
5072 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5073 gfx_v8_0_deactivate_hqd(adev, 2);
5074 vi_srbm_select(adev, 0, 0, 0, 0);
5075 mutex_unlock(&adev->srbm_mutex);
5076 }
5077 gfx_v8_0_kiq_resume(adev);
5078 gfx_v8_0_kcq_resume(adev);
5079 }
5080
5081 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5082 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5083 gfx_v8_0_cp_gfx_resume(adev);
5084
5085 gfx_v8_0_cp_test_all_rings(adev);
5086
5087 adev->gfx.rlc.funcs->start(adev);
5088
5089 return 0;
5090 }
5091
5092 /**
5093 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5094 *
5095 * @adev: amdgpu_device pointer
5096 *
5097 * Fetches a GPU clock counter snapshot.
5098 * Returns the 64 bit clock counter snapshot.
5099 */
5100 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5101 {
5102 uint64_t clock;
5103
5104 mutex_lock(&adev->gfx.gpu_clock_mutex);
5105 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5106 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5107 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5108 mutex_unlock(&adev->gfx.gpu_clock_mutex);
5109 return clock;
5110 }
5111
5112 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5113 uint32_t vmid,
5114 uint32_t gds_base, uint32_t gds_size,
5115 uint32_t gws_base, uint32_t gws_size,
5116 uint32_t oa_base, uint32_t oa_size)
5117 {
5118 /* GDS Base */
5119 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5120 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5121 WRITE_DATA_DST_SEL(0)));
5122 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5123 amdgpu_ring_write(ring, 0);
5124 amdgpu_ring_write(ring, gds_base);
5125
5126 /* GDS Size */
5127 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5128 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5129 WRITE_DATA_DST_SEL(0)));
5130 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5131 amdgpu_ring_write(ring, 0);
5132 amdgpu_ring_write(ring, gds_size);
5133
5134 /* GWS */
5135 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5136 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5137 WRITE_DATA_DST_SEL(0)));
5138 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5139 amdgpu_ring_write(ring, 0);
5140 amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5141
5142 /* OA */
5143 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5144 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5145 WRITE_DATA_DST_SEL(0)));
5146 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5147 amdgpu_ring_write(ring, 0);
5148 amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5149 }
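/*
 * The OA (ordered append) write above builds a contiguous mask of oa_size
 * bits starting at bit oa_base: (1 << (oa_size + oa_base)) - (1 << oa_base).
 * For example, oa_base = 4 and oa_size = 4 gives (1 << 8) - (1 << 4) = 0xf0,
 * i.e. bits 4-7 set.
 */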
5150
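/*
 * Wave state is not directly MMIO-mapped: it is reached through the
 * SQ_IND_INDEX/SQ_IND_DATA indirect register pair.  The helpers below
 * program the wave/SIMD/register selector into SQ_IND_INDEX and read the
 * result back through SQ_IND_DATA, with AUTO_INCR set for bulk transfers
 * such as the SGPR dump.
 */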
5151 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5152 {
5153 WREG32(mmSQ_IND_INDEX,
5154 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5155 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5156 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5157 (SQ_IND_INDEX__FORCE_READ_MASK));
5158 return RREG32(mmSQ_IND_DATA);
5159 }
5160
5161 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5162 uint32_t wave, uint32_t thread,
5163 uint32_t regno, uint32_t num, uint32_t *out)
5164 {
5165 WREG32(mmSQ_IND_INDEX,
5166 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5167 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5168 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5169 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5170 (SQ_IND_INDEX__FORCE_READ_MASK) |
5171 (SQ_IND_INDEX__AUTO_INCR_MASK));
5172 while (num--)
5173 *(out++) = RREG32(mmSQ_IND_DATA);
5174 }
5175
5176 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5177 {
5178 /* type 0 wave data */
5179 dst[(*no_fields)++] = 0;
5180 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5181 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5182 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5183 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5184 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5185 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5186 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5187 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5188 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5189 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5190 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5191 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5192 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5193 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5194 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5195 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5196 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5197 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5198 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
5199 }
5200
5201 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
5202 uint32_t wave, uint32_t start,
5203 uint32_t size, uint32_t *dst)
5204 {
5205 wave_read_regs(
5206 adev, simd, wave, 0,
5207 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5208 }
5209
5210
5211 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5212 .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5213 .select_se_sh = &gfx_v8_0_select_se_sh,
5214 .read_wave_data = &gfx_v8_0_read_wave_data,
5215 .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5216 .select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
5217 };
5218
5219 static int gfx_v8_0_early_init(struct amdgpu_ip_block *ip_block)
5220 {
5221 struct amdgpu_device *adev = ip_block->adev;
5222
5223 adev->gfx.xcc_mask = 1;
5224 adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5225 adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
5226 AMDGPU_MAX_COMPUTE_RINGS);
5227 adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5228 gfx_v8_0_set_ring_funcs(adev);
5229 gfx_v8_0_set_irq_funcs(adev);
5230 gfx_v8_0_set_gds_init(adev);
5231 gfx_v8_0_set_rlc_funcs(adev);
5232
5233 return 0;
5234 }
5235
5236 static int gfx_v8_0_late_init(struct amdgpu_ip_block *ip_block)
5237 {
5238 struct amdgpu_device *adev = ip_block->adev;
5239 int r;
5240
5241 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5242 if (r)
5243 return r;
5244
5245 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5246 if (r)
5247 return r;
5248
5249 /* requires IBs so do in late init after IB pool is initialized */
5250 r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5251 if (r)
5252 return r;
5253
5254 r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
5255 if (r) {
5256 DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
5257 return r;
5258 }
5259
5260 r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
5261 if (r) {
5262 DRM_ERROR(
5263 "amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
5264 r);
5265 return r;
5266 }
5267
5268 return 0;
5269 }
5270
5271 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5272 bool enable)
5273 {
5274 if ((adev->asic_type == CHIP_POLARIS11) ||
5275 (adev->asic_type == CHIP_POLARIS12) ||
5276 (adev->asic_type == CHIP_VEGAM))
5277 /* Send msg to SMU via Powerplay */
5278 amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable, 0);
5279
5280 WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5281 }
5282
5283 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5284 bool enable)
5285 {
5286 WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5287 }
5288
5289 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5290 bool enable)
5291 {
5292 WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5293 }
5294
5295 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5296 bool enable)
5297 {
5298 WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5299 }
5300
5301 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5302 bool enable)
5303 {
5304 WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5305
5306 /* Read any GFX register to wake up GFX. */
5307 if (!enable)
5308 RREG32(mmDB_RENDER_CONTROL);
5309 }
5310
5311 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5312 bool enable)
5313 {
5314 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5315 cz_enable_gfx_cg_power_gating(adev, true);
5316 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5317 cz_enable_gfx_pipeline_power_gating(adev, true);
5318 } else {
5319 cz_enable_gfx_cg_power_gating(adev, false);
5320 cz_enable_gfx_pipeline_power_gating(adev, false);
5321 }
5322 }
5323
5324 static int gfx_v8_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
5325 enum amd_powergating_state state)
5326 {
5327 struct amdgpu_device *adev = ip_block->adev;
5328 bool enable = (state == AMD_PG_STATE_GATE);
5329
5330 if (amdgpu_sriov_vf(adev))
5331 return 0;
5332
5333 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5334 AMD_PG_SUPPORT_RLC_SMU_HS |
5335 AMD_PG_SUPPORT_CP |
5336 AMD_PG_SUPPORT_GFX_DMG))
5337 amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5338 switch (adev->asic_type) {
5339 case CHIP_CARRIZO:
5340 case CHIP_STONEY:
5341
5342 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5343 cz_enable_sck_slow_down_on_power_up(adev, true);
5344 cz_enable_sck_slow_down_on_power_down(adev, true);
5345 } else {
5346 cz_enable_sck_slow_down_on_power_up(adev, false);
5347 cz_enable_sck_slow_down_on_power_down(adev, false);
5348 }
5349 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5350 cz_enable_cp_power_gating(adev, true);
5351 else
5352 cz_enable_cp_power_gating(adev, false);
5353
5354 cz_update_gfx_cg_power_gating(adev, enable);
5355
5356 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5357 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5358 else
5359 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5360
5361 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5362 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5363 else
5364 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5365 break;
5366 case CHIP_POLARIS11:
5367 case CHIP_POLARIS12:
5368 case CHIP_VEGAM:
5369 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5370 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5371 else
5372 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5373
5374 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5375 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5376 else
5377 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5378
5379 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5380 polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5381 else
5382 polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5383 break;
5384 default:
5385 break;
5386 }
5387 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5388 AMD_PG_SUPPORT_RLC_SMU_HS |
5389 AMD_PG_SUPPORT_CP |
5390 AMD_PG_SUPPORT_GFX_DMG))
5391 amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5392 return 0;
5393 }
5394
5395 static void gfx_v8_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
5396 {
5397 struct amdgpu_device *adev = ip_block->adev;
5398 int data;
5399
5400 if (amdgpu_sriov_vf(adev))
5401 *flags = 0;
5402
5403 /* AMD_CG_SUPPORT_GFX_MGCG */
5404 data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5405 if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5406 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5407
5408 /* AMD_CG_SUPPORT_GFX_CGCG */
5409 data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5410 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5411 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5412
5413 /* AMD_CG_SUPPORT_GFX_CGLS */
5414 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5415 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5416
5417 /* AMD_CG_SUPPORT_GFX_CGTS */
5418 data = RREG32(mmCGTS_SM_CTRL_REG);
5419 if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5420 *flags |= AMD_CG_SUPPORT_GFX_CGTS;
5421
5422 /* AMD_CG_SUPPORT_GFX_CGTS_LS */
5423 if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5424 *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5425
5426 /* AMD_CG_SUPPORT_GFX_RLC_LS */
5427 data = RREG32(mmRLC_MEM_SLP_CNTL);
5428 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5429 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5430
5431 /* AMD_CG_SUPPORT_GFX_CP_LS */
5432 data = RREG32(mmCP_MEM_SLP_CNTL);
5433 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5434 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5435 }
5436
5437 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5438 uint32_t reg_addr, uint32_t cmd)
5439 {
5440 uint32_t data;
5441
5442 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
5443
5444 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5445 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5446
5447 data = RREG32(mmRLC_SERDES_WR_CTRL);
5448 if (adev->asic_type == CHIP_STONEY)
5449 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5450 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5451 RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5452 RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5453 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5454 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5455 RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5456 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5457 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5458 else
5459 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5460 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5461 RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5462 RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5463 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5464 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5465 RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5466 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5467 RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5468 RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5469 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5470 data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5471 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5472 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5473 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5474
5475 WREG32(mmRLC_SERDES_WR_CTRL, data);
5476 }
5477
5478 #define MSG_ENTER_RLC_SAFE_MODE 1
5479 #define MSG_EXIT_RLC_SAFE_MODE 0
5480 #define RLC_GPR_REG2__REQ_MASK 0x00000001
5481 #define RLC_GPR_REG2__REQ__SHIFT 0
5482 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5483 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5484
5485 static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev)
5486 {
5487 uint32_t rlc_setting;
5488
5489 rlc_setting = RREG32(mmRLC_CNTL);
5490 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
5491 return false;
5492
5493 return true;
5494 }
5495
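/*
 * RLC safe mode handshake: the driver writes CMD=1 plus a MESSAGE code
 * (1 = enter, 0 = exit) into RLC_SAFE_MODE, and the RLC firmware
 * acknowledges by clearing the CMD bit.  On entry the code below also
 * waits for RLC_GPM_STAT to report GFX clocks and power ungated, so that
 * GFX registers can be accessed safely.
 */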
5496 static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
5497 {
5498 uint32_t data;
5499 unsigned i;
5500 data = RREG32(mmRLC_CNTL);
5501 data |= RLC_SAFE_MODE__CMD_MASK;
5502 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5503 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5504 WREG32(mmRLC_SAFE_MODE, data);
5505
5506 /* wait for RLC_SAFE_MODE */
5507 for (i = 0; i < adev->usec_timeout; i++) {
5508 if ((RREG32(mmRLC_GPM_STAT) &
5509 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5510 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5511 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5512 RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5513 break;
5514 udelay(1);
5515 }
5516 for (i = 0; i < adev->usec_timeout; i++) {
5517 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5518 break;
5519 udelay(1);
5520 }
5521 }
5522
5523 static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
5524 {
5525 uint32_t data;
5526 unsigned i;
5527
5528 data = RREG32(mmRLC_CNTL);
5529 data |= RLC_SAFE_MODE__CMD_MASK;
5530 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5531 WREG32(mmRLC_SAFE_MODE, data);
5532
5533 for (i = 0; i < adev->usec_timeout; i++) {
5534 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5535 break;
5536 udelay(1);
5537 }
5538 }
5539
5540 static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid)
5541 {
5542 u32 data;
5543
5544 amdgpu_gfx_off_ctrl(adev, false);
5545
5546 if (amdgpu_sriov_is_pp_one_vf(adev))
5547 data = RREG32_NO_KIQ(mmRLC_SPM_VMID);
5548 else
5549 data = RREG32(mmRLC_SPM_VMID);
5550
5551 data &= ~RLC_SPM_VMID__RLC_SPM_VMID_MASK;
5552 data |= (vmid & RLC_SPM_VMID__RLC_SPM_VMID_MASK) << RLC_SPM_VMID__RLC_SPM_VMID__SHIFT;
5553
5554 if (amdgpu_sriov_is_pp_one_vf(adev))
5555 WREG32_NO_KIQ(mmRLC_SPM_VMID, data);
5556 else
5557 WREG32(mmRLC_SPM_VMID, data);
5558
5559 amdgpu_gfx_off_ctrl(adev, true);
5560 }
5561
5562 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5563 .is_rlc_enabled = gfx_v8_0_is_rlc_enabled,
5564 .set_safe_mode = gfx_v8_0_set_safe_mode,
5565 .unset_safe_mode = gfx_v8_0_unset_safe_mode,
5566 .init = gfx_v8_0_rlc_init,
5567 .get_csb_size = gfx_v8_0_get_csb_size,
5568 .get_csb_buffer = gfx_v8_0_get_csb_buffer,
5569 .get_cp_table_num = gfx_v8_0_cp_jump_table_num,
5570 .resume = gfx_v8_0_rlc_resume,
5571 .stop = gfx_v8_0_rlc_stop,
5572 .reset = gfx_v8_0_rlc_reset,
5573 .start = gfx_v8_0_rlc_start,
5574 .update_spm_vmid = gfx_v8_0_update_spm_vmid
5575 };
5576
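/*
 * Gating glossary for the functions below: MGCG/MGLS are medium grain
 * clock gating and memory light sleep, CGCG/CGLS their coarse grain
 * equivalents, and CGTS is the CGTS_SM "tree shade" shader gating.
 * Enabling goes fine-grained first (MGCG before CGCG); disabling runs in
 * the reverse order, as gfx_v8_0_update_gfx_clock_gating() spells out.
 */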
5577 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5578 bool enable)
5579 {
5580 uint32_t temp, data;
5581
5582 /* It is disabled by HW by default */
5583 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5584 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5585 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
5586 /* 1 - RLC memory Light sleep */
5587 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
5588
5589 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
5590 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
5591 }
5592
5593 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
5594 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5595 if (adev->flags & AMD_IS_APU)
5596 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5597 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5598 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5599 else
5600 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5601 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5602 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5603 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5604
5605 if (temp != data)
5606 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5607
5608 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5609 gfx_v8_0_wait_for_rlc_serdes(adev);
5610
5611 /* 5 - clear mgcg override */
5612 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5613
5614 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5615 /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
5616 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5617 data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5618 data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5619 data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5620 data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5621 if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5622 (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5623 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5624 data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5625 data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5626 if (temp != data)
5627 WREG32(mmCGTS_SM_CTRL_REG, data);
5628 }
5629 udelay(50);
5630
5631 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5632 gfx_v8_0_wait_for_rlc_serdes(adev);
5633 } else {
5634 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5635 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5636 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5637 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5638 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5639 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5640 if (temp != data)
5641 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5642
5643 /* 2 - disable MGLS in RLC */
5644 data = RREG32(mmRLC_MEM_SLP_CNTL);
5645 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5646 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5647 WREG32(mmRLC_MEM_SLP_CNTL, data);
5648 }
5649
5650 /* 3 - disable MGLS in CP */
5651 data = RREG32(mmCP_MEM_SLP_CNTL);
5652 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5653 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5654 WREG32(mmCP_MEM_SLP_CNTL, data);
5655 }
5656
5657 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5658 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5659 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5660 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5661 if (temp != data)
5662 WREG32(mmCGTS_SM_CTRL_REG, data);
5663
5664 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5665 gfx_v8_0_wait_for_rlc_serdes(adev);
5666
5667 /* 6 - set mgcg override */
5668 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5669
5670 udelay(50);
5671
5672 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5673 gfx_v8_0_wait_for_rlc_serdes(adev);
5674 }
5675 }
5676
5677 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5678 bool enable)
5679 {
5680 uint32_t temp, temp1, data, data1;
5681
5682 temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5683
5684 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5685 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5686 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5687 if (temp1 != data1)
5688 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5689
5690 /* 1 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5691 gfx_v8_0_wait_for_rlc_serdes(adev);
5692
5693 /* 2 - clear cgcg override */
5694 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5695
5696 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5697 gfx_v8_0_wait_for_rlc_serdes(adev);
5698
5699 /* 3 - write cmd to set CGLS */
5700 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5701
5702 /* 4 - enable cgcg */
5703 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5704
5705 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5706 /* enable cgls*/
5707 data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5708
5709 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5710 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5711
5712 if (temp1 != data1)
5713 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5714 } else {
5715 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5716 }
5717
5718 if (temp != data)
5719 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5720
5721 /* 5 - enable cntx_empty_int_enable/cntx_busy_int_enable/
5722 * Cmp_busy/GFX_Idle interrupts
5723 */
5724 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5725 } else {
5726 /* disable cntx_empty_int_enable & GFX Idle interrupt */
5727 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5728
5729 /* TEST CGCG */
5730 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5731 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5732 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5733 if (temp1 != data1)
5734 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5735
5736 /* read gfx register to wake up cgcg */
5737 RREG32(mmCB_CGTT_SCLK_CTRL);
5738 RREG32(mmCB_CGTT_SCLK_CTRL);
5739 RREG32(mmCB_CGTT_SCLK_CTRL);
5740 RREG32(mmCB_CGTT_SCLK_CTRL);
5741
5742 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5743 gfx_v8_0_wait_for_rlc_serdes(adev);
5744
5745 /* write cmd to Set CGCG Override */
5746 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5747
5748 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5749 gfx_v8_0_wait_for_rlc_serdes(adev);
5750
5751 /* write cmd to Clear CGLS */
5752 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5753
5754 /* disable cgcg, cgls should be disabled too. */
5755 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5756 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5757 if (temp != data)
5758 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5759 /* enable interrupts again for PG */
5760 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5761 }
5762
5763 gfx_v8_0_wait_for_rlc_serdes(adev);
5764 }
5765 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5766 bool enable)
5767 {
5768 amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5769
5770 if (enable) {
5771 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5772 * === MGCG + MGLS + TS(CG/LS) ===
5773 */
5774 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5775 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5776 } else {
5777 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5778 * === CGCG + CGLS ===
5779 */
5780 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5781 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5782 }
5783
5784 amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5785 return 0;
5786 }
5787
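/*
 * On Tonga and Polaris the clock gating switches live in the SMU, so the
 * two helpers below only compose PP_CG_MSG_ID(group, block, supported
 * states, requested state) messages and hand them to powerplay.  For
 * example, gating CGCG+CGLS sends PP_CG_MSG_ID(PP_GROUP_GFX,
 * PP_BLOCK_GFX_CG, PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
 * PP_STATE_CG | PP_STATE_LS) via amdgpu_dpm_set_clockgating_by_smu().
 */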
5788 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5789 enum amd_clockgating_state state)
5790 {
5791 uint32_t msg_id, pp_state = 0;
5792 uint32_t pp_support_state = 0;
5793
5794 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5795 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5796 pp_support_state = PP_STATE_SUPPORT_LS;
5797 pp_state = PP_STATE_LS;
5798 }
5799 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5800 pp_support_state |= PP_STATE_SUPPORT_CG;
5801 pp_state |= PP_STATE_CG;
5802 }
5803 if (state == AMD_CG_STATE_UNGATE)
5804 pp_state = 0;
5805
5806 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5807 PP_BLOCK_GFX_CG,
5808 pp_support_state,
5809 pp_state);
5810 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5811 }
5812
5813 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5814 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5815 pp_support_state = PP_STATE_SUPPORT_LS;
5816 pp_state = PP_STATE_LS;
5817 }
5818
5819 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5820 pp_support_state |= PP_STATE_SUPPORT_CG;
5821 pp_state |= PP_STATE_CG;
5822 }
5823
5824 if (state == AMD_CG_STATE_UNGATE)
5825 pp_state = 0;
5826
5827 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5828 PP_BLOCK_GFX_MG,
5829 pp_support_state,
5830 pp_state);
5831 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5832 }
5833
5834 return 0;
5835 }
5836
gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device * adev,enum amd_clockgating_state state)5837 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5838 enum amd_clockgating_state state)
5839 {
5840
5841 uint32_t msg_id, pp_state = 0;
5842 uint32_t pp_support_state = 0;
5843
5844 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5845 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5846 pp_support_state = PP_STATE_SUPPORT_LS;
5847 pp_state = PP_STATE_LS;
5848 }
5849 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5850 pp_support_state |= PP_STATE_SUPPORT_CG;
5851 pp_state |= PP_STATE_CG;
5852 }
5853 if (state == AMD_CG_STATE_UNGATE)
5854 pp_state = 0;
5855
5856 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5857 PP_BLOCK_GFX_CG,
5858 pp_support_state,
5859 pp_state);
5860 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5861 }
5862
5863 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
5864 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5865 pp_support_state = PP_STATE_SUPPORT_LS;
5866 pp_state = PP_STATE_LS;
5867 }
5868 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5869 pp_support_state |= PP_STATE_SUPPORT_CG;
5870 pp_state |= PP_STATE_CG;
5871 }
5872 if (state == AMD_CG_STATE_UNGATE)
5873 pp_state = 0;
5874
5875 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5876 PP_BLOCK_GFX_3D,
5877 pp_support_state,
5878 pp_state);
5879 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5880 }
5881
5882 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5883 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5884 pp_support_state = PP_STATE_SUPPORT_LS;
5885 pp_state = PP_STATE_LS;
5886 }
5887
5888 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5889 pp_support_state |= PP_STATE_SUPPORT_CG;
5890 pp_state |= PP_STATE_CG;
5891 }
5892
5893 if (state == AMD_CG_STATE_UNGATE)
5894 pp_state = 0;
5895
5896 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5897 PP_BLOCK_GFX_MG,
5898 pp_support_state,
5899 pp_state);
5900 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5901 }
5902
5903 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5904 pp_support_state = PP_STATE_SUPPORT_LS;
5905
5906 if (state == AMD_CG_STATE_UNGATE)
5907 pp_state = 0;
5908 else
5909 pp_state = PP_STATE_LS;
5910
5911 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5912 PP_BLOCK_GFX_RLC,
5913 pp_support_state,
5914 pp_state);
5915 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5916 }
5917
5918 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5919 pp_support_state = PP_STATE_SUPPORT_LS;
5920
5921 if (state == AMD_CG_STATE_UNGATE)
5922 pp_state = 0;
5923 else
5924 pp_state = PP_STATE_LS;
5925 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5926 PP_BLOCK_GFX_CP,
5927 pp_support_state,
5928 pp_state);
5929 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5930 }
5931
5932 return 0;
5933 }
5934
gfx_v8_0_set_clockgating_state(struct amdgpu_ip_block * ip_block,enum amd_clockgating_state state)5935 static int gfx_v8_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
5936 enum amd_clockgating_state state)
5937 {
5938 struct amdgpu_device *adev = ip_block->adev;
5939
5940 if (amdgpu_sriov_vf(adev))
5941 return 0;
5942
5943 switch (adev->asic_type) {
5944 case CHIP_FIJI:
5945 case CHIP_CARRIZO:
5946 case CHIP_STONEY:
5947 gfx_v8_0_update_gfx_clock_gating(adev,
5948 state == AMD_CG_STATE_GATE);
5949 break;
5950 case CHIP_TONGA:
5951 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
5952 break;
5953 case CHIP_POLARIS10:
5954 case CHIP_POLARIS11:
5955 case CHIP_POLARIS12:
5956 case CHIP_VEGAM:
5957 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
5958 break;
5959 default:
5960 break;
5961 }
5962 return 0;
5963 }
5964
gfx_v8_0_ring_get_rptr(struct amdgpu_ring * ring)5965 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
5966 {
5967 return *ring->rptr_cpu_addr;
5968 }
5969
gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring * ring)5970 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5971 {
5972 struct amdgpu_device *adev = ring->adev;
5973
5974 if (ring->use_doorbell)
5975 /* XXX check if swapping is necessary on BE */
5976 return *ring->wptr_cpu_addr;
5977 else
5978 return RREG32(mmCP_RB0_WPTR);
5979 }
5980
gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring * ring)5981 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5982 {
5983 struct amdgpu_device *adev = ring->adev;
5984
5985 if (ring->use_doorbell) {
5986 /* XXX check if swapping is necessary on BE */
5987 *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
5988 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
5989 } else {
5990 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
5991 (void)RREG32(mmCP_RB0_WPTR);
5992 }
5993 }
5994
gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring * ring)5995 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5996 {
5997 u32 ref_and_mask, reg_mem_engine;
5998
5999 if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
6000 (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
6001 switch (ring->me) {
6002 case 1:
6003 ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6004 break;
6005 case 2:
6006 ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6007 break;
6008 default:
6009 return;
6010 }
6011 reg_mem_engine = 0;
6012 } else {
6013 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6014 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6015 }
6016
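	/* WAIT_REG_MEM with six payload dwords: the control word (write the
	 * reference value, then wait, comparing for equality), the flush
	 * request and flush done register offsets, the reference value, the
	 * compare mask and the poll interval. The CP writes ref_and_mask to
	 * GPU_HDP_FLUSH_REQ and then polls GPU_HDP_FLUSH_DONE until the
	 * selected engine's bit comes back.
	 */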
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}

static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
				EVENT_INDEX(4));

	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
				EVENT_INDEX(0));
}

static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_job *job,
				      struct amdgpu_ib *ib,
				      uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vmid << 24);

	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
		control |= INDIRECT_BUFFER_PRE_ENB(1);

		if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
			gfx_v8_0_ring_emit_de_meta(ring);
	}

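	/* The IB packet body is three dwords: the dword-aligned IB base
	 * address (with swap control in the two low bits on big-endian
	 * hosts), the upper 16 address bits, and a control word carrying
	 * the IB length in dwords plus the VMID in bits 31:24.
	 */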
	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}

static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_job *job,
					  struct amdgpu_ib *ib,
					  uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);

	/* Currently, there is a high possibility to get wave ID mismatch
	 * between ME and GDS, leading to a hw deadlock, because ME generates
	 * different wave IDs than the GDS expects. This situation happens
	 * randomly when at least 5 compute pipes use GDS ordered append.
	 * The wave IDs generated by ME are also wrong after suspend/resume.
	 * Those are probably bugs somewhere else in the kernel driver.
	 *
	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
	 * GDS to 0 for this ring (me/pipe).
	 */
	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}

static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
	bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;

	/* Workaround for cache flush problems. First send a dummy EOP
	 * event down the pipe with seq one below.
	 */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
				DATA_SEL(1) | INT_SEL(0));
	amdgpu_ring_write(ring, lower_32_bits(seq - 1));
	amdgpu_ring_write(ring, upper_32_bits(seq - 1));

	/* Then send the real EOP event down the pipe:
	 * EVENT_WRITE_EOP - flush caches, send int */
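	/* DATA_SEL selects a 32-bit (1) or 64-bit (2) fence value write, and
	 * INT_SEL(2) additionally raises an interrupt once the write has
	 * landed; the dummy EOP above used INT_SEL(0), so only the real
	 * fence interrupts the host.
	 */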
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5) |
				 (exec ? EOP_EXEC : 0)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
				DATA_SEL(write64bit ? 2 : 1) |
				INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}

static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

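	/* Stall the ring until the fence memory location reports sync_seq:
	 * compare function "equal" with a full-dword mask, executed from the
	 * PFP on gfx rings (so command fetch itself stalls) or from the ME
	 * on compute rings.
	 */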
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, 4); /* poll interval */
}

static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vmid, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}
}

static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
{
	return *ring->wptr_cpu_addr;
}

static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* XXX check if swapping is necessary on BE */
	*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
}

static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}

static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned int flags)
{
	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}

static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}

static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_ce_meta(ring);

	dw2 |= 0x80000000; /* set load_enable, otherwise this packet is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		gfx_v8_0_ring_emit_vgt_flush(ring);
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time a preamble
		 * is presented, even though no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}

static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
						  uint64_t addr)
{
	unsigned ret;

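	/* COND_EXEC: the CP reads a 64-bit predicate at addr and, when it is
	 * zero, skips the number of following dwords given in the last
	 * packet dword. That count is not known yet, so a placeholder is
	 * emitted and the returned wptr offset lets the caller patch the
	 * real count in once the conditional section has been written.
	 */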
	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	/* discard following DWs if *cond_exec_gpu_addr == 0 */
	amdgpu_ring_write(ring, 0);
	ret = ring->wptr & ring->buf_mask;
	/* patch dummy value later */
	amdgpu_ring_write(ring, 0);
	return ret;
}

static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
				    uint32_t reg_val_offs)
{
	struct amdgpu_device *adev = ring->adev;

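	/* COPY_DATA from register to memory: the CP reads `reg` and writes
	 * its value, with write confirmation, into the write-back buffer
	 * slot at reg_val_offs, where the caller can then pick it up.
	 */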
	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |		/* src: register */
				(5 << 8) |	/* dst: memory */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				reg_val_offs * 4));
}

static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				    uint32_t val)
{
	uint32_t cmd;

	switch (ring->funcs->type) {
	case AMDGPU_RING_TYPE_GFX:
		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
		break;
	case AMDGPU_RING_TYPE_KIQ:
		cmd = 1 << 16; /* no inc addr */
		break;
	default:
		cmd = WR_CONFIRM;
		break;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, cmd);
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t value = 0;

	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
	WREG32(mmSQ_CMD, value);
}

static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
						 enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}

static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
						     int me, int pipe,
						     enum amdgpu_interrupt_state state)
{
	u32 mec_int_cntl, mec_int_cntl_reg;

	/*
	 * amdgpu controls only the first MEC. That's why this function only
	 * handles the setting of interrupts for this specific MEC. All other
	 * pipes' interrupts are set by amdkfd.
	 */

	if (me == 1) {
		switch (pipe) {
		case 0:
			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
			break;
		case 1:
			mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
			break;
		case 2:
			mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
			break;
		case 3:
			mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
			break;
		default:
			DRM_DEBUG("invalid pipe %d\n", pipe);
			return;
		}
	} else {
		DRM_DEBUG("invalid me %d\n", me);
		return;
	}

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	default:
		break;
	}
}

static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *source,
					     unsigned type,
					     enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}

static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      unsigned type,
					      enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}

static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}

static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
					 struct amdgpu_irq_src *source,
					 unsigned int type,
					 enum amdgpu_interrupt_state state)
{
	int enable_flag;

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		enable_flag = 0;
		break;

	case AMDGPU_IRQ_STATE_ENABLE:
		enable_flag = 1;
		break;

	default:
		return -EINVAL;
	}

	WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);

	return 0;
}

static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     unsigned int type,
				     enum amdgpu_interrupt_state state)
{
	int enable_flag;

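	/* Note the inversion: SQ interrupts are masked by stalling the
	 * interrupt message path, so disabling maps to STALL = 1 and
	 * enabling clears the stall.
	 */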
	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		enable_flag = 1;
		break;

	case AMDGPU_IRQ_STATE_ENABLE:
		enable_flag = 0;
		break;

	default:
		return -EINVAL;
	}

	WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
		     enable_flag);

	return 0;
}

static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
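	/* ring_id encodes the source queue: bits [3:2] hold the ME,
	 * bits [1:0] the pipe and bits [6:4] the queue within that pipe.
	 */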
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting
			 * from VI. The interrupt can only be enabled/disabled
			 * per pipe instead of per queue.
			 */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) &&
			    (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}

static void gfx_v8_0_fault(struct amdgpu_device *adev,
			   struct amdgpu_iv_entry *entry)
{
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;
	int i;

	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			if (ring->me == me_id && ring->pipe == pipe_id &&
			    ring->queue == queue_id)
				drm_sched_fault(&ring->sched);
		}
		break;
	}
}

static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	gfx_v8_0_fault(adev, entry);
	return 0;
}

static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	gfx_v8_0_fault(adev, entry);
	return 0;
}

static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("CP EDC/ECC error detected.\n");
	return 0;
}

static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data,
				  bool from_wq)
{
	u32 enc, se_id, sh_id, cu_id;
	char type[20];
	int sq_edc_source = -1;

	enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
	se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);

	switch (enc) {
	case 0:
		DRM_INFO("SQ general purpose intr detected: "
			 "se_id %d, immed_overflow %d, host_reg_overflow %d, "
			 "host_cmd_overflow %d, cmd_timestamp %d, "
			 "reg_timestamp %d, thread_trace_buff_full %d, "
			 "wlt %d, thread_trace %d.\n",
			 se_id,
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE));
		break;
	case 1:
	case 2:
		cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
		sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);

		/*
		 * This function can be called either directly from the ISR
		 * or from the BH, in which case we can also access the
		 * SQ_EDC_INFO instance.
		 */
		if (from_wq) {
			mutex_lock(&adev->grbm_idx_mutex);
			gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id, 0);

			sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);

			gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
			mutex_unlock(&adev->grbm_idx_mutex);
		}

		if (enc == 1)
			sprintf(type, "instruction intr");
		else
			sprintf(type, "EDC/ECC error");

		DRM_INFO("SQ %s detected: "
			 "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d, "
			 "trap %s, sq_ed_info.source %s.\n",
			 type, se_id, sh_id, cu_id,
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
			 (sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable");
		break;
	default:
		DRM_ERROR("SQ invalid encoding type.\n");
	}
}

static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
{
	struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
	struct sq_work *sq_work = container_of(work, struct sq_work, work);

	gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data, true);
}

static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
			   struct amdgpu_irq_src *source,
			   struct amdgpu_iv_entry *entry)
{
	unsigned ih_data = entry->src_data[0];

	/*
	 * Try to submit work so SQ_EDC_INFO can be accessed from
	 * the BH. If a previous work submission hasn't finished yet,
	 * just print whatever info is possible directly from the ISR.
	 */
	if (work_pending(&adev->gfx.sq_work.work)) {
		gfx_v8_0_parse_sq_irq(adev, ih_data, false);
	} else {
		adev->gfx.sq_work.ih_data = ih_data;
		schedule_work(&adev->gfx.sq_work.work);
	}

	return 0;
}

static void gfx_v8_0_emit_mem_sync(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
				PACKET3_TC_ACTION_ENA |
				PACKET3_SH_KCACHE_ACTION_ENA |
				PACKET3_SH_ICACHE_ACTION_ENA |
				PACKET3_TC_WB_ACTION_ENA);	/* CP_COHER_CNTL */
	amdgpu_ring_write(ring, 0xffffffff);	/* CP_COHER_SIZE */
	amdgpu_ring_write(ring, 0);		/* CP_COHER_BASE */
	amdgpu_ring_write(ring, 0x0000000A);	/* poll interval */
}

static void gfx_v8_0_emit_mem_sync_compute(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
	amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
				PACKET3_TC_ACTION_ENA |
				PACKET3_SH_KCACHE_ACTION_ENA |
				PACKET3_SH_ICACHE_ACTION_ENA |
				PACKET3_TC_WB_ACTION_ENA);	/* CP_COHER_CNTL */
	amdgpu_ring_write(ring, 0xffffffff);	/* CP_COHER_SIZE */
	amdgpu_ring_write(ring, 0xff);		/* CP_COHER_SIZE_HI */
	amdgpu_ring_write(ring, 0);		/* CP_COHER_BASE */
	amdgpu_ring_write(ring, 0);		/* CP_COHER_BASE_HI */
	amdgpu_ring_write(ring, 0x0000000A);	/* poll interval */
}

/* the mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are all the same */
#define mmSPI_WCL_PIPE_PERCENT_CS_DEFAULT	0x0000007f

static void gfx_v8_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
					uint32_t pipe, bool enable)
{
	uint32_t val;
	uint32_t wcl_cs_reg;

	val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS_DEFAULT;

	switch (pipe) {
	case 0:
		wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS0;
		break;
	case 1:
		wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS1;
		break;
	case 2:
		wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS2;
		break;
	case 3:
		wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS3;
		break;
	default:
		DRM_DEBUG("invalid pipe %d\n", pipe);
		return;
	}

	amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
}

#define mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT	0x07ffffff

static void gfx_v8_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t val;
	int i;

	/* mmSPI_WCL_PIPE_PERCENT_GFX is a 7-bit multiplier register that
	 * limits the number of gfx waves. Setting it to 0x1f (5 bits)
	 * ensures gfx only gets around 25% of the GPU's resources.
	 */
	val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
	amdgpu_ring_emit_wreg(ring, mmSPI_WCL_PIPE_PERCENT_GFX, val);

	/* Restrict waves for normal/low priority compute queues as well
	 * to get the best QoS for high priority compute jobs.
	 *
	 * amdgpu controls only the 1st ME (CS pipes 0-3).
	 */
	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
		if (i != ring->pipe)
			gfx_v8_0_emit_wave_limit_cs(ring, i, enable);
	}
}

static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
};

static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
		12 + /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 +  /* double SWITCH_BUFFER,
		      * the first COND_EXEC jumps to the place
		      * just prior to this double SWITCH_BUFFER
		      */
		5 +  /* COND_EXEC */
		7 +  /* HDP_flush */
		4 +  /* VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 +  /* CNTX_CTRL */
		5 +  /* HDP_INVL */
		12 + 12 + /* FENCE x2 */
		2 +  /* SWITCH_BUFFER */
		5,   /* SURFACE_SYNC */
	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
	.soft_recovery = gfx_v8_0_ring_soft_recovery,
	.emit_mem_sync = gfx_v8_0_emit_mem_sync,
};

static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 +  /* gfx_v8_0_ring_emit_hdp_flush */
		5 +  /* hdp_invalidate */
		7 +  /* gfx_v8_0_ring_emit_pipeline_sync */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7 + /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
		7 +  /* gfx_v8_0_emit_mem_sync_compute */
		5 +  /* gfx_v8_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
		15,  /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
	.emit_ib_size =	7, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
	.soft_recovery = gfx_v8_0_ring_soft_recovery,
	.emit_mem_sync = gfx_v8_0_emit_mem_sync_compute,
	.emit_wave_limit = gfx_v8_0_emit_wave_limit,
};

static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 +  /* gfx_v8_0_ring_emit_hdp_flush */
		5 +  /* hdp_invalidate */
		7 +  /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size =	7, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
	.test_ring = gfx_v8_0_ring_test_ring,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v8_0_ring_emit_rreg,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.kiq[0].ring.funcs = &gfx_v8_0_ring_funcs_kiq;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
}

static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
	.set = gfx_v8_0_set_cp_ecc_int_state,
	.process = gfx_v8_0_cp_ecc_error_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
	.set = gfx_v8_0_set_sq_int_state,
	.process = gfx_v8_0_sq_irq,
};

static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;

	adev->gfx.cp_ecc_error_irq.num_types = 1;
	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;

	adev->gfx.sq_irq.num_types = 1;
	adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
}

static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}

static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws_size = 64;
	adev->gds.oa_size = 16;
	adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
}

static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
}

static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
	       RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
}

static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];
	u32 ao_cu_num;

	memset(cu_info, 0, sizeof(*cu_info));

	if (adev->flags & AMD_IS_APU)
		ao_cu_num = 2;
	else
		ao_cu_num = adev->gfx.config.max_cu_per_sh;

	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

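	/* Walk every SE/SH pair: apply the user-requested inactive-CU mask,
	 * read back the resulting active-CU bitmap, and mark up to
	 * ao_cu_num of the active CUs in each SH as always-on.
	 */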
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0);
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[0][i][j] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask) {
					if (counter < ao_cu_num)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
	cu_info->max_waves_per_simd = 10;
	cu_info->max_scratch_slots_per_cu = 32;
	cu_info->wave_front_size = 64;
	cu_info->lds_size = 64;
}

const struct amdgpu_ip_block_version gfx_v8_0_ip_block = {
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};

const struct amdgpu_ip_block_version gfx_v8_1_ip_block = {
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};

static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
{
	uint64_t ce_payload_addr;
	int cnt_ce;
	union {
		struct vi_ce_ib_state regular;
		struct vi_ce_ib_state_chained_ib chained;
	} ce_payload = {};

	if (ring->adev->virt.chained_ib_support) {
		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
			offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
		cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
	} else {
		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
			offsetof(struct vi_gfx_meta_data, ce_payload);
		cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
				 WRITE_DATA_DST_SEL(8) |
				 WR_CONFIRM) |
				 WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
}

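/* Counterpart of the CE metadata write above for the DE: under SR-IOV the DE
 * payload also lives in the CSA, and the GDS backup area is placed 4 KiB
 * past the CSA base.
 */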
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
{
	uint64_t de_payload_addr, gds_addr, csa_addr;
	int cnt_de;
	union {
		struct vi_de_ib_state regular;
		struct vi_de_ib_state_chained_ib chained;
	} de_payload = {};

	csa_addr = amdgpu_csa_vaddr(ring->adev);
	gds_addr = csa_addr + 4096;
	if (ring->adev->virt.chained_ib_support) {
		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
		cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
	} else {
		de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
		cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(8) |
				 WR_CONFIRM) |
				 WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
}