/* fuc microcode for nvc0 PGRAPH/GPC
 *
 * Copyright 2011 Red Hat Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Ben Skeggs
 */

/* To build:
 *    m4 nvc0_grgpc.fuc | envyas -a -w -m fuc -V nva3 -o nvc0_grgpc.fuc.h
 */

/* TODO
 * - bracket certain functions with scratch writes, useful for debugging
 * - watchdog timer around ctx operations
 */

.section #nvc0_grgpc_data
include(`nvc0_graph.fuc')
gpc_id:                 .b32 0
gpc_mmio_list_head:     .b32 0
gpc_mmio_list_tail:     .b32 0

tpc_count:              .b32 0
tpc_mask:               .b32 0
tpc_mmio_list_head:     .b32 0
tpc_mmio_list_tail:     .b32 0

cmd_queue:              queue_init

// chipset descriptions
chipsets:
.b8  0xc0 0 0 0
.b16 #nvc0_gpc_mmio_head
.b16 #nvc0_gpc_mmio_tail
.b16 #nvc0_tpc_mmio_head
.b16 #nvc0_tpc_mmio_tail
.b8  0xc1 0 0 0
.b16 #nvc0_gpc_mmio_head
.b16 #nvc1_gpc_mmio_tail
.b16 #nvc0_tpc_mmio_head
.b16 #nvc1_tpc_mmio_tail
.b8  0xc3 0 0 0
.b16 #nvc0_gpc_mmio_head
.b16 #nvc0_gpc_mmio_tail
.b16 #nvc0_tpc_mmio_head
.b16 #nvc3_tpc_mmio_tail
.b8  0xc4 0 0 0
.b16 #nvc0_gpc_mmio_head
.b16 #nvc0_gpc_mmio_tail
.b16 #nvc0_tpc_mmio_head
.b16 #nvc3_tpc_mmio_tail
.b8  0xc8 0 0 0
.b16 #nvc0_gpc_mmio_head
.b16 #nvc0_gpc_mmio_tail
.b16 #nvc0_tpc_mmio_head
.b16 #nvc0_tpc_mmio_tail
.b8  0xce 0 0 0
.b16 #nvc0_gpc_mmio_head
.b16 #nvc0_gpc_mmio_tail
.b16 #nvc0_tpc_mmio_head
.b16 #nvc3_tpc_mmio_tail
.b8  0xcf 0 0 0
.b16 #nvc0_gpc_mmio_head
.b16 #nvc0_gpc_mmio_tail
.b16 #nvc0_tpc_mmio_head
.b16 #nvcf_tpc_mmio_tail
.b8  0xd9 0 0 0
.b16 #nvd9_gpc_mmio_head
.b16 #nvd9_gpc_mmio_tail
.b16 #nvd9_tpc_mmio_head
.b16 #nvd9_tpc_mmio_tail
.b8  0 0 0 0

// GPC mmio lists
nvc0_gpc_mmio_head:
mmctx_data(0x000380, 1)
mmctx_data(0x000400, 6)
mmctx_data(0x000450, 9)
mmctx_data(0x000600, 1)
mmctx_data(0x000684, 1)
mmctx_data(0x000700, 5)
mmctx_data(0x000800, 1)
mmctx_data(0x000808, 3)
mmctx_data(0x000828, 1)
mmctx_data(0x000830, 1)
mmctx_data(0x0008d8, 1)
mmctx_data(0x0008e0, 1)
mmctx_data(0x0008e8, 6)
mmctx_data(0x00091c, 1)
mmctx_data(0x000924, 3)
mmctx_data(0x000b00, 1)
mmctx_data(0x000b08, 6)
mmctx_data(0x000bb8, 1)
mmctx_data(0x000c08, 1)
mmctx_data(0x000c10, 8)
mmctx_data(0x000c80, 1)
mmctx_data(0x000c8c, 1)
mmctx_data(0x001000, 3)
mmctx_data(0x001014, 1)
nvc0_gpc_mmio_tail:
mmctx_data(0x000c6c, 1);
nvc1_gpc_mmio_tail:

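// Note: each mmctx_data() entry in these lists is assumed to expand (via the
// macro in nvc0_graph.fuc) to a single 32-bit word packing a register offset
// with a repeat count; a head/tail label pair therefore bounds a contiguous
// run of words that mmctx_xfer walks when saving or restoring that unit.
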
nvd9_gpc_mmio_head:
mmctx_data(0x000380, 1)
mmctx_data(0x000400, 2)
mmctx_data(0x00040c, 3)
mmctx_data(0x000450, 9)
mmctx_data(0x000600, 1)
mmctx_data(0x000684, 1)
mmctx_data(0x000700, 5)
mmctx_data(0x000800, 1)
mmctx_data(0x000808, 3)
mmctx_data(0x000828, 1)
mmctx_data(0x000830, 1)
mmctx_data(0x0008d8, 1)
mmctx_data(0x0008e0, 1)
mmctx_data(0x0008e8, 6)
mmctx_data(0x00091c, 1)
mmctx_data(0x000924, 3)
mmctx_data(0x000b00, 1)
mmctx_data(0x000b08, 6)
mmctx_data(0x000bb8, 1)
mmctx_data(0x000c08, 1)
mmctx_data(0x000c10, 8)
mmctx_data(0x000c6c, 1)
mmctx_data(0x000c80, 1)
mmctx_data(0x000c8c, 1)
mmctx_data(0x001000, 3)
mmctx_data(0x001014, 1)
nvd9_gpc_mmio_tail:

// TPC mmio lists
nvc0_tpc_mmio_head:
mmctx_data(0x000018, 1)
mmctx_data(0x00003c, 1)
mmctx_data(0x000048, 1)
mmctx_data(0x000064, 1)
mmctx_data(0x000088, 1)
mmctx_data(0x000200, 6)
mmctx_data(0x00021c, 2)
mmctx_data(0x000300, 6)
mmctx_data(0x0003d0, 1)
mmctx_data(0x0003e0, 2)
mmctx_data(0x000400, 3)
mmctx_data(0x000420, 1)
mmctx_data(0x0004b0, 1)
mmctx_data(0x0004e8, 1)
mmctx_data(0x0004f4, 1)
mmctx_data(0x000520, 2)
mmctx_data(0x000604, 4)
mmctx_data(0x000644, 20)
mmctx_data(0x000698, 1)
mmctx_data(0x000750, 2)
nvc0_tpc_mmio_tail:
mmctx_data(0x000758, 1)
mmctx_data(0x0002c4, 1)
mmctx_data(0x0006e0, 1)
nvcf_tpc_mmio_tail:
mmctx_data(0x0004bc, 1)
nvc3_tpc_mmio_tail:
mmctx_data(0x000544, 1)
nvc1_tpc_mmio_tail:

nvd9_tpc_mmio_head:
mmctx_data(0x000018, 1)
mmctx_data(0x00003c, 1)
mmctx_data(0x000048, 1)
mmctx_data(0x000064, 1)
mmctx_data(0x000088, 1)
mmctx_data(0x000200, 6)
mmctx_data(0x00021c, 2)
mmctx_data(0x0002c4, 1)
mmctx_data(0x000300, 6)
mmctx_data(0x0003d0, 1)
mmctx_data(0x0003e0, 2)
mmctx_data(0x000400, 3)
mmctx_data(0x000420, 3)
mmctx_data(0x0004b0, 1)
mmctx_data(0x0004e8, 1)
mmctx_data(0x0004f4, 1)
mmctx_data(0x000520, 2)
mmctx_data(0x000544, 1)
mmctx_data(0x000604, 4)
mmctx_data(0x000644, 20)
mmctx_data(0x000698, 1)
mmctx_data(0x0006e0, 1)
mmctx_data(0x000750, 3)
nvd9_tpc_mmio_tail:

.section #nvc0_grgpc_code
bra #init
define(`include_code')
include(`nvc0_graph.fuc')

// reports an exception to the host
//
// In: $r15 error code (see nvc0_graph.fuc)
//
error:
        push $r14
        mov $r14 -0x67ec        // 0x9814
        sethi $r14 0x400000
        call #nv_wr32           // HUB_CTXCTL_CC_SCRATCH[5] = error code
        add b32 $r14 0x41c
        mov $r15 1
        call #nv_wr32           // HUB_CTXCTL_INTR_UP_SET
        pop $r14
        ret

// GPC fuc initialisation, executed by triggering ucode start, will
// fall through to main loop after completion.
//
// Input:
//   CC_SCRATCH[0]: chipset (PMC_BOOT_0 read returns 0x0bad0bad... sigh)
//   CC_SCRATCH[1]: context base
//
// Output:
//   CC_SCRATCH[0]:
//        31:31: set to signal completion
//   CC_SCRATCH[1]:
//         31:0: GPC context size
//
init:
        clear b32 $r0
        mov $sp $r0

        // enable fifo access
        mov $r1 0x1200
        mov $r2 2
        iowr I[$r1 + 0x000] $r2         // FIFO_ENABLE

        // setup i0 handler, and route all interrupts to it
        mov $r1 #ih
        mov $iv0 $r1
        mov $r1 0x400
        iowr I[$r1 + 0x300] $r0         // INTR_DISPATCH

        // enable fifo interrupt
        mov $r2 4
        iowr I[$r1 + 0x000] $r2         // INTR_EN_SET

        // enable interrupts
        bset $flags ie0

        // figure out which GPC we are, and how many TPCs we have
        mov $r1 0x608
        shl b32 $r1 6
        iord $r2 I[$r1 + 0x000]         // UNITS
        mov $r3 1
        and $r2 0x1f
        shl b32 $r3 $r2
        sub b32 $r3 1
        st b32 D[$r0 + #tpc_count] $r2
        st b32 D[$r0 + #tpc_mask] $r3
        add b32 $r1 0x400
        iord $r2 I[$r1 + 0x000]         // MYINDEX
        st b32 D[$r0 + #gpc_id] $r2

        // find context data for this chipset
        mov $r2 0x800
        shl b32 $r2 6
        iord $r2 I[$r2 + 0x000]         // CC_SCRATCH[0]
        mov $r1 #chipsets - 12
        init_find_chipset:
                add b32 $r1 12
                ld b32 $r3 D[$r1 + 0x00]
                cmpu b32 $r3 $r2
                bra e #init_context
                cmpu b32 $r3 0
                bra ne #init_find_chipset
                // unknown chipset
                ret

        // initialise context base, and size tracking
        init_context:
        mov $r2 0x800
        shl b32 $r2 6
        iord $r2 I[$r2 + 0x100]         // CC_SCRATCH[1], initial base
        clear b32 $r3                   // track GPC context size here

        // set mmctx base addresses now so we don't have to do it later,
        // they don't currently ever change
        mov $r4 0x700
        shl b32 $r4 6
        shr b32 $r5 $r2 8
        iowr I[$r4 + 0x000] $r5         // MMCTX_SAVE_SWBASE
        iowr I[$r4 + 0x100] $r5         // MMCTX_LOAD_SWBASE

        // calculate GPC mmio context size, store the chipset-specific
        // mmio list pointers somewhere we can get at them later without
        // re-parsing the chipset list
        clear b32 $r14
        clear b32 $r15
        ld b16 $r14 D[$r1 + 4]
        ld b16 $r15 D[$r1 + 6]
        st b16 D[$r0 + #gpc_mmio_list_head] $r14
        st b16 D[$r0 + #gpc_mmio_list_tail] $r15
        call #mmctx_size
        add b32 $r2 $r15
        add b32 $r3 $r15

        // calculate per-TPC mmio context size, store the list pointers
        ld b16 $r14 D[$r1 + 8]
        ld b16 $r15 D[$r1 + 10]
        st b16 D[$r0 + #tpc_mmio_list_head] $r14
        st b16 D[$r0 + #tpc_mmio_list_tail] $r15
        call #mmctx_size
        ld b32 $r14 D[$r0 + #tpc_count]
        mulu $r14 $r15
        add b32 $r2 $r14
        add b32 $r3 $r14

        // round up base/size to 256 byte boundary (for strand SWBASE)
        add b32 $r4 0x1300
        shr b32 $r3 2
        iowr I[$r4 + 0x000] $r3         // MMCTX_LOAD_COUNT, wtf for?!?
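        // ($r3 still holds the size in 32-bit words from the shr 2 above,
        //  hence the shr 6 rather than 8 below; both base and size end up
        //  rounded up to the next 256-byte multiple.)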
        shr b32 $r2 8
        shr b32 $r3 6
        add b32 $r2 1
        add b32 $r3 1
        shl b32 $r2 8
        shl b32 $r3 8

        // calculate size of strand context data
        mov b32 $r15 $r2
        call #strand_ctx_init
        add b32 $r3 $r15

        // save context size, and tell HUB we're done
        mov $r1 0x800
        shl b32 $r1 6
        iowr I[$r1 + 0x100] $r3         // CC_SCRATCH[1] = context size
        add b32 $r1 0x800
        clear b32 $r2
        bset $r2 31
        iowr I[$r1 + 0x000] $r2         // CC_SCRATCH[0] |= 0x80000000

// Main program loop, very simple, sleeps until woken up by the interrupt
// handler, pulls a command from the queue and executes its handler
//
main:
        bset $flags $p0
        sleep $p0
        mov $r13 #cmd_queue
        call #queue_get
        bra $p1 #main

        // 0x0000-0x0003 are all context transfers
        cmpu b32 $r14 0x04
        bra nc #main_not_ctx_xfer
                // fetch $flags and mask off $p1/$p2
                mov $r1 $flags
                mov $r2 0x0006
                not b32 $r2
                and $r1 $r2
                // set $p1/$p2 according to transfer type
                shl b32 $r14 1
                or $r1 $r14
                mov $flags $r1
                // transfer context data
                call #ctx_xfer
                bra #main

        main_not_ctx_xfer:
        shl b32 $r15 $r14 16
        or $r15 E_BAD_COMMAND
        call #error
        bra #main

// interrupt handler
ih:
        push $r8
        mov $r8 $flags
        push $r8
        push $r9
        push $r10
        push $r11
        push $r13
        push $r14
        push $r15

        // incoming fifo command?
        iord $r10 I[$r0 + 0x200]        // INTR
        and $r11 $r10 0x00000004
        bra e #ih_no_fifo
                // queue incoming fifo command for later processing
                mov $r11 0x1900
                mov $r13 #cmd_queue
                iord $r14 I[$r11 + 0x100]       // FIFO_CMD
                iord $r15 I[$r11 + 0x000]       // FIFO_DATA
                call #queue_put
                add b32 $r11 0x400
                mov $r14 1
                iowr I[$r11 + 0x000] $r14       // FIFO_ACK

        // ack, and wake up main()
        ih_no_fifo:
        iowr I[$r0 + 0x100] $r10        // INTR_ACK

        pop $r15
        pop $r14
        pop $r13
        pop $r11
        pop $r10
        pop $r9
        pop $r8
        mov $flags $r8
        pop $r8
        bclr $flags $p0
        iret

// Set this GPC's bit in HUB_BAR, used to signal completion of various
// activities to the HUB fuc
//
hub_barrier_done:
        mov $r15 1
        ld b32 $r14 D[$r0 + #gpc_id]
        shl b32 $r15 $r14
        mov $r14 -0x6be8        // 0x409418 - HUB_BAR_SET
        sethi $r14 0x400000
        call #nv_wr32
        ret

// Disables various things, waits a bit, and re-enables them.
//
// Not sure how exactly this helps, perhaps "ENABLE" is not such a
// good description for the bits we turn off? Anyway, without this,
// funny things happen.
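//
// The sequence below appears to be: leave only POWER set, spin through a
// short busy-wait delay, then set ENABLE/UNK11 again on top of POWER.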
//
ctx_redswitch:
        mov $r14 0x614
        shl b32 $r14 6
        mov $r15 0x020
        iowr I[$r14] $r15       // GPC_RED_SWITCH = POWER
        mov $r15 8
        ctx_redswitch_delay:
                sub b32 $r15 1
                bra ne #ctx_redswitch_delay
        mov $r15 0xa20
        iowr I[$r14] $r15       // GPC_RED_SWITCH = UNK11, ENABLE, POWER
        ret

// Transfer GPC context data between GPU and storage area
//
// In: $r15 context base address
//     $p1 clear on save, set on load
//     $p2 set if opposite direction done/will be done, so:
//         on save it means: "a load will follow this save"
//         on load it means: "a save preceded this load"
//
ctx_xfer:
        // set context base address
        mov $r1 0xa04
        shl b32 $r1 6
        iowr I[$r1 + 0x000] $r15        // MEM_BASE
        bra not $p1 #ctx_xfer_not_load
                call #ctx_redswitch
        ctx_xfer_not_load:

        // strands
        mov $r1 0x4afc
        sethi $r1 0x20000
        mov $r2 0xc
        iowr I[$r1] $r2         // STRAND_CMD(0x3f) = 0x0c
        call #strand_wait
        mov $r2 0x47fc
        sethi $r2 0x20000
        iowr I[$r2] $r0         // STRAND_FIRST_GENE(0x3f) = 0x00
        xbit $r2 $flags $p1
        add b32 $r2 3
        iowr I[$r1] $r2         // STRAND_CMD(0x3f) = 0x03/0x04 (SAVE/LOAD)

        // mmio context
        xbit $r10 $flags $p1    // direction
        or $r10 2               // first
        mov $r11 0x0000
        sethi $r11 0x500000
        ld b32 $r12 D[$r0 + #gpc_id]
        shl b32 $r12 15
        add b32 $r11 $r12       // base = NV_PGRAPH_GPCn
        ld b32 $r12 D[$r0 + #gpc_mmio_list_head]
        ld b32 $r13 D[$r0 + #gpc_mmio_list_tail]
        mov $r14 0              // not multi
        call #mmctx_xfer

        // per-TPC mmio context
        xbit $r10 $flags $p1    // direction
        or $r10 4               // last
        mov $r11 0x4000
        sethi $r11 0x500000     // base = NV_PGRAPH_GPC0_TPC0
        ld b32 $r12 D[$r0 + #gpc_id]
        shl b32 $r12 15
        add b32 $r11 $r12       // base = NV_PGRAPH_GPCn_TPC0
        ld b32 $r12 D[$r0 + #tpc_mmio_list_head]
        ld b32 $r13 D[$r0 + #tpc_mmio_list_tail]
        ld b32 $r15 D[$r0 + #tpc_mask]
        mov $r14 0x800          // stride = 0x800
        call #mmctx_xfer

        // wait for strands to finish
        call #strand_wait

        // if load, or a save without a load following, do some
        // unknown stuff that's done after finishing a block of
        // strand commands
        bra $p1 #ctx_xfer_post
        bra not $p2 #ctx_xfer_done
        ctx_xfer_post:
                mov $r1 0x4afc
                sethi $r1 0x20000
                mov $r2 0xd
                iowr I[$r1] $r2         // STRAND_CMD(0x3f) = 0x0d
                call #strand_wait

        // mark completion in HUB's barrier
        ctx_xfer_done:
        call #hub_barrier_done
        ret

.align 256