1 /* 2 * Copyright(c) 2019-2024 Qualcomm Innovation Center, Inc. All Rights Reserved. 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, see <http://www.gnu.org/licenses/>. 16 */ 17 18 #define QEMU_GENERATE 19 #include "qemu/osdep.h" 20 #include "cpu.h" 21 #include "tcg/tcg-op.h" 22 #include "tcg/tcg-op-gvec.h" 23 #include "exec/helper-gen.h" 24 #include "exec/helper-proto.h" 25 #include "exec/translation-block.h" 26 #include "exec/cpu_ldst.h" 27 #include "exec/log.h" 28 #include "internal.h" 29 #include "attribs.h" 30 #include "insn.h" 31 #include "decode.h" 32 #include "translate.h" 33 #include "genptr.h" 34 #include "printinsn.h" 35 36 #define HELPER_H "helper.h" 37 #include "exec/helper-info.c.inc" 38 #undef HELPER_H 39 40 #include "analyze_funcs_generated.c.inc" 41 42 typedef void (*AnalyzeInsn)(DisasContext *ctx); 43 static const AnalyzeInsn opcode_analyze[XX_LAST_OPCODE] = { 44 #define OPCODE(X) [X] = analyze_##X 45 #include "opcodes_def_generated.h.inc" 46 #undef OPCODE 47 }; 48 49 TCGv hex_gpr[TOTAL_PER_THREAD_REGS]; 50 TCGv hex_pred[NUM_PREGS]; 51 TCGv hex_slot_cancelled; 52 TCGv hex_new_value_usr; 53 TCGv hex_store_addr[STORES_MAX]; 54 TCGv hex_store_width[STORES_MAX]; 55 TCGv hex_store_val32[STORES_MAX]; 56 TCGv_i64 hex_store_val64[STORES_MAX]; 57 TCGv hex_llsc_addr; 58 TCGv hex_llsc_val; 59 TCGv_i64 hex_llsc_val_i64; 60 TCGv hex_vstore_addr[VSTORES_MAX]; 61 TCGv hex_vstore_size[VSTORES_MAX]; 62 TCGv hex_vstore_pending[VSTORES_MAX]; 63 64 static const char * const hexagon_prednames[] = { 65 "p0", "p1", "p2", "p3" 66 }; 67 68 intptr_t ctx_future_vreg_off(DisasContext *ctx, int regnum, 69 int num, bool alloc_ok) 70 { 71 intptr_t offset; 72 73 if (!ctx->need_commit) { 74 return offsetof(CPUHexagonState, VRegs[regnum]); 75 } 76 77 /* See if it is already allocated */ 78 for (int i = 0; i < ctx->future_vregs_idx; i++) { 79 if (ctx->future_vregs_num[i] == regnum) { 80 return offsetof(CPUHexagonState, future_VRegs[i]); 81 } 82 } 83 84 g_assert(alloc_ok); 85 offset = offsetof(CPUHexagonState, future_VRegs[ctx->future_vregs_idx]); 86 for (int i = 0; i < num; i++) { 87 ctx->future_vregs_num[ctx->future_vregs_idx + i] = regnum++; 88 } 89 ctx->future_vregs_idx += num; 90 g_assert(ctx->future_vregs_idx <= VECTOR_TEMPS_MAX); 91 return offset; 92 } 93 94 intptr_t ctx_tmp_vreg_off(DisasContext *ctx, int regnum, 95 int num, bool alloc_ok) 96 { 97 intptr_t offset; 98 99 /* See if it is already allocated */ 100 for (int i = 0; i < ctx->tmp_vregs_idx; i++) { 101 if (ctx->tmp_vregs_num[i] == regnum) { 102 return offsetof(CPUHexagonState, tmp_VRegs[i]); 103 } 104 } 105 106 g_assert(alloc_ok); 107 offset = offsetof(CPUHexagonState, tmp_VRegs[ctx->tmp_vregs_idx]); 108 for (int i = 0; i < num; i++) { 109 ctx->tmp_vregs_num[ctx->tmp_vregs_idx + i] = regnum++; 110 } 111 ctx->tmp_vregs_idx += num; 112 g_assert(ctx->tmp_vregs_idx <= VECTOR_TEMPS_MAX); 113 return offset; 114 } 115 116 static void gen_exception_raw(int excp) 117 { 118 gen_helper_raise_exception(tcg_env, tcg_constant_i32(excp)); 119 } 120 121 static void gen_exec_counters(DisasContext *ctx) 122 { 123 tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_PKT_CNT], 124 hex_gpr[HEX_REG_QEMU_PKT_CNT], ctx->num_packets); 125 tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_INSN_CNT], 126 hex_gpr[HEX_REG_QEMU_INSN_CNT], ctx->num_insns); 127 tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_HVX_CNT], 128 hex_gpr[HEX_REG_QEMU_HVX_CNT], ctx->num_hvx_insns); 129 } 130 131 static bool use_goto_tb(DisasContext *ctx, target_ulong dest) 132 { 133 return translator_use_goto_tb(&ctx->base, dest); 134 } 135 136 static void gen_goto_tb(DisasContext *ctx, int idx, target_ulong dest, bool 137 move_to_pc) 138 { 139 if (use_goto_tb(ctx, dest)) { 140 tcg_gen_goto_tb(idx); 141 if (move_to_pc) { 142 tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], dest); 143 } 144 tcg_gen_exit_tb(ctx->base.tb, idx); 145 } else { 146 if (move_to_pc) { 147 tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], dest); 148 } 149 tcg_gen_lookup_and_goto_ptr(); 150 } 151 } 152 153 static void gen_end_tb(DisasContext *ctx) 154 { 155 Packet *pkt = ctx->pkt; 156 157 gen_exec_counters(ctx); 158 159 if (ctx->branch_cond != TCG_COND_NEVER) { 160 if (ctx->branch_cond != TCG_COND_ALWAYS) { 161 TCGLabel *skip = gen_new_label(); 162 tcg_gen_brcondi_tl(ctx->branch_cond, ctx->branch_taken, 0, skip); 163 gen_goto_tb(ctx, 0, ctx->branch_dest, true); 164 gen_set_label(skip); 165 gen_goto_tb(ctx, 1, ctx->next_PC, false); 166 } else { 167 gen_goto_tb(ctx, 0, ctx->branch_dest, true); 168 } 169 } else if (ctx->is_tight_loop && 170 pkt->insn[pkt->num_insns - 1].opcode == J2_endloop0) { 171 /* 172 * When we're in a tight loop, we defer the endloop0 processing 173 * to take advantage of direct block chaining 174 */ 175 TCGLabel *skip = gen_new_label(); 176 tcg_gen_brcondi_tl(TCG_COND_LEU, hex_gpr[HEX_REG_LC0], 1, skip); 177 tcg_gen_subi_tl(hex_gpr[HEX_REG_LC0], hex_gpr[HEX_REG_LC0], 1); 178 gen_goto_tb(ctx, 0, ctx->base.tb->pc, true); 179 gen_set_label(skip); 180 gen_goto_tb(ctx, 1, ctx->next_PC, false); 181 } else { 182 tcg_gen_lookup_and_goto_ptr(); 183 } 184 185 ctx->base.is_jmp = DISAS_NORETURN; 186 } 187 188 static void gen_exception_end_tb(DisasContext *ctx, int excp) 189 { 190 gen_exec_counters(ctx); 191 tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], ctx->next_PC); 192 gen_exception_raw(excp); 193 ctx->base.is_jmp = DISAS_NORETURN; 194 195 } 196 197 static int read_packet_words(CPUHexagonState *env, DisasContext *ctx, 198 uint32_t words[]) 199 { 200 bool found_end = false; 201 int nwords, max_words; 202 203 memset(words, 0, PACKET_WORDS_MAX * sizeof(uint32_t)); 204 for (nwords = 0; !found_end && nwords < PACKET_WORDS_MAX; nwords++) { 205 words[nwords] = 206 translator_ldl(env, &ctx->base, 207 ctx->base.pc_next + nwords * sizeof(uint32_t)); 208 found_end = is_packet_end(words[nwords]); 209 } 210 if (!found_end) { 211 /* Read too many words without finding the end */ 212 return 0; 213 } 214 215 /* Check for page boundary crossing */ 216 max_words = -(ctx->base.pc_next | TARGET_PAGE_MASK) / sizeof(uint32_t); 217 if (nwords > max_words) { 218 /* We can only cross a page boundary at the beginning of a TB */ 219 g_assert(ctx->base.num_insns == 1); 220 } 221 222 return nwords; 223 } 224 225 static bool check_for_attrib(Packet *pkt, int attrib) 226 { 227 for (int i = 0; i < pkt->num_insns; i++) { 228 if (GET_ATTRIB(pkt->insn[i].opcode, attrib)) { 229 return true; 230 } 231 } 232 return false; 233 } 234 235 static bool need_slot_cancelled(Packet *pkt) 236 { 237 /* We only need slot_cancelled for conditional store instructions */ 238 for (int i = 0; i < pkt->num_insns; i++) { 239 uint16_t opcode = pkt->insn[i].opcode; 240 if (GET_ATTRIB(opcode, A_CONDEXEC) && 241 GET_ATTRIB(opcode, A_SCALAR_STORE)) { 242 return true; 243 } 244 } 245 return false; 246 } 247 248 static bool need_next_PC(DisasContext *ctx) 249 { 250 Packet *pkt = ctx->pkt; 251 252 /* Check for conditional control flow or HW loop end */ 253 for (int i = 0; i < pkt->num_insns; i++) { 254 uint16_t opcode = pkt->insn[i].opcode; 255 if (GET_ATTRIB(opcode, A_CONDEXEC) && GET_ATTRIB(opcode, A_COF)) { 256 return true; 257 } 258 if (GET_ATTRIB(opcode, A_HWLOOP0_END) || 259 GET_ATTRIB(opcode, A_HWLOOP1_END)) { 260 return true; 261 } 262 } 263 return false; 264 } 265 266 /* 267 * The opcode_analyze functions mark most of the writes in a packet 268 * However, there are some implicit writes marked as attributes 269 * of the applicable instructions. 270 */ 271 static void mark_implicit_reg_write(DisasContext *ctx, int attrib, int rnum) 272 { 273 uint16_t opcode = ctx->insn->opcode; 274 if (GET_ATTRIB(opcode, attrib)) { 275 /* 276 * USR is used to set overflow and FP exceptions, 277 * so treat it as conditional 278 */ 279 bool is_predicated = GET_ATTRIB(opcode, A_CONDEXEC) || 280 rnum == HEX_REG_USR; 281 282 /* LC0/LC1 is conditionally written by endloop instructions */ 283 if ((rnum == HEX_REG_LC0 || rnum == HEX_REG_LC1) && 284 (opcode == J2_endloop0 || 285 opcode == J2_endloop1 || 286 opcode == J2_endloop01)) { 287 is_predicated = true; 288 } 289 290 ctx_log_reg_write(ctx, rnum, is_predicated); 291 } 292 } 293 294 static void mark_implicit_reg_writes(DisasContext *ctx) 295 { 296 mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_FP, HEX_REG_FP); 297 mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_SP, HEX_REG_SP); 298 mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_LR, HEX_REG_LR); 299 mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_LC0, HEX_REG_LC0); 300 mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_SA0, HEX_REG_SA0); 301 mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_LC1, HEX_REG_LC1); 302 mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_SA1, HEX_REG_SA1); 303 mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_USR, HEX_REG_USR); 304 mark_implicit_reg_write(ctx, A_FPOP, HEX_REG_USR); 305 } 306 307 static void mark_implicit_pred_write(DisasContext *ctx, int attrib, int pnum) 308 { 309 if (GET_ATTRIB(ctx->insn->opcode, attrib)) { 310 ctx_log_pred_write(ctx, pnum); 311 } 312 } 313 314 static void mark_implicit_pred_writes(DisasContext *ctx) 315 { 316 mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P0, 0); 317 mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P1, 1); 318 mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P2, 2); 319 mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P3, 3); 320 } 321 322 static bool pkt_raises_exception(Packet *pkt) 323 { 324 if (check_for_attrib(pkt, A_LOAD) || 325 check_for_attrib(pkt, A_STORE)) { 326 return true; 327 } 328 return false; 329 } 330 331 static bool need_commit(DisasContext *ctx) 332 { 333 Packet *pkt = ctx->pkt; 334 335 /* 336 * If the short-circuit property is set to false, we'll always do the commit 337 */ 338 if (!ctx->short_circuit) { 339 return true; 340 } 341 342 if (pkt_raises_exception(pkt)) { 343 return true; 344 } 345 346 /* Registers with immutability flags require new_value */ 347 for (int i = 0; i < ctx->reg_log_idx; i++) { 348 int rnum = ctx->reg_log[i]; 349 if (reg_immut_masks[rnum]) { 350 return true; 351 } 352 } 353 354 /* Floating point instructions are hard-coded to use new_value */ 355 if (check_for_attrib(pkt, A_FPOP)) { 356 return true; 357 } 358 359 if (ctx->read_after_write || ctx->has_hvx_overlap) { 360 return true; 361 } 362 363 return false; 364 } 365 366 static void mark_implicit_pred_read(DisasContext *ctx, int attrib, int pnum) 367 { 368 if (GET_ATTRIB(ctx->insn->opcode, attrib)) { 369 ctx_log_pred_read(ctx, pnum); 370 } 371 } 372 373 static void mark_implicit_pred_reads(DisasContext *ctx) 374 { 375 mark_implicit_pred_read(ctx, A_IMPLICIT_READS_P0, 0); 376 mark_implicit_pred_read(ctx, A_IMPLICIT_READS_P1, 1); 377 mark_implicit_pred_read(ctx, A_IMPLICIT_READS_P3, 2); 378 mark_implicit_pred_read(ctx, A_IMPLICIT_READS_P3, 3); 379 } 380 381 static void analyze_packet(DisasContext *ctx) 382 { 383 Packet *pkt = ctx->pkt; 384 ctx->read_after_write = false; 385 ctx->has_hvx_overlap = false; 386 for (int i = 0; i < pkt->num_insns; i++) { 387 Insn *insn = &pkt->insn[i]; 388 ctx->insn = insn; 389 if (opcode_analyze[insn->opcode]) { 390 opcode_analyze[insn->opcode](ctx); 391 } 392 mark_implicit_reg_writes(ctx); 393 mark_implicit_pred_writes(ctx); 394 mark_implicit_pred_reads(ctx); 395 } 396 397 ctx->need_commit = need_commit(ctx); 398 } 399 400 static void gen_start_packet(DisasContext *ctx) 401 { 402 Packet *pkt = ctx->pkt; 403 target_ulong next_PC = ctx->base.pc_next + pkt->encod_pkt_size_in_bytes; 404 int i; 405 406 /* Clear out the disassembly context */ 407 ctx->next_PC = next_PC; 408 ctx->reg_log_idx = 0; 409 bitmap_zero(ctx->regs_written, TOTAL_PER_THREAD_REGS); 410 bitmap_zero(ctx->predicated_regs, TOTAL_PER_THREAD_REGS); 411 ctx->preg_log_idx = 0; 412 bitmap_zero(ctx->pregs_written, NUM_PREGS); 413 ctx->future_vregs_idx = 0; 414 ctx->tmp_vregs_idx = 0; 415 ctx->vreg_log_idx = 0; 416 bitmap_zero(ctx->vregs_written, NUM_VREGS); 417 bitmap_zero(ctx->vregs_updated_tmp, NUM_VREGS); 418 bitmap_zero(ctx->vregs_updated, NUM_VREGS); 419 bitmap_zero(ctx->vregs_select, NUM_VREGS); 420 bitmap_zero(ctx->predicated_future_vregs, NUM_VREGS); 421 bitmap_zero(ctx->predicated_tmp_vregs, NUM_VREGS); 422 bitmap_zero(ctx->qregs_written, NUM_QREGS); 423 ctx->qreg_log_idx = 0; 424 for (i = 0; i < STORES_MAX; i++) { 425 ctx->store_width[i] = 0; 426 } 427 ctx->s1_store_processed = false; 428 ctx->pre_commit = true; 429 for (i = 0; i < TOTAL_PER_THREAD_REGS; i++) { 430 ctx->new_value[i] = NULL; 431 } 432 for (i = 0; i < NUM_PREGS; i++) { 433 ctx->new_pred_value[i] = NULL; 434 } 435 436 analyze_packet(ctx); 437 438 /* 439 * pregs_written is used both in the analyze phase as well as the code 440 * gen phase, so clear it again. 441 */ 442 bitmap_zero(ctx->pregs_written, NUM_PREGS); 443 444 /* Initialize the runtime state for packet semantics */ 445 if (need_slot_cancelled(pkt)) { 446 tcg_gen_movi_tl(hex_slot_cancelled, 0); 447 } 448 ctx->branch_taken = NULL; 449 if (pkt->pkt_has_cof) { 450 ctx->branch_taken = tcg_temp_new(); 451 if (pkt->pkt_has_multi_cof) { 452 tcg_gen_movi_tl(ctx->branch_taken, 0); 453 } 454 if (need_next_PC(ctx)) { 455 tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], next_PC); 456 } 457 } 458 459 /* Preload the predicated registers into get_result_gpr(ctx, i) */ 460 if (ctx->need_commit && 461 !bitmap_empty(ctx->predicated_regs, TOTAL_PER_THREAD_REGS)) { 462 i = find_first_bit(ctx->predicated_regs, TOTAL_PER_THREAD_REGS); 463 while (i < TOTAL_PER_THREAD_REGS) { 464 tcg_gen_mov_tl(get_result_gpr(ctx, i), hex_gpr[i]); 465 i = find_next_bit(ctx->predicated_regs, TOTAL_PER_THREAD_REGS, 466 i + 1); 467 } 468 } 469 470 /* 471 * Preload the predicated pred registers into ctx->new_pred_value[pred_num] 472 * Only endloop instructions conditionally write to pred registers 473 */ 474 if (ctx->need_commit && pkt->pkt_has_endloop) { 475 for (i = 0; i < ctx->preg_log_idx; i++) { 476 int pred_num = ctx->preg_log[i]; 477 ctx->new_pred_value[pred_num] = tcg_temp_new(); 478 tcg_gen_mov_tl(ctx->new_pred_value[pred_num], hex_pred[pred_num]); 479 } 480 } 481 482 /* Preload the predicated HVX registers into future_VRegs and tmp_VRegs */ 483 if (!bitmap_empty(ctx->predicated_future_vregs, NUM_VREGS)) { 484 i = find_first_bit(ctx->predicated_future_vregs, NUM_VREGS); 485 while (i < NUM_VREGS) { 486 const intptr_t VdV_off = 487 ctx_future_vreg_off(ctx, i, 1, true); 488 intptr_t src_off = offsetof(CPUHexagonState, VRegs[i]); 489 tcg_gen_gvec_mov(MO_64, VdV_off, 490 src_off, 491 sizeof(MMVector), 492 sizeof(MMVector)); 493 i = find_next_bit(ctx->predicated_future_vregs, NUM_VREGS, i + 1); 494 } 495 } 496 if (!bitmap_empty(ctx->predicated_tmp_vregs, NUM_VREGS)) { 497 i = find_first_bit(ctx->predicated_tmp_vregs, NUM_VREGS); 498 while (i < NUM_VREGS) { 499 const intptr_t VdV_off = 500 ctx_tmp_vreg_off(ctx, i, 1, true); 501 intptr_t src_off = offsetof(CPUHexagonState, VRegs[i]); 502 tcg_gen_gvec_mov(MO_64, VdV_off, 503 src_off, 504 sizeof(MMVector), 505 sizeof(MMVector)); 506 i = find_next_bit(ctx->predicated_tmp_vregs, NUM_VREGS, i + 1); 507 } 508 } 509 } 510 511 bool is_gather_store_insn(DisasContext *ctx) 512 { 513 Packet *pkt = ctx->pkt; 514 Insn *insn = ctx->insn; 515 if (GET_ATTRIB(insn->opcode, A_CVI_NEW) && 516 insn->new_value_producer_slot == 1) { 517 /* Look for gather instruction */ 518 for (int i = 0; i < pkt->num_insns; i++) { 519 Insn *in = &pkt->insn[i]; 520 if (GET_ATTRIB(in->opcode, A_CVI_GATHER) && in->slot == 1) { 521 return true; 522 } 523 } 524 } 525 return false; 526 } 527 528 static void mark_store_width(DisasContext *ctx) 529 { 530 uint16_t opcode = ctx->insn->opcode; 531 uint32_t slot = ctx->insn->slot; 532 uint8_t width = 0; 533 534 if (GET_ATTRIB(opcode, A_SCALAR_STORE)) { 535 if (GET_ATTRIB(opcode, A_MEMSIZE_0B)) { 536 return; 537 } 538 if (GET_ATTRIB(opcode, A_MEMSIZE_1B)) { 539 width |= 1; 540 } 541 if (GET_ATTRIB(opcode, A_MEMSIZE_2B)) { 542 width |= 2; 543 } 544 if (GET_ATTRIB(opcode, A_MEMSIZE_4B)) { 545 width |= 4; 546 } 547 if (GET_ATTRIB(opcode, A_MEMSIZE_8B)) { 548 width |= 8; 549 } 550 tcg_debug_assert(is_power_of_2(width)); 551 ctx->store_width[slot] = width; 552 } 553 } 554 555 static void gen_insn(DisasContext *ctx) 556 { 557 if (ctx->insn->generate) { 558 ctx->insn->generate(ctx); 559 mark_store_width(ctx); 560 } else { 561 gen_exception_end_tb(ctx, HEX_CAUSE_INVALID_OPCODE); 562 } 563 } 564 565 /* 566 * Helpers for generating the packet commit 567 */ 568 static void gen_reg_writes(DisasContext *ctx) 569 { 570 int i; 571 572 /* Early exit if not needed */ 573 if (!ctx->need_commit) { 574 return; 575 } 576 577 for (i = 0; i < ctx->reg_log_idx; i++) { 578 int reg_num = ctx->reg_log[i]; 579 580 tcg_gen_mov_tl(hex_gpr[reg_num], get_result_gpr(ctx, reg_num)); 581 582 /* 583 * ctx->is_tight_loop is set when SA0 points to the beginning of the TB. 584 * If we write to SA0, we have to turn off tight loop handling. 585 */ 586 if (reg_num == HEX_REG_SA0) { 587 ctx->is_tight_loop = false; 588 } 589 } 590 } 591 592 static void gen_pred_writes(DisasContext *ctx) 593 { 594 /* Early exit if not needed or the log is empty */ 595 if (!ctx->need_commit || !ctx->preg_log_idx) { 596 return; 597 } 598 599 for (int i = 0; i < ctx->preg_log_idx; i++) { 600 int pred_num = ctx->preg_log[i]; 601 tcg_gen_mov_tl(hex_pred[pred_num], ctx->new_pred_value[pred_num]); 602 } 603 } 604 605 static bool slot_is_predicated(Packet *pkt, int slot_num) 606 { 607 for (int i = 0; i < pkt->num_insns; i++) { 608 if (pkt->insn[i].slot == slot_num) { 609 return GET_ATTRIB(pkt->insn[i].opcode, A_CONDEXEC); 610 } 611 } 612 /* If we get to here, we didn't find an instruction in the requested slot */ 613 g_assert_not_reached(); 614 } 615 616 void process_store(DisasContext *ctx, int slot_num) 617 { 618 bool is_predicated = slot_is_predicated(ctx->pkt, slot_num); 619 TCGLabel *label_end = NULL; 620 621 /* 622 * We may have already processed this store 623 * See CHECK_NOSHUF in macros.h 624 */ 625 if (slot_num == 1 && ctx->s1_store_processed) { 626 return; 627 } 628 ctx->s1_store_processed = true; 629 630 if (is_predicated) { 631 TCGv cancelled = tcg_temp_new(); 632 label_end = gen_new_label(); 633 634 /* Don't do anything if the slot was cancelled */ 635 tcg_gen_extract_tl(cancelled, hex_slot_cancelled, slot_num, 1); 636 tcg_gen_brcondi_tl(TCG_COND_NE, cancelled, 0, label_end); 637 } 638 { 639 TCGv address = tcg_temp_new(); 640 tcg_gen_mov_tl(address, hex_store_addr[slot_num]); 641 642 /* 643 * If we know the width from the DisasContext, we can 644 * generate much cleaner code. 645 * Unfortunately, not all instructions execute the fSTORE 646 * macro during code generation. Anything that uses the 647 * generic helper will have this problem. Instructions 648 * that use fWRAP to generate proper TCG code will be OK. 649 */ 650 switch (ctx->store_width[slot_num]) { 651 case 1: 652 tcg_gen_qemu_st_tl(hex_store_val32[slot_num], 653 hex_store_addr[slot_num], 654 ctx->mem_idx, MO_UB); 655 break; 656 case 2: 657 tcg_gen_qemu_st_tl(hex_store_val32[slot_num], 658 hex_store_addr[slot_num], 659 ctx->mem_idx, MO_TEUW); 660 break; 661 case 4: 662 tcg_gen_qemu_st_tl(hex_store_val32[slot_num], 663 hex_store_addr[slot_num], 664 ctx->mem_idx, MO_TEUL); 665 break; 666 case 8: 667 tcg_gen_qemu_st_i64(hex_store_val64[slot_num], 668 hex_store_addr[slot_num], 669 ctx->mem_idx, MO_TEUQ); 670 break; 671 default: 672 { 673 /* 674 * If we get to here, we don't know the width at 675 * TCG generation time, we'll use a helper to 676 * avoid branching based on the width at runtime. 677 */ 678 TCGv slot = tcg_constant_tl(slot_num); 679 gen_helper_commit_store(tcg_env, slot); 680 } 681 } 682 } 683 if (is_predicated) { 684 gen_set_label(label_end); 685 } 686 } 687 688 static void process_store_log(DisasContext *ctx) 689 { 690 /* 691 * When a packet has two stores, the hardware processes 692 * slot 1 and then slot 0. This will be important when 693 * the memory accesses overlap. 694 */ 695 Packet *pkt = ctx->pkt; 696 if (pkt->pkt_has_store_s1) { 697 g_assert(!pkt->pkt_has_dczeroa); 698 process_store(ctx, 1); 699 } 700 if (pkt->pkt_has_store_s0) { 701 g_assert(!pkt->pkt_has_dczeroa); 702 process_store(ctx, 0); 703 } 704 } 705 706 /* Zero out a 32-bit cache line */ 707 static void process_dczeroa(DisasContext *ctx) 708 { 709 if (ctx->pkt->pkt_has_dczeroa) { 710 /* Store 32 bytes of zero starting at (addr & ~0x1f) */ 711 TCGv addr = tcg_temp_new(); 712 TCGv_i64 zero = tcg_constant_i64(0); 713 714 tcg_gen_andi_tl(addr, ctx->dczero_addr, ~0x1f); 715 tcg_gen_qemu_st_i64(zero, addr, ctx->mem_idx, MO_UQ); 716 tcg_gen_addi_tl(addr, addr, 8); 717 tcg_gen_qemu_st_i64(zero, addr, ctx->mem_idx, MO_UQ); 718 tcg_gen_addi_tl(addr, addr, 8); 719 tcg_gen_qemu_st_i64(zero, addr, ctx->mem_idx, MO_UQ); 720 tcg_gen_addi_tl(addr, addr, 8); 721 tcg_gen_qemu_st_i64(zero, addr, ctx->mem_idx, MO_UQ); 722 } 723 } 724 725 static bool pkt_has_hvx_store(Packet *pkt) 726 { 727 int i; 728 for (i = 0; i < pkt->num_insns; i++) { 729 int opcode = pkt->insn[i].opcode; 730 if (GET_ATTRIB(opcode, A_CVI) && GET_ATTRIB(opcode, A_STORE)) { 731 return true; 732 } 733 } 734 return false; 735 } 736 737 static void gen_commit_hvx(DisasContext *ctx) 738 { 739 int i; 740 741 /* Early exit if not needed */ 742 if (!ctx->need_commit) { 743 g_assert(!pkt_has_hvx_store(ctx->pkt)); 744 return; 745 } 746 747 /* 748 * for (i = 0; i < ctx->vreg_log_idx; i++) { 749 * int rnum = ctx->vreg_log[i]; 750 * env->VRegs[rnum] = env->future_VRegs[rnum]; 751 * } 752 */ 753 for (i = 0; i < ctx->vreg_log_idx; i++) { 754 int rnum = ctx->vreg_log[i]; 755 intptr_t dstoff = offsetof(CPUHexagonState, VRegs[rnum]); 756 intptr_t srcoff = ctx_future_vreg_off(ctx, rnum, 1, false); 757 size_t size = sizeof(MMVector); 758 759 tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size); 760 } 761 762 /* 763 * for (i = 0; i < ctx->qreg_log_idx; i++) { 764 * int rnum = ctx->qreg_log[i]; 765 * env->QRegs[rnum] = env->future_QRegs[rnum]; 766 * } 767 */ 768 for (i = 0; i < ctx->qreg_log_idx; i++) { 769 int rnum = ctx->qreg_log[i]; 770 intptr_t dstoff = offsetof(CPUHexagonState, QRegs[rnum]); 771 intptr_t srcoff = offsetof(CPUHexagonState, future_QRegs[rnum]); 772 size_t size = sizeof(MMQReg); 773 774 tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size); 775 } 776 777 if (pkt_has_hvx_store(ctx->pkt)) { 778 gen_helper_commit_hvx_stores(tcg_env); 779 } 780 } 781 782 static void update_exec_counters(DisasContext *ctx) 783 { 784 Packet *pkt = ctx->pkt; 785 int num_insns = pkt->num_insns; 786 int num_real_insns = 0; 787 int num_hvx_insns = 0; 788 789 for (int i = 0; i < num_insns; i++) { 790 if (!pkt->insn[i].is_endloop && 791 !pkt->insn[i].part1 && 792 !GET_ATTRIB(pkt->insn[i].opcode, A_IT_NOP)) { 793 num_real_insns++; 794 } 795 if (GET_ATTRIB(pkt->insn[i].opcode, A_CVI)) { 796 num_hvx_insns++; 797 } 798 } 799 800 ctx->num_packets++; 801 ctx->num_insns += num_real_insns; 802 ctx->num_hvx_insns += num_hvx_insns; 803 } 804 805 static void gen_commit_packet(DisasContext *ctx) 806 { 807 /* 808 * If there is more than one store in a packet, make sure they are all OK 809 * before proceeding with the rest of the packet commit. 810 * 811 * dczeroa has to be the only store operation in the packet, so we go 812 * ahead and process that first. 813 * 814 * When there is an HVX store, there can also be a scalar store in either 815 * slot 0 or slot1, so we create a mask for the helper to indicate what 816 * work to do. 817 * 818 * When there are two scalar stores, we probe the one in slot 0. 819 * 820 * Note that we don't call the probe helper for packets with only one 821 * store. Therefore, we call process_store_log before anything else 822 * involved in committing the packet. 823 */ 824 Packet *pkt = ctx->pkt; 825 bool has_store_s0 = pkt->pkt_has_store_s0; 826 bool has_store_s1 = (pkt->pkt_has_store_s1 && !ctx->s1_store_processed); 827 bool has_hvx_store = pkt_has_hvx_store(pkt); 828 if (pkt->pkt_has_dczeroa) { 829 /* 830 * The dczeroa will be the store in slot 0, check that we don't have 831 * a store in slot 1 or an HVX store. 832 */ 833 g_assert(!has_store_s1 && !has_hvx_store); 834 process_dczeroa(ctx); 835 } else if (has_hvx_store) { 836 if (!has_store_s0 && !has_store_s1) { 837 TCGv mem_idx = tcg_constant_tl(ctx->mem_idx); 838 gen_helper_probe_hvx_stores(tcg_env, mem_idx); 839 } else { 840 int mask = 0; 841 842 if (has_store_s0) { 843 mask = 844 FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, HAS_ST0, 1); 845 } 846 if (has_store_s1) { 847 mask = 848 FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, HAS_ST1, 1); 849 } 850 if (has_hvx_store) { 851 mask = 852 FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, 853 HAS_HVX_STORES, 1); 854 } 855 if (has_store_s0 && slot_is_predicated(pkt, 0)) { 856 mask = 857 FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, 858 S0_IS_PRED, 1); 859 } 860 if (has_store_s1 && slot_is_predicated(pkt, 1)) { 861 mask = 862 FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, 863 S1_IS_PRED, 1); 864 } 865 mask = FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, MMU_IDX, 866 ctx->mem_idx); 867 gen_helper_probe_pkt_scalar_hvx_stores(tcg_env, 868 tcg_constant_tl(mask)); 869 } 870 } else if (has_store_s0 && has_store_s1) { 871 /* 872 * process_store_log will execute the slot 1 store first, 873 * so we only have to probe the store in slot 0 874 */ 875 int args = 0; 876 args = 877 FIELD_DP32(args, PROBE_PKT_SCALAR_STORE_S0, MMU_IDX, ctx->mem_idx); 878 if (slot_is_predicated(pkt, 0)) { 879 args = 880 FIELD_DP32(args, PROBE_PKT_SCALAR_STORE_S0, IS_PREDICATED, 1); 881 } 882 TCGv args_tcgv = tcg_constant_tl(args); 883 gen_helper_probe_pkt_scalar_store_s0(tcg_env, args_tcgv); 884 } 885 886 process_store_log(ctx); 887 888 gen_reg_writes(ctx); 889 gen_pred_writes(ctx); 890 if (pkt->pkt_has_hvx) { 891 gen_commit_hvx(ctx); 892 } 893 update_exec_counters(ctx); 894 895 if (pkt->vhist_insn != NULL) { 896 ctx->pre_commit = false; 897 ctx->insn = pkt->vhist_insn; 898 pkt->vhist_insn->generate(ctx); 899 } 900 901 if (pkt->pkt_has_cof) { 902 gen_end_tb(ctx); 903 } 904 } 905 906 static void decode_and_translate_packet(CPUHexagonState *env, DisasContext *ctx) 907 { 908 uint32_t words[PACKET_WORDS_MAX]; 909 int nwords; 910 Packet pkt; 911 int i; 912 913 nwords = read_packet_words(env, ctx, words); 914 if (!nwords) { 915 gen_exception_end_tb(ctx, HEX_CAUSE_INVALID_PACKET); 916 return; 917 } 918 919 ctx->pkt = &pkt; 920 if (decode_packet(ctx, nwords, words, &pkt, false) > 0) { 921 pkt.pc = ctx->base.pc_next; 922 gen_start_packet(ctx); 923 for (i = 0; i < pkt.num_insns; i++) { 924 ctx->insn = &pkt.insn[i]; 925 gen_insn(ctx); 926 } 927 gen_commit_packet(ctx); 928 ctx->base.pc_next += pkt.encod_pkt_size_in_bytes; 929 } else { 930 gen_exception_end_tb(ctx, HEX_CAUSE_INVALID_PACKET); 931 } 932 } 933 934 static void hexagon_tr_init_disas_context(DisasContextBase *dcbase, 935 CPUState *cs) 936 { 937 DisasContext *ctx = container_of(dcbase, DisasContext, base); 938 HexagonCPU *hex_cpu = env_archcpu(cpu_env(cs)); 939 uint32_t hex_flags = dcbase->tb->flags; 940 941 ctx->mem_idx = MMU_USER_IDX; 942 ctx->num_packets = 0; 943 ctx->num_insns = 0; 944 ctx->num_hvx_insns = 0; 945 ctx->branch_cond = TCG_COND_NEVER; 946 ctx->is_tight_loop = FIELD_EX32(hex_flags, TB_FLAGS, IS_TIGHT_LOOP); 947 ctx->short_circuit = hex_cpu->short_circuit; 948 } 949 950 static void hexagon_tr_tb_start(DisasContextBase *db, CPUState *cpu) 951 { 952 } 953 954 static void hexagon_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu) 955 { 956 DisasContext *ctx = container_of(dcbase, DisasContext, base); 957 958 tcg_gen_insn_start(ctx->base.pc_next); 959 } 960 961 static bool pkt_crosses_page(CPUHexagonState *env, DisasContext *ctx) 962 { 963 target_ulong page_start = ctx->base.pc_first & TARGET_PAGE_MASK; 964 bool found_end = false; 965 int nwords; 966 967 for (nwords = 0; !found_end && nwords < PACKET_WORDS_MAX; nwords++) { 968 uint32_t word = translator_ldl(env, &ctx->base, 969 ctx->base.pc_next + nwords * sizeof(uint32_t)); 970 found_end = is_packet_end(word); 971 } 972 uint32_t next_ptr = ctx->base.pc_next + nwords * sizeof(uint32_t); 973 return found_end && next_ptr - page_start >= TARGET_PAGE_SIZE; 974 } 975 976 static void hexagon_tr_translate_packet(DisasContextBase *dcbase, CPUState *cpu) 977 { 978 DisasContext *ctx = container_of(dcbase, DisasContext, base); 979 CPUHexagonState *env = cpu_env(cpu); 980 981 decode_and_translate_packet(env, ctx); 982 983 if (ctx->base.is_jmp == DISAS_NEXT) { 984 target_ulong page_start = ctx->base.pc_first & TARGET_PAGE_MASK; 985 target_ulong bytes_max = PACKET_WORDS_MAX * sizeof(target_ulong); 986 987 if (ctx->base.pc_next - page_start >= TARGET_PAGE_SIZE || 988 (ctx->base.pc_next - page_start >= TARGET_PAGE_SIZE - bytes_max && 989 pkt_crosses_page(env, ctx))) { 990 ctx->base.is_jmp = DISAS_TOO_MANY; 991 } 992 993 /* 994 * The CPU log is used to compare against LLDB single stepping, 995 * so end the TLB after every packet. 996 */ 997 HexagonCPU *hex_cpu = env_archcpu(env); 998 if (hex_cpu->lldb_compat && qemu_loglevel_mask(CPU_LOG_TB_CPU)) { 999 ctx->base.is_jmp = DISAS_TOO_MANY; 1000 } 1001 } 1002 } 1003 1004 static void hexagon_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) 1005 { 1006 DisasContext *ctx = container_of(dcbase, DisasContext, base); 1007 1008 switch (ctx->base.is_jmp) { 1009 case DISAS_TOO_MANY: 1010 gen_exec_counters(ctx); 1011 tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], ctx->base.pc_next); 1012 tcg_gen_exit_tb(NULL, 0); 1013 break; 1014 case DISAS_NORETURN: 1015 break; 1016 default: 1017 g_assert_not_reached(); 1018 } 1019 } 1020 1021 static const TranslatorOps hexagon_tr_ops = { 1022 .init_disas_context = hexagon_tr_init_disas_context, 1023 .tb_start = hexagon_tr_tb_start, 1024 .insn_start = hexagon_tr_insn_start, 1025 .translate_insn = hexagon_tr_translate_packet, 1026 .tb_stop = hexagon_tr_tb_stop, 1027 }; 1028 1029 void hexagon_translate_code(CPUState *cs, TranslationBlock *tb, 1030 int *max_insns, vaddr pc, void *host_pc) 1031 { 1032 DisasContext ctx; 1033 1034 translator_loop(cs, tb, max_insns, pc, host_pc, 1035 &hexagon_tr_ops, &ctx.base); 1036 } 1037 1038 #define NAME_LEN 64 1039 static char store_addr_names[STORES_MAX][NAME_LEN]; 1040 static char store_width_names[STORES_MAX][NAME_LEN]; 1041 static char store_val32_names[STORES_MAX][NAME_LEN]; 1042 static char store_val64_names[STORES_MAX][NAME_LEN]; 1043 static char vstore_addr_names[VSTORES_MAX][NAME_LEN]; 1044 static char vstore_size_names[VSTORES_MAX][NAME_LEN]; 1045 static char vstore_pending_names[VSTORES_MAX][NAME_LEN]; 1046 1047 void hexagon_translate_init(void) 1048 { 1049 int i; 1050 1051 opcode_init(); 1052 1053 for (i = 0; i < TOTAL_PER_THREAD_REGS; i++) { 1054 hex_gpr[i] = tcg_global_mem_new(tcg_env, 1055 offsetof(CPUHexagonState, gpr[i]), 1056 hexagon_regnames[i]); 1057 } 1058 hex_new_value_usr = tcg_global_mem_new(tcg_env, 1059 offsetof(CPUHexagonState, new_value_usr), "new_value_usr"); 1060 1061 for (i = 0; i < NUM_PREGS; i++) { 1062 hex_pred[i] = tcg_global_mem_new(tcg_env, 1063 offsetof(CPUHexagonState, pred[i]), 1064 hexagon_prednames[i]); 1065 } 1066 hex_slot_cancelled = tcg_global_mem_new(tcg_env, 1067 offsetof(CPUHexagonState, slot_cancelled), "slot_cancelled"); 1068 hex_llsc_addr = tcg_global_mem_new(tcg_env, 1069 offsetof(CPUHexagonState, llsc_addr), "llsc_addr"); 1070 hex_llsc_val = tcg_global_mem_new(tcg_env, 1071 offsetof(CPUHexagonState, llsc_val), "llsc_val"); 1072 hex_llsc_val_i64 = tcg_global_mem_new_i64(tcg_env, 1073 offsetof(CPUHexagonState, llsc_val_i64), "llsc_val_i64"); 1074 for (i = 0; i < STORES_MAX; i++) { 1075 snprintf(store_addr_names[i], NAME_LEN, "store_addr_%d", i); 1076 hex_store_addr[i] = tcg_global_mem_new(tcg_env, 1077 offsetof(CPUHexagonState, mem_log_stores[i].va), 1078 store_addr_names[i]); 1079 1080 snprintf(store_width_names[i], NAME_LEN, "store_width_%d", i); 1081 hex_store_width[i] = tcg_global_mem_new(tcg_env, 1082 offsetof(CPUHexagonState, mem_log_stores[i].width), 1083 store_width_names[i]); 1084 1085 snprintf(store_val32_names[i], NAME_LEN, "store_val32_%d", i); 1086 hex_store_val32[i] = tcg_global_mem_new(tcg_env, 1087 offsetof(CPUHexagonState, mem_log_stores[i].data32), 1088 store_val32_names[i]); 1089 1090 snprintf(store_val64_names[i], NAME_LEN, "store_val64_%d", i); 1091 hex_store_val64[i] = tcg_global_mem_new_i64(tcg_env, 1092 offsetof(CPUHexagonState, mem_log_stores[i].data64), 1093 store_val64_names[i]); 1094 } 1095 for (i = 0; i < VSTORES_MAX; i++) { 1096 snprintf(vstore_addr_names[i], NAME_LEN, "vstore_addr_%d", i); 1097 hex_vstore_addr[i] = tcg_global_mem_new(tcg_env, 1098 offsetof(CPUHexagonState, vstore[i].va), 1099 vstore_addr_names[i]); 1100 1101 snprintf(vstore_size_names[i], NAME_LEN, "vstore_size_%d", i); 1102 hex_vstore_size[i] = tcg_global_mem_new(tcg_env, 1103 offsetof(CPUHexagonState, vstore[i].size), 1104 vstore_size_names[i]); 1105 1106 snprintf(vstore_pending_names[i], NAME_LEN, "vstore_pending_%d", i); 1107 hex_vstore_pending[i] = tcg_global_mem_new(tcg_env, 1108 offsetof(CPUHexagonState, vstore_pending[i]), 1109 vstore_pending_names[i]); 1110 } 1111 } 1112