/*
 * Copyright(c) 2019-2024 Qualcomm Innovation Center, Inc. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */

#define QEMU_GENERATE
#include "qemu/osdep.h"
#include "cpu.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "exec/helper-gen.h"
#include "exec/helper-proto.h"
#include "exec/translation-block.h"
#include "accel/tcg/cpu-ldst.h"
#include "exec/log.h"
#include "internal.h"
#include "attribs.h"
#include "insn.h"
#include "decode.h"
#include "translate.h"
#include "genptr.h"
#include "printinsn.h"

#define HELPER_H "helper.h"
#include "exec/helper-info.c.inc"
#undef HELPER_H

#include "analyze_funcs_generated.c.inc"

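/*
 * Dispatch table of register-usage analysis functions, indexed by opcode.
 * The analyze_<OPCODE> entries are generated into
 * analyze_funcs_generated.c.inc; opcodes without an analyze function
 * leave a NULL entry and are skipped during packet analysis.
 */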
typedef void (*AnalyzeInsn)(DisasContext *ctx);
static const AnalyzeInsn opcode_analyze[XX_LAST_OPCODE] = {
#define OPCODE(X) [X] = analyze_##X
#include "opcodes_def_generated.h.inc"
#undef OPCODE
};

TCGv hex_gpr[TOTAL_PER_THREAD_REGS];
TCGv hex_pred[NUM_PREGS];
TCGv hex_slot_cancelled;
TCGv hex_new_value_usr;
TCGv hex_store_addr[STORES_MAX];
TCGv hex_store_width[STORES_MAX];
TCGv hex_store_val32[STORES_MAX];
TCGv_i64 hex_store_val64[STORES_MAX];
TCGv hex_llsc_addr;
TCGv hex_llsc_val;
TCGv_i64 hex_llsc_val_i64;
TCGv hex_vstore_addr[VSTORES_MAX];
TCGv hex_vstore_size[VSTORES_MAX];
TCGv hex_vstore_pending[VSTORES_MAX];

static const char * const hexagon_prednames[] = {
    "p0", "p1", "p2", "p3"
};

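/*
 * Return the env offset of the "future" (not yet committed) copy of HVX
 * vector register regnum, allocating a slot in future_VRegs if needed.
 * When the packet commits in place (!need_commit), the architectural
 * VRegs entry is used directly. ctx_tmp_vreg_off below follows the same
 * allocation scheme for tmp_VRegs.
 */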
intptr_t ctx_future_vreg_off(DisasContext *ctx, int regnum,
                             int num, bool alloc_ok)
{
    intptr_t offset;

    if (!ctx->need_commit) {
        return offsetof(CPUHexagonState, VRegs[regnum]);
    }

    /* See if it is already allocated */
    for (int i = 0; i < ctx->future_vregs_idx; i++) {
        if (ctx->future_vregs_num[i] == regnum) {
            return offsetof(CPUHexagonState, future_VRegs[i]);
        }
    }

    g_assert(alloc_ok);
    offset = offsetof(CPUHexagonState, future_VRegs[ctx->future_vregs_idx]);
    for (int i = 0; i < num; i++) {
        ctx->future_vregs_num[ctx->future_vregs_idx + i] = regnum++;
    }
    ctx->future_vregs_idx += num;
    g_assert(ctx->future_vregs_idx <= VECTOR_TEMPS_MAX);
    return offset;
}

intptr_t ctx_tmp_vreg_off(DisasContext *ctx, int regnum,
                          int num, bool alloc_ok)
{
    intptr_t offset;

    /* See if it is already allocated */
    for (int i = 0; i < ctx->tmp_vregs_idx; i++) {
        if (ctx->tmp_vregs_num[i] == regnum) {
            return offsetof(CPUHexagonState, tmp_VRegs[i]);
        }
    }

    g_assert(alloc_ok);
    offset = offsetof(CPUHexagonState, tmp_VRegs[ctx->tmp_vregs_idx]);
    for (int i = 0; i < num; i++) {
        ctx->tmp_vregs_num[ctx->tmp_vregs_idx + i] = regnum++;
    }
    ctx->tmp_vregs_idx += num;
    g_assert(ctx->tmp_vregs_idx <= VECTOR_TEMPS_MAX);
    return offset;
}

static void gen_exception_raw(int excp)
{
    gen_helper_raise_exception(tcg_env, tcg_constant_i32(excp));
}

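/*
 * Flush the per-TB packet/instruction/HVX counts into the
 * HEX_REG_QEMU_* count registers (QEMU-specific counters kept in the
 * GPR file).
 */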
static void gen_exec_counters(DisasContext *ctx)
{
    tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_PKT_CNT],
                    hex_gpr[HEX_REG_QEMU_PKT_CNT], ctx->num_packets);
    tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_INSN_CNT],
                    hex_gpr[HEX_REG_QEMU_INSN_CNT], ctx->num_insns);
    tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_HVX_CNT],
                    hex_gpr[HEX_REG_QEMU_HVX_CNT], ctx->num_hvx_insns);
}

static bool use_goto_tb(DisasContext *ctx, target_ulong dest)
{
    return translator_use_goto_tb(&ctx->base, dest);
}

static void gen_goto_tb(DisasContext *ctx, int idx, target_ulong dest,
                        bool move_to_pc)
{
    if (use_goto_tb(ctx, dest)) {
        tcg_gen_goto_tb(idx);
        if (move_to_pc) {
            tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], dest);
        }
        tcg_gen_exit_tb(ctx->base.tb, idx);
    } else {
        if (move_to_pc) {
            tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], dest);
        }
        tcg_gen_lookup_and_goto_ptr();
    }
}

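/*
 * Emit the end-of-TB code: flush the execution counters, then branch to
 * the recorded destination(s). Conditional branches chain to both the
 * taken and fall-through targets; a deferred endloop0 in a tight loop
 * decrements LC0 and chains back to the start of the TB.
 */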
static void gen_end_tb(DisasContext *ctx)
{
    Packet *pkt = ctx->pkt;

    gen_exec_counters(ctx);

    if (ctx->branch_cond != TCG_COND_NEVER) {
        if (ctx->branch_cond != TCG_COND_ALWAYS) {
            TCGLabel *skip = gen_new_label();
            tcg_gen_brcondi_tl(ctx->branch_cond, ctx->branch_taken, 0, skip);
            gen_goto_tb(ctx, 0, ctx->branch_dest, true);
            gen_set_label(skip);
            gen_goto_tb(ctx, 1, ctx->next_PC, false);
        } else {
            gen_goto_tb(ctx, 0, ctx->branch_dest, true);
        }
    } else if (ctx->is_tight_loop &&
               pkt->insn[pkt->num_insns - 1].opcode == J2_endloop0) {
        /*
         * When we're in a tight loop, we defer the endloop0 processing
         * to take advantage of direct block chaining
         */
        TCGLabel *skip = gen_new_label();
        tcg_gen_brcondi_tl(TCG_COND_LEU, hex_gpr[HEX_REG_LC0], 1, skip);
        tcg_gen_subi_tl(hex_gpr[HEX_REG_LC0], hex_gpr[HEX_REG_LC0], 1);
        gen_goto_tb(ctx, 0, ctx->base.tb->pc, true);
        gen_set_label(skip);
        gen_goto_tb(ctx, 1, ctx->next_PC, false);
    } else {
        tcg_gen_lookup_and_goto_ptr();
    }

    ctx->base.is_jmp = DISAS_NORETURN;
}

static void gen_exception_end_tb(DisasContext *ctx, int excp)
{
    gen_exec_counters(ctx);
    tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], ctx->next_PC);
    gen_exception_raw(excp);
    ctx->base.is_jmp = DISAS_NORETURN;
}

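/*
 * Fill words[] with the encoded words of the packet starting at pc_next,
 * stopping at the word whose parse bits mark the end of the packet.
 * Returns the number of words read, or 0 if no packet end was found
 * within PACKET_WORDS_MAX words (an invalid packet).
 */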
static int read_packet_words(CPUHexagonState *env, DisasContext *ctx,
                             uint32_t words[])
{
    bool found_end = false;
    int nwords, max_words;

    memset(words, 0, PACKET_WORDS_MAX * sizeof(uint32_t));
    for (nwords = 0; !found_end && nwords < PACKET_WORDS_MAX; nwords++) {
        words[nwords] =
            translator_ldl(env, &ctx->base,
                           ctx->base.pc_next + nwords * sizeof(uint32_t));
        found_end = is_packet_end(words[nwords]);
    }
    if (!found_end) {
        /* Read too many words without finding the end */
        return 0;
    }

    /* Check for page boundary crossing */
    max_words = -(ctx->base.pc_next | TARGET_PAGE_MASK) / sizeof(uint32_t);
    if (nwords > max_words) {
        /* We can only cross a page boundary at the beginning of a TB */
        g_assert(ctx->base.num_insns == 1);
    }

    return nwords;
}

static bool check_for_attrib(Packet *pkt, int attrib)
{
    for (int i = 0; i < pkt->num_insns; i++) {
        if (GET_ATTRIB(pkt->insn[i].opcode, attrib)) {
            return true;
        }
    }
    return false;
}

static bool need_slot_cancelled(Packet *pkt)
{
    /* We only need slot_cancelled for conditional store instructions */
    for (int i = 0; i < pkt->num_insns; i++) {
        uint16_t opcode = pkt->insn[i].opcode;
        if (GET_ATTRIB(opcode, A_CONDEXEC) &&
            GET_ATTRIB(opcode, A_SCALAR_STORE)) {
            return true;
        }
    }
    return false;
}

static bool need_next_PC(DisasContext *ctx)
{
    Packet *pkt = ctx->pkt;

    /* Check for conditional control flow or HW loop end */
    for (int i = 0; i < pkt->num_insns; i++) {
        uint16_t opcode = pkt->insn[i].opcode;
        if (GET_ATTRIB(opcode, A_CONDEXEC) && GET_ATTRIB(opcode, A_COF)) {
            return true;
        }
        if (GET_ATTRIB(opcode, A_HWLOOP0_END) ||
            GET_ATTRIB(opcode, A_HWLOOP1_END)) {
            return true;
        }
    }
    return false;
}

/*
 * The opcode_analyze functions mark most of the writes in a packet.
 * However, there are some implicit writes marked as attributes
 * of the applicable instructions.
 */
static void mark_implicit_reg_write(DisasContext *ctx, int attrib, int rnum)
{
    uint16_t opcode = ctx->insn->opcode;
    if (GET_ATTRIB(opcode, attrib)) {
        /*
         * USR is used to set overflow and FP exceptions,
         * so treat it as conditional
         */
        bool is_predicated = GET_ATTRIB(opcode, A_CONDEXEC) ||
                             rnum == HEX_REG_USR;

        /* LC0/LC1 is conditionally written by endloop instructions */
        if ((rnum == HEX_REG_LC0 || rnum == HEX_REG_LC1) &&
            (opcode == J2_endloop0 ||
             opcode == J2_endloop1 ||
             opcode == J2_endloop01)) {
            is_predicated = true;
        }

        ctx_log_reg_write(ctx, rnum, is_predicated);
    }
}

static void mark_implicit_reg_writes(DisasContext *ctx)
{
    mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_FP, HEX_REG_FP);
    mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_SP, HEX_REG_SP);
    mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_LR, HEX_REG_LR);
    mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_LC0, HEX_REG_LC0);
    mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_SA0, HEX_REG_SA0);
    mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_LC1, HEX_REG_LC1);
    mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_SA1, HEX_REG_SA1);
    mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_USR, HEX_REG_USR);
    mark_implicit_reg_write(ctx, A_FPOP, HEX_REG_USR);
}

static void mark_implicit_pred_write(DisasContext *ctx, int attrib, int pnum)
{
    if (GET_ATTRIB(ctx->insn->opcode, attrib)) {
        ctx_log_pred_write(ctx, pnum);
    }
}

static void mark_implicit_pred_writes(DisasContext *ctx)
{
    mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P0, 0);
    mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P1, 1);
    mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P2, 2);
    mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P3, 3);
}

static bool pkt_raises_exception(Packet *pkt)
{
    if (check_for_attrib(pkt, A_LOAD) ||
        check_for_attrib(pkt, A_STORE)) {
        return true;
    }
    return false;
}

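/*
 * Decide whether this packet's register writes must go through the
 * new_value staging area (commit at the end of the packet) or can be
 * written directly (short-circuit). Staging is required whenever the
 * packet could raise an exception, writes a register with immutable
 * bits, contains a floating-point op, or has packet-internal
 * read-after-write or HVX overlap hazards.
 */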
static bool need_commit(DisasContext *ctx)
{
    Packet *pkt = ctx->pkt;

    /*
     * If the short-circuit property is set to false, we'll always do the
     * commit.
     */
    if (!ctx->short_circuit) {
        return true;
    }

    if (pkt_raises_exception(pkt)) {
        return true;
    }

    /* Registers with immutability flags require new_value */
    for (int i = 0; i < ctx->reg_log_idx; i++) {
        int rnum = ctx->reg_log[i];
        if (reg_immut_masks[rnum]) {
            return true;
        }
    }

    /* Floating point instructions are hard-coded to use new_value */
    if (check_for_attrib(pkt, A_FPOP)) {
        return true;
    }

    if (ctx->read_after_write || ctx->has_hvx_overlap) {
        return true;
    }

    return false;
}

static void mark_implicit_pred_read(DisasContext *ctx, int attrib, int pnum)
{
    if (GET_ATTRIB(ctx->insn->opcode, attrib)) {
        ctx_log_pred_read(ctx, pnum);
    }
}

static void mark_implicit_pred_reads(DisasContext *ctx)
{
    mark_implicit_pred_read(ctx, A_IMPLICIT_READS_P0, 0);
    mark_implicit_pred_read(ctx, A_IMPLICIT_READS_P1, 1);
    mark_implicit_pred_read(ctx, A_IMPLICIT_READS_P3, 2);
    mark_implicit_pred_read(ctx, A_IMPLICIT_READS_P3, 3);
}

static void analyze_packet(DisasContext *ctx)
{
    Packet *pkt = ctx->pkt;
    ctx->read_after_write = false;
    ctx->has_hvx_overlap = false;
    for (int i = 0; i < pkt->num_insns; i++) {
        Insn *insn = &pkt->insn[i];
        ctx->insn = insn;
        if (opcode_analyze[insn->opcode]) {
            opcode_analyze[insn->opcode](ctx);
        }
        mark_implicit_reg_writes(ctx);
        mark_implicit_pred_writes(ctx);
        mark_implicit_pred_reads(ctx);
    }

    ctx->need_commit = need_commit(ctx);
}

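/*
 * Reset the per-packet disassembly state, run the analysis pass, and
 * emit any runtime setup the packet semantics need: clearing
 * slot_cancelled, initializing branch_taken/PC for change-of-flow
 * packets, and preloading predicated GPR/predicate/HVX destinations
 * with their current values so a cancelled write commits the old value.
 */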
static void gen_start_packet(DisasContext *ctx)
{
    Packet *pkt = ctx->pkt;
    target_ulong next_PC = ctx->base.pc_next + pkt->encod_pkt_size_in_bytes;
    int i;

    /* Clear out the disassembly context */
    ctx->next_PC = next_PC;
    ctx->reg_log_idx = 0;
    bitmap_zero(ctx->regs_written, TOTAL_PER_THREAD_REGS);
    bitmap_zero(ctx->predicated_regs, TOTAL_PER_THREAD_REGS);
    ctx->preg_log_idx = 0;
    bitmap_zero(ctx->pregs_written, NUM_PREGS);
    ctx->future_vregs_idx = 0;
    ctx->tmp_vregs_idx = 0;
    ctx->vreg_log_idx = 0;
    bitmap_zero(ctx->vregs_written, NUM_VREGS);
    bitmap_zero(ctx->vregs_updated_tmp, NUM_VREGS);
    bitmap_zero(ctx->vregs_updated, NUM_VREGS);
    bitmap_zero(ctx->vregs_select, NUM_VREGS);
    bitmap_zero(ctx->predicated_future_vregs, NUM_VREGS);
    bitmap_zero(ctx->predicated_tmp_vregs, NUM_VREGS);
    bitmap_zero(ctx->qregs_written, NUM_QREGS);
    ctx->qreg_log_idx = 0;
    for (i = 0; i < STORES_MAX; i++) {
        ctx->store_width[i] = 0;
    }
    ctx->s1_store_processed = false;
    ctx->pre_commit = true;
    for (i = 0; i < TOTAL_PER_THREAD_REGS; i++) {
        ctx->new_value[i] = NULL;
    }
    for (i = 0; i < NUM_PREGS; i++) {
        ctx->new_pred_value[i] = NULL;
    }

    analyze_packet(ctx);

    /*
     * pregs_written is used both in the analyze phase as well as the code
     * gen phase, so clear it again.
     */
    bitmap_zero(ctx->pregs_written, NUM_PREGS);

    /* Initialize the runtime state for packet semantics */
    if (need_slot_cancelled(pkt)) {
        tcg_gen_movi_tl(hex_slot_cancelled, 0);
    }
    ctx->branch_taken = NULL;
    if (pkt->pkt_has_cof) {
        ctx->branch_taken = tcg_temp_new();
        if (pkt->pkt_has_multi_cof) {
            tcg_gen_movi_tl(ctx->branch_taken, 0);
        }
        if (need_next_PC(ctx)) {
            tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], next_PC);
        }
    }

    /* Preload the predicated registers into get_result_gpr(ctx, i) */
    if (ctx->need_commit &&
        !bitmap_empty(ctx->predicated_regs, TOTAL_PER_THREAD_REGS)) {
        i = find_first_bit(ctx->predicated_regs, TOTAL_PER_THREAD_REGS);
        while (i < TOTAL_PER_THREAD_REGS) {
            tcg_gen_mov_tl(get_result_gpr(ctx, i), hex_gpr[i]);
            i = find_next_bit(ctx->predicated_regs, TOTAL_PER_THREAD_REGS,
                              i + 1);
        }
    }

    /*
     * Preload the predicated pred registers into ctx->new_pred_value[pred_num]
     * Only endloop instructions conditionally write to pred registers
     */
    if (ctx->need_commit && pkt->pkt_has_endloop) {
        for (i = 0; i < ctx->preg_log_idx; i++) {
            int pred_num = ctx->preg_log[i];
            ctx->new_pred_value[pred_num] = tcg_temp_new();
            tcg_gen_mov_tl(ctx->new_pred_value[pred_num], hex_pred[pred_num]);
        }
    }

    /* Preload the predicated HVX registers into future_VRegs and tmp_VRegs */
    if (!bitmap_empty(ctx->predicated_future_vregs, NUM_VREGS)) {
        i = find_first_bit(ctx->predicated_future_vregs, NUM_VREGS);
        while (i < NUM_VREGS) {
            const intptr_t VdV_off =
                ctx_future_vreg_off(ctx, i, 1, true);
            intptr_t src_off = offsetof(CPUHexagonState, VRegs[i]);
            tcg_gen_gvec_mov(MO_64, VdV_off,
                             src_off,
                             sizeof(MMVector),
                             sizeof(MMVector));
            i = find_next_bit(ctx->predicated_future_vregs, NUM_VREGS, i + 1);
        }
    }
    if (!bitmap_empty(ctx->predicated_tmp_vregs, NUM_VREGS)) {
        i = find_first_bit(ctx->predicated_tmp_vregs, NUM_VREGS);
        while (i < NUM_VREGS) {
            const intptr_t VdV_off =
                ctx_tmp_vreg_off(ctx, i, 1, true);
            intptr_t src_off = offsetof(CPUHexagonState, VRegs[i]);
            tcg_gen_gvec_mov(MO_64, VdV_off,
                             src_off,
                             sizeof(MMVector),
                             sizeof(MMVector));
            i = find_next_bit(ctx->predicated_tmp_vregs, NUM_VREGS, i + 1);
        }
    }
}

bool is_gather_store_insn(DisasContext *ctx)
{
    Packet *pkt = ctx->pkt;
    Insn *insn = ctx->insn;
    if (GET_ATTRIB(insn->opcode, A_CVI_NEW) &&
        insn->new_value_producer_slot == 1) {
        /* Look for gather instruction */
        for (int i = 0; i < pkt->num_insns; i++) {
            Insn *in = &pkt->insn[i];
            if (GET_ATTRIB(in->opcode, A_CVI_GATHER) && in->slot == 1) {
                return true;
            }
        }
    }
    return false;
}

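/*
 * Record the size of a scalar store in ctx->store_width so the commit
 * code can emit a direct tcg_gen_qemu_st_* of the right width instead
 * of falling back to the generic commit_store helper.
 */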
static void mark_store_width(DisasContext *ctx)
{
    uint16_t opcode = ctx->insn->opcode;
    uint32_t slot = ctx->insn->slot;
    uint8_t width = 0;

    if (GET_ATTRIB(opcode, A_SCALAR_STORE)) {
        if (GET_ATTRIB(opcode, A_MEMSIZE_0B)) {
            return;
        }
        if (GET_ATTRIB(opcode, A_MEMSIZE_1B)) {
            width |= 1;
        }
        if (GET_ATTRIB(opcode, A_MEMSIZE_2B)) {
            width |= 2;
        }
        if (GET_ATTRIB(opcode, A_MEMSIZE_4B)) {
            width |= 4;
        }
        if (GET_ATTRIB(opcode, A_MEMSIZE_8B)) {
            width |= 8;
        }
        tcg_debug_assert(is_power_of_2(width));
        ctx->store_width[slot] = width;
    }
}

static void gen_insn(DisasContext *ctx)
{
    if (ctx->insn->generate) {
        ctx->insn->generate(ctx);
        mark_store_width(ctx);
    } else {
        gen_exception_end_tb(ctx, HEX_CAUSE_INVALID_OPCODE);
    }
}

/*
 * Helpers for generating the packet commit
 */
static void gen_reg_writes(DisasContext *ctx)
{
    int i;

    /* Early exit if not needed */
    if (!ctx->need_commit) {
        return;
    }

    for (i = 0; i < ctx->reg_log_idx; i++) {
        int reg_num = ctx->reg_log[i];

        tcg_gen_mov_tl(hex_gpr[reg_num], get_result_gpr(ctx, reg_num));

        /*
         * ctx->is_tight_loop is set when SA0 points to the beginning of the
         * TB. If we write to SA0, we have to turn off tight loop handling.
         */
        if (reg_num == HEX_REG_SA0) {
            ctx->is_tight_loop = false;
        }
    }
}

static void gen_pred_writes(DisasContext *ctx)
{
    /* Early exit if not needed or the log is empty */
    if (!ctx->need_commit || !ctx->preg_log_idx) {
        return;
    }

    for (int i = 0; i < ctx->preg_log_idx; i++) {
        int pred_num = ctx->preg_log[i];
        tcg_gen_mov_tl(hex_pred[pred_num], ctx->new_pred_value[pred_num]);
    }
}

static bool slot_is_predicated(Packet *pkt, int slot_num)
{
    for (int i = 0; i < pkt->num_insns; i++) {
        if (pkt->insn[i].slot == slot_num) {
            return GET_ATTRIB(pkt->insn[i].opcode, A_CONDEXEC);
        }
    }
    /* If we get to here, we didn't find an instruction in the requested slot */
    g_assert_not_reached();
}

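/*
 * Emit the code to perform the scalar store logged for slot_num. For a
 * predicated slot, the store is skipped at runtime when the
 * corresponding bit in hex_slot_cancelled is set.
 */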
void process_store(DisasContext *ctx, int slot_num)
{
    bool is_predicated = slot_is_predicated(ctx->pkt, slot_num);
    TCGLabel *label_end = NULL;

    /*
     * We may have already processed this store
     * See CHECK_NOSHUF in macros.h
     */
    if (slot_num == 1 && ctx->s1_store_processed) {
        return;
    }
    ctx->s1_store_processed = true;

    if (is_predicated) {
        TCGv cancelled = tcg_temp_new();
        label_end = gen_new_label();

        /* Don't do anything if the slot was cancelled */
        tcg_gen_extract_tl(cancelled, hex_slot_cancelled, slot_num, 1);
        tcg_gen_brcondi_tl(TCG_COND_NE, cancelled, 0, label_end);
    }
    {
        TCGv address = tcg_temp_new();
        tcg_gen_mov_tl(address, hex_store_addr[slot_num]);

        /*
         * If we know the width from the DisasContext, we can
         * generate much cleaner code.
         * Unfortunately, not all instructions execute the fSTORE
         * macro during code generation. Anything that uses the
         * generic helper will have this problem. Instructions
         * that use fWRAP to generate proper TCG code will be OK.
         */
        switch (ctx->store_width[slot_num]) {
        case 1:
            tcg_gen_qemu_st_tl(hex_store_val32[slot_num],
                               hex_store_addr[slot_num],
                               ctx->mem_idx, MO_UB);
            break;
        case 2:
            tcg_gen_qemu_st_tl(hex_store_val32[slot_num],
                               hex_store_addr[slot_num],
                               ctx->mem_idx, MO_LE | MO_UW);
            break;
        case 4:
            tcg_gen_qemu_st_tl(hex_store_val32[slot_num],
                               hex_store_addr[slot_num],
                               ctx->mem_idx, MO_LE | MO_UL);
            break;
        case 8:
            tcg_gen_qemu_st_i64(hex_store_val64[slot_num],
                                hex_store_addr[slot_num],
                                ctx->mem_idx, MO_LE | MO_UQ);
            break;
        default:
            {
                /*
                 * If we get to here, we don't know the width at
                 * TCG generation time, we'll use a helper to
                 * avoid branching based on the width at runtime.
                 */
                TCGv slot = tcg_constant_tl(slot_num);
                gen_helper_commit_store(tcg_env, slot);
            }
        }
    }
    if (is_predicated) {
        gen_set_label(label_end);
    }
}

static void process_store_log(DisasContext *ctx)
{
    /*
     * When a packet has two stores, the hardware processes
     * slot 1 and then slot 0. This will be important when
     * the memory accesses overlap.
     */
    Packet *pkt = ctx->pkt;
    if (pkt->pkt_has_store_s1) {
        g_assert(!pkt->pkt_has_dczeroa);
        process_store(ctx, 1);
    }
    if (pkt->pkt_has_store_s0) {
        g_assert(!pkt->pkt_has_dczeroa);
        process_store(ctx, 0);
    }
}

/* Zero out a 32-byte cache line */
static void process_dczeroa(DisasContext *ctx)
{
    if (ctx->pkt->pkt_has_dczeroa) {
        /* Store 32 bytes of zero starting at (addr & ~0x1f) */
        TCGv addr = tcg_temp_new();
        TCGv_i64 zero = tcg_constant_i64(0);

        tcg_gen_andi_tl(addr, ctx->dczero_addr, ~0x1f);
        tcg_gen_qemu_st_i64(zero, addr, ctx->mem_idx, MO_UQ);
        tcg_gen_addi_tl(addr, addr, 8);
        tcg_gen_qemu_st_i64(zero, addr, ctx->mem_idx, MO_UQ);
        tcg_gen_addi_tl(addr, addr, 8);
        tcg_gen_qemu_st_i64(zero, addr, ctx->mem_idx, MO_UQ);
        tcg_gen_addi_tl(addr, addr, 8);
        tcg_gen_qemu_st_i64(zero, addr, ctx->mem_idx, MO_UQ);
    }
}

static bool pkt_has_hvx_store(Packet *pkt)
{
    int i;
    for (i = 0; i < pkt->num_insns; i++) {
        int opcode = pkt->insn[i].opcode;
        if (GET_ATTRIB(opcode, A_CVI) && GET_ATTRIB(opcode, A_STORE)) {
            return true;
        }
    }
    return false;
}

static void gen_commit_hvx(DisasContext *ctx)
{
    int i;

    /* Early exit if not needed */
    if (!ctx->need_commit) {
        g_assert(!pkt_has_hvx_store(ctx->pkt));
        return;
    }

    /*
     *    for (i = 0; i < ctx->vreg_log_idx; i++) {
     *        int rnum = ctx->vreg_log[i];
     *        env->VRegs[rnum] = env->future_VRegs[rnum];
     *    }
     */
    for (i = 0; i < ctx->vreg_log_idx; i++) {
        int rnum = ctx->vreg_log[i];
        intptr_t dstoff = offsetof(CPUHexagonState, VRegs[rnum]);
        intptr_t srcoff = ctx_future_vreg_off(ctx, rnum, 1, false);
        size_t size = sizeof(MMVector);

        tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size);
    }

    /*
     *    for (i = 0; i < ctx->qreg_log_idx; i++) {
     *        int rnum = ctx->qreg_log[i];
     *        env->QRegs[rnum] = env->future_QRegs[rnum];
     *    }
     */
    for (i = 0; i < ctx->qreg_log_idx; i++) {
        int rnum = ctx->qreg_log[i];
        intptr_t dstoff = offsetof(CPUHexagonState, QRegs[rnum]);
        intptr_t srcoff = offsetof(CPUHexagonState, future_QRegs[rnum]);
        size_t size = sizeof(MMQReg);

        tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size);
    }

    if (pkt_has_hvx_store(ctx->pkt)) {
        gen_helper_commit_hvx_stores(tcg_env);
    }
}

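/*
 * Count the packet and its instructions. Endloop markers, "part1"
 * sub-instructions, and NOPs are excluded from the instruction count;
 * HVX instructions are also tallied separately.
 */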
static void update_exec_counters(DisasContext *ctx)
{
    Packet *pkt = ctx->pkt;
    int num_insns = pkt->num_insns;
    int num_real_insns = 0;
    int num_hvx_insns = 0;

    for (int i = 0; i < num_insns; i++) {
        if (!pkt->insn[i].is_endloop &&
            !pkt->insn[i].part1 &&
            !GET_ATTRIB(pkt->insn[i].opcode, A_IT_NOP)) {
            num_real_insns++;
        }
        if (GET_ATTRIB(pkt->insn[i].opcode, A_CVI)) {
            num_hvx_insns++;
        }
    }

    ctx->num_packets++;
    ctx->num_insns += num_real_insns;
    ctx->num_hvx_insns += num_hvx_insns;
}

static void gen_commit_packet(DisasContext *ctx)
{
    /*
     * If there is more than one store in a packet, make sure they are all OK
     * before proceeding with the rest of the packet commit.
     *
     * dczeroa has to be the only store operation in the packet, so we go
     * ahead and process that first.
     *
     * When there is an HVX store, there can also be a scalar store in either
     * slot 0 or slot 1, so we create a mask for the helper to indicate what
     * work to do.
     *
     * When there are two scalar stores, we probe the one in slot 0.
     *
     * Note that we don't call the probe helper for packets with only one
     * store. Therefore, we call process_store_log before anything else
     * involved in committing the packet.
     */
    Packet *pkt = ctx->pkt;
    bool has_store_s0 = pkt->pkt_has_store_s0;
    bool has_store_s1 = (pkt->pkt_has_store_s1 && !ctx->s1_store_processed);
    bool has_hvx_store = pkt_has_hvx_store(pkt);
    if (pkt->pkt_has_dczeroa) {
        /*
         * The dczeroa will be the store in slot 0, check that we don't have
         * a store in slot 1 or an HVX store.
         */
        g_assert(!has_store_s1 && !has_hvx_store);
        process_dczeroa(ctx);
    } else if (has_hvx_store) {
        if (!has_store_s0 && !has_store_s1) {
            TCGv mem_idx = tcg_constant_tl(ctx->mem_idx);
            gen_helper_probe_hvx_stores(tcg_env, mem_idx);
        } else {
            int mask = 0;

            if (has_store_s0) {
                mask =
                    FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, HAS_ST0, 1);
            }
            if (has_store_s1) {
                mask =
                    FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, HAS_ST1, 1);
            }
            if (has_hvx_store) {
                mask =
                    FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES,
                               HAS_HVX_STORES, 1);
            }
            if (has_store_s0 && slot_is_predicated(pkt, 0)) {
                mask =
                    FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES,
                               S0_IS_PRED, 1);
            }
            if (has_store_s1 && slot_is_predicated(pkt, 1)) {
                mask =
                    FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES,
                               S1_IS_PRED, 1);
            }
            mask = FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, MMU_IDX,
                              ctx->mem_idx);
            gen_helper_probe_pkt_scalar_hvx_stores(tcg_env,
                                                   tcg_constant_tl(mask));
        }
    } else if (has_store_s0 && has_store_s1) {
        /*
         * process_store_log will execute the slot 1 store first,
         * so we only have to probe the store in slot 0
         */
        int args = 0;
        args =
            FIELD_DP32(args, PROBE_PKT_SCALAR_STORE_S0, MMU_IDX, ctx->mem_idx);
        if (slot_is_predicated(pkt, 0)) {
            args =
                FIELD_DP32(args, PROBE_PKT_SCALAR_STORE_S0, IS_PREDICATED, 1);
        }
        TCGv args_tcgv = tcg_constant_tl(args);
        gen_helper_probe_pkt_scalar_store_s0(tcg_env, args_tcgv);
    }

    process_store_log(ctx);

    gen_reg_writes(ctx);
    gen_pred_writes(ctx);
    if (pkt->pkt_has_hvx) {
        gen_commit_hvx(ctx);
    }
    update_exec_counters(ctx);

    if (pkt->vhist_insn != NULL) {
        ctx->pre_commit = false;
        ctx->insn = pkt->vhist_insn;
        pkt->vhist_insn->generate(ctx);
    }

    if (pkt->pkt_has_cof) {
        gen_end_tb(ctx);
    }
}

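/*
 * Translate one packet: read its words, decode it, generate code for
 * each instruction, and commit the results. An unparseable or
 * undecodable packet ends the TB with an invalid-packet exception.
 */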
static void decode_and_translate_packet(CPUHexagonState *env, DisasContext *ctx)
{
    uint32_t words[PACKET_WORDS_MAX];
    int nwords;
    Packet pkt;
    int i;

    nwords = read_packet_words(env, ctx, words);
    if (!nwords) {
        gen_exception_end_tb(ctx, HEX_CAUSE_INVALID_PACKET);
        return;
    }

    ctx->pkt = &pkt;
    if (decode_packet(ctx, nwords, words, &pkt, false) > 0) {
        pkt.pc = ctx->base.pc_next;
        gen_start_packet(ctx);
        for (i = 0; i < pkt.num_insns; i++) {
            ctx->insn = &pkt.insn[i];
            gen_insn(ctx);
        }
        gen_commit_packet(ctx);
        ctx->base.pc_next += pkt.encod_pkt_size_in_bytes;
    } else {
        gen_exception_end_tb(ctx, HEX_CAUSE_INVALID_PACKET);
    }
}

static void hexagon_tr_init_disas_context(DisasContextBase *dcbase,
                                          CPUState *cs)
{
    DisasContext *ctx = container_of(dcbase, DisasContext, base);
    HexagonCPU *hex_cpu = env_archcpu(cpu_env(cs));
    uint32_t hex_flags = dcbase->tb->flags;

    ctx->mem_idx = MMU_USER_IDX;
    ctx->num_packets = 0;
    ctx->num_insns = 0;
    ctx->num_hvx_insns = 0;
    ctx->branch_cond = TCG_COND_NEVER;
    ctx->is_tight_loop = FIELD_EX32(hex_flags, TB_FLAGS, IS_TIGHT_LOOP);
    ctx->short_circuit = hex_cpu->short_circuit;
}

static void hexagon_tr_tb_start(DisasContextBase *db, CPUState *cpu)
{
}

static void hexagon_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *ctx = container_of(dcbase, DisasContext, base);

    tcg_gen_insn_start(ctx->base.pc_next);
}

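/*
 * Look ahead from pc_next to determine whether the next packet's words
 * would extend past the end of the page the TB started on.
 */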
static bool pkt_crosses_page(CPUHexagonState *env, DisasContext *ctx)
{
    target_ulong page_start = ctx->base.pc_first & TARGET_PAGE_MASK;
    bool found_end = false;
    int nwords;

    for (nwords = 0; !found_end && nwords < PACKET_WORDS_MAX; nwords++) {
        uint32_t word = translator_ldl(env, &ctx->base,
                                       ctx->base.pc_next + nwords * sizeof(uint32_t));
        found_end = is_packet_end(word);
    }
    uint32_t next_ptr = ctx->base.pc_next + nwords * sizeof(uint32_t);
    return found_end && next_ptr - page_start >= TARGET_PAGE_SIZE;
}

static void hexagon_tr_translate_packet(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *ctx = container_of(dcbase, DisasContext, base);
    CPUHexagonState *env = cpu_env(cpu);

    decode_and_translate_packet(env, ctx);

    if (ctx->base.is_jmp == DISAS_NEXT) {
        target_ulong page_start = ctx->base.pc_first & TARGET_PAGE_MASK;
        target_ulong bytes_max = PACKET_WORDS_MAX * sizeof(target_ulong);

        if (ctx->base.pc_next - page_start >= TARGET_PAGE_SIZE ||
            (ctx->base.pc_next - page_start >= TARGET_PAGE_SIZE - bytes_max &&
             pkt_crosses_page(env, ctx))) {
            ctx->base.is_jmp = DISAS_TOO_MANY;
        }

        /*
         * The CPU log is used to compare against LLDB single stepping,
         * so end the TB after every packet.
         */
        HexagonCPU *hex_cpu = env_archcpu(env);
        if (hex_cpu->lldb_compat && qemu_loglevel_mask(CPU_LOG_TB_CPU)) {
            ctx->base.is_jmp = DISAS_TOO_MANY;
        }
    }
}

static void hexagon_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *ctx = container_of(dcbase, DisasContext, base);

    switch (ctx->base.is_jmp) {
    case DISAS_TOO_MANY:
        gen_exec_counters(ctx);
        tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], ctx->base.pc_next);
        tcg_gen_exit_tb(NULL, 0);
        break;
    case DISAS_NORETURN:
        break;
    default:
        g_assert_not_reached();
    }
}

static const TranslatorOps hexagon_tr_ops = {
    .init_disas_context = hexagon_tr_init_disas_context,
    .tb_start = hexagon_tr_tb_start,
    .insn_start = hexagon_tr_insn_start,
    .translate_insn = hexagon_tr_translate_packet,
    .tb_stop = hexagon_tr_tb_stop,
};

void hexagon_translate_code(CPUState *cs, TranslationBlock *tb,
                            int *max_insns, vaddr pc, void *host_pc)
{
    DisasContext ctx;

    translator_loop(cs, tb, max_insns, pc, host_pc,
                    &hexagon_tr_ops, &ctx.base);
}

#define NAME_LEN 64
static char store_addr_names[STORES_MAX][NAME_LEN];
static char store_width_names[STORES_MAX][NAME_LEN];
static char store_val32_names[STORES_MAX][NAME_LEN];
static char store_val64_names[STORES_MAX][NAME_LEN];
static char vstore_addr_names[VSTORES_MAX][NAME_LEN];
static char vstore_size_names[VSTORES_MAX][NAME_LEN];
static char vstore_pending_names[VSTORES_MAX][NAME_LEN];

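/*
 * Create the TCG globals that back the Hexagon CPU state used during
 * translation: GPRs, predicate registers, the slot-cancelled and LL/SC
 * state, and the per-slot scalar/vector store logs.
 */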
void hexagon_translate_init(void)
{
    int i;

    opcode_init();

    for (i = 0; i < TOTAL_PER_THREAD_REGS; i++) {
        hex_gpr[i] = tcg_global_mem_new(tcg_env,
            offsetof(CPUHexagonState, gpr[i]),
            hexagon_regnames[i]);
    }
    hex_new_value_usr = tcg_global_mem_new(tcg_env,
        offsetof(CPUHexagonState, new_value_usr), "new_value_usr");

    for (i = 0; i < NUM_PREGS; i++) {
        hex_pred[i] = tcg_global_mem_new(tcg_env,
            offsetof(CPUHexagonState, pred[i]),
            hexagon_prednames[i]);
    }
    hex_slot_cancelled = tcg_global_mem_new(tcg_env,
        offsetof(CPUHexagonState, slot_cancelled), "slot_cancelled");
    hex_llsc_addr = tcg_global_mem_new(tcg_env,
        offsetof(CPUHexagonState, llsc_addr), "llsc_addr");
    hex_llsc_val = tcg_global_mem_new(tcg_env,
        offsetof(CPUHexagonState, llsc_val), "llsc_val");
    hex_llsc_val_i64 = tcg_global_mem_new_i64(tcg_env,
        offsetof(CPUHexagonState, llsc_val_i64), "llsc_val_i64");
    for (i = 0; i < STORES_MAX; i++) {
        snprintf(store_addr_names[i], NAME_LEN, "store_addr_%d", i);
        hex_store_addr[i] = tcg_global_mem_new(tcg_env,
            offsetof(CPUHexagonState, mem_log_stores[i].va),
            store_addr_names[i]);

        snprintf(store_width_names[i], NAME_LEN, "store_width_%d", i);
        hex_store_width[i] = tcg_global_mem_new(tcg_env,
            offsetof(CPUHexagonState, mem_log_stores[i].width),
            store_width_names[i]);

        snprintf(store_val32_names[i], NAME_LEN, "store_val32_%d", i);
        hex_store_val32[i] = tcg_global_mem_new(tcg_env,
            offsetof(CPUHexagonState, mem_log_stores[i].data32),
            store_val32_names[i]);

        snprintf(store_val64_names[i], NAME_LEN, "store_val64_%d", i);
        hex_store_val64[i] = tcg_global_mem_new_i64(tcg_env,
            offsetof(CPUHexagonState, mem_log_stores[i].data64),
            store_val64_names[i]);
    }
    for (i = 0; i < VSTORES_MAX; i++) {
        snprintf(vstore_addr_names[i], NAME_LEN, "vstore_addr_%d", i);
        hex_vstore_addr[i] = tcg_global_mem_new(tcg_env,
            offsetof(CPUHexagonState, vstore[i].va),
            vstore_addr_names[i]);

        snprintf(vstore_size_names[i], NAME_LEN, "vstore_size_%d", i);
        hex_vstore_size[i] = tcg_global_mem_new(tcg_env,
            offsetof(CPUHexagonState, vstore[i].size),
            vstore_size_names[i]);

        snprintf(vstore_pending_names[i], NAME_LEN, "vstore_pending_%d", i);
        hex_vstore_pending[i] = tcg_global_mem_new(tcg_env,
            offsetof(CPUHexagonState, vstore_pending[i]),
            vstore_pending_names[i]);
    }
}