xref: /qemu/tcg/tcg.c (revision 70ce076fa6dff60585c229a4b641b13e64bf03cf)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 
27 /* Define to dump the ELF file used to communicate with GDB.  */
28 #undef DEBUG_JIT
29 
30 #include "qemu/error-report.h"
31 #include "qemu/cutils.h"
32 #include "qemu/host-utils.h"
33 #include "qemu/qemu-print.h"
34 #include "qemu/cacheflush.h"
35 #include "qemu/cacheinfo.h"
36 #include "qemu/timer.h"
37 #include "exec/translation-block.h"
38 #include "exec/tlb-common.h"
39 #include "tcg/startup.h"
40 #include "tcg/tcg-op-common.h"
41 
42 #if UINTPTR_MAX == UINT32_MAX
43 # define ELF_CLASS  ELFCLASS32
44 #else
45 # define ELF_CLASS  ELFCLASS64
46 #endif
47 #if HOST_BIG_ENDIAN
48 # define ELF_DATA   ELFDATA2MSB
49 #else
50 # define ELF_DATA   ELFDATA2LSB
51 #endif
52 
53 #include "elf.h"
54 #include "exec/log.h"
55 #include "tcg/tcg-ldst.h"
56 #include "tcg/tcg-temp-internal.h"
57 #include "tcg-internal.h"
58 #include "tcg/perf.h"
59 #include "tcg-has.h"
60 #ifdef CONFIG_USER_ONLY
61 #include "user/guest-base.h"
62 #endif
63 
64 /* Forward declarations for functions declared in tcg-target.c.inc and
65    used here. */
66 static void tcg_target_init(TCGContext *s);
67 static void tcg_target_qemu_prologue(TCGContext *s);
68 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
69                         intptr_t value, intptr_t addend);
70 static void tcg_out_nop_fill(tcg_insn_unit *p, int count);
71 
72 typedef struct TCGLabelQemuLdst TCGLabelQemuLdst;
73 static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
74 static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
75 
76 /* The CIE and FDE header definitions will be common to all hosts.  */
77 typedef struct {
78     uint32_t len __attribute__((aligned((sizeof(void *)))));
79     uint32_t id;
80     uint8_t version;
81     char augmentation[1];
82     uint8_t code_align;
83     uint8_t data_align;
84     uint8_t return_column;
85 } DebugFrameCIE;
86 
87 typedef struct QEMU_PACKED {
88     uint32_t len __attribute__((aligned((sizeof(void *)))));
89     uint32_t cie_offset;
90     uintptr_t func_start;
91     uintptr_t func_len;
92 } DebugFrameFDEHeader;
93 
94 typedef struct QEMU_PACKED {
95     DebugFrameCIE cie;
96     DebugFrameFDEHeader fde;
97 } DebugFrameHeader;
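/*
 * Editorial note (not in the original source): each backend fills in these
 * structures to describe its prologue's stack frame.  tcg_register_jit_int()
 * wraps the resulting .debug_frame data in a minimal in-memory ELF image and
 * registers it through GDB's JIT interface, so host-level backtraces can
 * unwind through generated code.
 */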
98 
99 struct TCGLabelQemuLdst {
100     bool is_ld;             /* qemu_ld: true, qemu_st: false */
101     MemOpIdx oi;
102     TCGType type;           /* result type of a load */
103     TCGReg addr_reg;        /* reg index for guest virtual addr */
104     TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
105     TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
106     const tcg_insn_unit *raddr;   /* addr of the insn following the qemu_ld/st */
107     tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
108     QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
109 };
110 
111 static void tcg_register_jit_int(const void *buf, size_t size,
112                                  const void *debug_frame,
113                                  size_t debug_frame_size)
114     __attribute__((unused));
115 
116 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
117 static void tcg_out_tb_start(TCGContext *s);
118 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
119                        intptr_t arg2);
120 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
121 static void tcg_out_movi(TCGContext *s, TCGType type,
122                          TCGReg ret, tcg_target_long arg);
123 static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
124 static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
125 static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
126 static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
127 static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
128 static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
129 static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
130 static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
131 static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
132 static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
133 static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
134 static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
135 static void tcg_out_goto_tb(TCGContext *s, int which);
136 static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
137                        const TCGArg args[TCG_MAX_OP_ARGS],
138                        const int const_args[TCG_MAX_OP_ARGS]);
139 #if TCG_TARGET_MAYBE_vec
140 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
141                             TCGReg dst, TCGReg src);
142 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
143                              TCGReg dst, TCGReg base, intptr_t offset);
144 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
145                              TCGReg dst, int64_t arg);
146 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
147                            unsigned vecl, unsigned vece,
148                            const TCGArg args[TCG_MAX_OP_ARGS],
149                            const int const_args[TCG_MAX_OP_ARGS]);
150 #else
151 static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
152                                    TCGReg dst, TCGReg src)
153 {
154     g_assert_not_reached();
155 }
156 static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
157                                     TCGReg dst, TCGReg base, intptr_t offset)
158 {
159     g_assert_not_reached();
160 }
161 static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
162                                     TCGReg dst, int64_t arg)
163 {
164     g_assert_not_reached();
165 }
166 static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
167                                   unsigned vecl, unsigned vece,
168                                   const TCGArg args[TCG_MAX_OP_ARGS],
169                                   const int const_args[TCG_MAX_OP_ARGS])
170 {
171     g_assert_not_reached();
172 }
173 int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
174 {
175     return 0;
176 }
177 #endif
178 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
179                        intptr_t arg2);
180 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
181                         TCGReg base, intptr_t ofs);
182 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
183                          const TCGHelperInfo *info);
184 static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
185 static bool tcg_target_const_match(int64_t val, int ct,
186                                    TCGType type, TCGCond cond, int vece);
187 
188 #ifndef CONFIG_USER_ONLY
189 #define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
190 #endif
191 
192 typedef struct TCGLdstHelperParam {
193     TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
194     unsigned ntmp;
195     int tmp[3];
196 } TCGLdstHelperParam;
197 
198 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
199                                    const TCGLdstHelperParam *p)
200     __attribute__((unused));
201 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
202                                   bool load_sign, const TCGLdstHelperParam *p)
203     __attribute__((unused));
204 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
205                                    const TCGLdstHelperParam *p)
206     __attribute__((unused));
207 
208 static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
209     [MO_UB] = helper_ldub_mmu,
210     [MO_SB] = helper_ldsb_mmu,
211     [MO_UW] = helper_lduw_mmu,
212     [MO_SW] = helper_ldsw_mmu,
213     [MO_UL] = helper_ldul_mmu,
214     [MO_UQ] = helper_ldq_mmu,
215 #if TCG_TARGET_REG_BITS == 64
216     [MO_SL] = helper_ldsl_mmu,
217     [MO_128] = helper_ld16_mmu,
218 #endif
219 };
220 
221 static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
222     [MO_8]  = helper_stb_mmu,
223     [MO_16] = helper_stw_mmu,
224     [MO_32] = helper_stl_mmu,
225     [MO_64] = helper_stq_mmu,
226 #if TCG_TARGET_REG_BITS == 64
227     [MO_128] = helper_st16_mmu,
228 #endif
229 };
230 
231 typedef struct {
232     MemOp atom;   /* lg2 bits of atomicity required */
233     MemOp align;  /* lg2 bits of alignment to use */
234 } TCGAtomAlign;
235 
236 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
237                                            MemOp host_atom, bool allow_two_ops)
238     __attribute__((unused));
239 
240 #ifdef CONFIG_USER_ONLY
241 bool tcg_use_softmmu;
242 #endif
243 
244 TCGContext tcg_init_ctx;
245 __thread TCGContext *tcg_ctx;
246 
247 TCGContext **tcg_ctxs;
248 unsigned int tcg_cur_ctxs;
249 unsigned int tcg_max_ctxs;
250 TCGv_env tcg_env;
251 const void *tcg_code_gen_epilogue;
252 uintptr_t tcg_splitwx_diff;
253 
254 #ifndef CONFIG_TCG_INTERPRETER
255 tcg_prologue_fn *tcg_qemu_tb_exec;
256 #endif
257 
258 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
259 static TCGRegSet tcg_target_call_clobber_regs;
260 
261 #if TCG_TARGET_INSN_UNIT_SIZE == 1
262 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
263 {
264     *s->code_ptr++ = v;
265 }
266 
267 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
268                                                       uint8_t v)
269 {
270     *p = v;
271 }
272 #endif
273 
274 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
275 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
276 {
277     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
278         *s->code_ptr++ = v;
279     } else {
280         tcg_insn_unit *p = s->code_ptr;
281         memcpy(p, &v, sizeof(v));
282         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
283     }
284 }
285 
286 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
287                                                        uint16_t v)
288 {
289     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
290         *p = v;
291     } else {
292         memcpy(p, &v, sizeof(v));
293     }
294 }
295 #endif
296 
297 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
298 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
299 {
300     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
301         *s->code_ptr++ = v;
302     } else {
303         tcg_insn_unit *p = s->code_ptr;
304         memcpy(p, &v, sizeof(v));
305         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
306     }
307 }
308 
309 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
310                                                        uint32_t v)
311 {
312     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
313         *p = v;
314     } else {
315         memcpy(p, &v, sizeof(v));
316     }
317 }
318 #endif
319 
320 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
321 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
322 {
323     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
324         *s->code_ptr++ = v;
325     } else {
326         tcg_insn_unit *p = s->code_ptr;
327         memcpy(p, &v, sizeof(v));
328         s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
329     }
330 }
331 
332 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
333                                                        uint64_t v)
334 {
335     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
336         *p = v;
337     } else {
338         memcpy(p, &v, sizeof(v));
339     }
340 }
341 #endif
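/*
 * Editorial note: TCG_TARGET_INSN_UNIT_SIZE abstracts the host's instruction
 * granularity.  On a fixed-width host such as AArch64 the unit is 4 bytes,
 * so tcg_out32() is a single store of one unit while tcg_out64() advances
 * s->code_ptr by 8 / 4 = 2 units via memcpy(); on x86, where the unit is
 * 1 byte, every emitter above reduces to byte stores.
 */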
342 
343 /* label relocation processing */
344 
345 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
346                           TCGLabel *l, intptr_t addend)
347 {
348     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
349 
350     r->type = type;
351     r->ptr = code_ptr;
352     r->addend = addend;
353     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
354 }
355 
356 static void tcg_out_label(TCGContext *s, TCGLabel *l)
357 {
358     tcg_debug_assert(!l->has_value);
359     l->has_value = 1;
360     l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
361 }
362 
363 TCGLabel *gen_new_label(void)
364 {
365     TCGContext *s = tcg_ctx;
366     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
367 
368     memset(l, 0, sizeof(TCGLabel));
369     l->id = s->nb_labels++;
370     QSIMPLEQ_INIT(&l->branches);
371     QSIMPLEQ_INIT(&l->relocs);
372 
373     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
374 
375     return l;
376 }
377 
378 static bool tcg_resolve_relocs(TCGContext *s)
379 {
380     TCGLabel *l;
381 
382     QSIMPLEQ_FOREACH(l, &s->labels, next) {
383         TCGRelocation *r;
384         uintptr_t value = l->u.value;
385 
386         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
387             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
388                 return false;
389             }
390         }
391     }
392     return true;
393 }
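/*
 * Editorial sketch of the label lifecycle above, with hypothetical names
 * (R_HOST_BRANCH and tcg_out_branch_insn stand in for a backend's real
 * relocation type and emitter):
 *
 *     TCGLabel *l = gen_new_label();
 *     tcg_out_reloc(s, s->code_ptr, R_HOST_BRANCH, l, 0); // queue a fixup
 *     tcg_out_branch_insn(s);            // emit branch with dummy target
 *     ...
 *     tcg_out_label(s, l);               // bind label to current code_ptr
 *
 * Once code generation completes, tcg_resolve_relocs() walks every label's
 * queue and lets patch_reloc() rewrite each recorded branch.
 */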
394 
395 static void set_jmp_reset_offset(TCGContext *s, int which)
396 {
397     /*
398      * We will check for overflow at the end of the opcode loop in
399      * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
400      */
401     s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
402 }
403 
404 static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
405 {
406     /*
407      * We will check for overflow at the end of the opcode loop in
408      * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
409      */
410     s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
411 }
412 
413 static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
414 {
415     /*
416      * Return the read-execute version of the pointer, for the benefit
417      * of any pc-relative addressing mode.
418      */
419     return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
420 }
421 
422 static int __attribute__((unused))
423 tlb_mask_table_ofs(TCGContext *s, int which)
424 {
425     return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
426             sizeof(CPUNegativeOffsetState));
427 }
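/*
 * Editorial note: CPUNegativeOffsetState is laid out immediately before the
 * architectural CPUArchState that the env register points at, so the value
 * computed above is negative.  A backend can use it directly as a
 * displacement from env: CPUTLBDescFast.mask lives at that offset and
 * CPUTLBDescFast.table at that offset plus sizeof(uintptr_t).
 */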
428 
429 /* Signal overflow, starting over with fewer guest insns. */
430 static G_NORETURN
431 void tcg_raise_tb_overflow(TCGContext *s)
432 {
433     siglongjmp(s->jmp_trans, -2);
434 }
435 
436 /*
437  * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
438  * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
439  *
440  * However, tcg_out_helper_load_slots reuses this field to hold an
441  * argument slot number (which may designate an argument register or an
442  * argument stack slot), converting to TCGReg once all arguments that
443  * are destined for the stack are processed.
444  */
445 typedef struct TCGMovExtend {
446     unsigned dst;
447     TCGReg src;
448     TCGType dst_type;
449     TCGType src_type;
450     MemOp src_ext;
451 } TCGMovExtend;
452 
453 /**
454  * tcg_out_movext -- move and extend
455  * @s: tcg context
456  * @dst_type: integral type for destination
457  * @dst: destination register
458  * @src_type: integral type for source
459  * @src_ext: extension to apply to source
460  * @src: source register
461  *
462  * Move or extend @src into @dst, depending on @src_ext and the types.
463  */
464 static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
465                            TCGType src_type, MemOp src_ext, TCGReg src)
466 {
467     switch (src_ext) {
468     case MO_UB:
469         tcg_out_ext8u(s, dst, src);
470         break;
471     case MO_SB:
472         tcg_out_ext8s(s, dst_type, dst, src);
473         break;
474     case MO_UW:
475         tcg_out_ext16u(s, dst, src);
476         break;
477     case MO_SW:
478         tcg_out_ext16s(s, dst_type, dst, src);
479         break;
480     case MO_UL:
481     case MO_SL:
482         if (dst_type == TCG_TYPE_I32) {
483             if (src_type == TCG_TYPE_I32) {
484                 tcg_out_mov(s, TCG_TYPE_I32, dst, src);
485             } else {
486                 tcg_out_extrl_i64_i32(s, dst, src);
487             }
488         } else if (src_type == TCG_TYPE_I32) {
489             if (src_ext & MO_SIGN) {
490                 tcg_out_exts_i32_i64(s, dst, src);
491             } else {
492                 tcg_out_extu_i32_i64(s, dst, src);
493             }
494         } else {
495             if (src_ext & MO_SIGN) {
496                 tcg_out_ext32s(s, dst, src);
497             } else {
498                 tcg_out_ext32u(s, dst, src);
499             }
500         }
501         break;
502     case MO_UQ:
503         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
504         if (dst_type == TCG_TYPE_I32) {
505             tcg_out_extrl_i64_i32(s, dst, src);
506         } else {
507             tcg_out_mov(s, TCG_TYPE_I64, dst, src);
508         }
509         break;
510     default:
511         g_assert_not_reached();
512     }
513 }
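/*
 * Editorial example: to sign-extend a 32-bit value into a 64-bit register
 * on a 64-bit host (REG_D and REG_S stand in for real host registers):
 *
 *     tcg_out_movext(s, TCG_TYPE_I64, REG_D, TCG_TYPE_I32, MO_SL, REG_S);
 *
 * which the switch above routes to tcg_out_exts_i32_i64().
 */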
514 
515 /* Minor variations on a theme, using a structure. */
516 static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
517                                     TCGReg src)
518 {
519     tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
520 }
521 
522 static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
523 {
524     tcg_out_movext1_new_src(s, i, i->src);
525 }
526 
527 /**
528  * tcg_out_movext2 -- move and extend two pairs
529  * @s: tcg context
530  * @i1: first move description
531  * @i2: second move description
532  * @scratch: temporary register, or -1 for none
533  *
534  * As tcg_out_movext, for both @i1 and @i2, taking care of overlap
535  * between the sources and destinations.
536  */
537 
538 static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
539                             const TCGMovExtend *i2, int scratch)
540 {
541     TCGReg src1 = i1->src;
542     TCGReg src2 = i2->src;
543 
544     if (i1->dst != src2) {
545         tcg_out_movext1(s, i1);
546         tcg_out_movext1(s, i2);
547         return;
548     }
549     if (i2->dst == src1) {
550         TCGType src1_type = i1->src_type;
551         TCGType src2_type = i2->src_type;
552 
553         if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
554             /* The data is now in the correct registers, now extend. */
555             src1 = i2->src;
556             src2 = i1->src;
557         } else {
558             tcg_debug_assert(scratch >= 0);
559             tcg_out_mov(s, src1_type, scratch, src1);
560             src1 = scratch;
561         }
562     }
563     tcg_out_movext1_new_src(s, i2, src2);
564     tcg_out_movext1_new_src(s, i1, src1);
565 }
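/*
 * Editorial example: with i1 = {dst=R0, src=R1} and i2 = {dst=R1, src=R0}
 * the two moves form a cycle.  If tcg_out_xchg() succeeds, the values are
 * swapped and extended in place; otherwise the fallback above is:
 *
 *     tcg_out_mov(s, i1->src_type, scratch, R1);  // break the cycle
 *     tcg_out_movext1_new_src(s, i2, R0);         // R0 is still live
 *     tcg_out_movext1_new_src(s, i1, scratch);    // old R1, via scratch
 */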
566 
567 /**
568  * tcg_out_movext3 -- move and extend three pairs
569  * @s: tcg context
570  * @i1: first move description
571  * @i2: second move description
572  * @i3: third move description
573  * @scratch: temporary register, or -1 for none
574  *
575  * As tcg_out_movext, for all of @i1, @i2 and @i3, taking care of overlap
576  * between the sources and destinations.
577  */
578 
579 static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
580                             const TCGMovExtend *i2, const TCGMovExtend *i3,
581                             int scratch)
582 {
583     TCGReg src1 = i1->src;
584     TCGReg src2 = i2->src;
585     TCGReg src3 = i3->src;
586 
587     if (i1->dst != src2 && i1->dst != src3) {
588         tcg_out_movext1(s, i1);
589         tcg_out_movext2(s, i2, i3, scratch);
590         return;
591     }
592     if (i2->dst != src1 && i2->dst != src3) {
593         tcg_out_movext1(s, i2);
594         tcg_out_movext2(s, i1, i3, scratch);
595         return;
596     }
597     if (i3->dst != src1 && i3->dst != src2) {
598         tcg_out_movext1(s, i3);
599         tcg_out_movext2(s, i1, i2, scratch);
600         return;
601     }
602 
603     /*
604      * There is a cycle.  Since there are only 3 nodes, the cycle is
605      * either "clockwise" or "anti-clockwise", and can be solved with
606      * a single scratch or two xchg.
607      */
608     if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
609         /* "Clockwise" */
610         if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
611             tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
612             /* The data is now in the correct registers, now extend. */
613             tcg_out_movext1_new_src(s, i1, i1->dst);
614             tcg_out_movext1_new_src(s, i2, i2->dst);
615             tcg_out_movext1_new_src(s, i3, i3->dst);
616         } else {
617             tcg_debug_assert(scratch >= 0);
618             tcg_out_mov(s, i1->src_type, scratch, src1);
619             tcg_out_movext1(s, i3);
620             tcg_out_movext1(s, i2);
621             tcg_out_movext1_new_src(s, i1, scratch);
622         }
623     } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
624         /* "Anti-clockwise" */
625         if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
626             tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
627             /* The data is now in the correct registers, now extend. */
628             tcg_out_movext1_new_src(s, i1, i1->dst);
629             tcg_out_movext1_new_src(s, i2, i2->dst);
630             tcg_out_movext1_new_src(s, i3, i3->dst);
631         } else {
632             tcg_debug_assert(scratch >= 0);
633             tcg_out_mov(s, i1->src_type, scratch, src1);
634             tcg_out_movext1(s, i2);
635             tcg_out_movext1(s, i3);
636             tcg_out_movext1_new_src(s, i1, scratch);
637         }
638     } else {
639         g_assert_not_reached();
640     }
641 }
642 
643 /*
644  * Allocate a new TCGLabelQemuLdst entry.
645  */
646 
647 __attribute__((unused))
648 static TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
649 {
650     TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));
651 
652     memset(l, 0, sizeof(*l));
653     QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next);
654 
655     return l;
656 }
657 
658 /*
659  * Allocate new constant pool entries.
660  */
661 
662 typedef struct TCGLabelPoolData {
663     struct TCGLabelPoolData *next;
664     tcg_insn_unit *label;
665     intptr_t addend;
666     int rtype;
667     unsigned nlong;
668     tcg_target_ulong data[];
669 } TCGLabelPoolData;
670 
671 static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype,
672                                         tcg_insn_unit *label, intptr_t addend)
673 {
674     TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData)
675                                      + sizeof(tcg_target_ulong) * nlong);
676 
677     n->label = label;
678     n->addend = addend;
679     n->rtype = rtype;
680     n->nlong = nlong;
681     return n;
682 }
683 
684 static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n)
685 {
686     TCGLabelPoolData *i, **pp;
687     int nlong = n->nlong;
688 
689     /* Insertion sort on the pool.  */
690     for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) {
691         if (nlong > i->nlong) {
692             break;
693         }
694         if (nlong < i->nlong) {
695             continue;
696         }
697         if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) {
698             break;
699         }
700     }
701     n->next = *pp;
702     *pp = n;
703 }
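/*
 * Editorial note: the resulting list is ordered by descending nlong, and by
 * descending data within equal nlong.  The widest (most strictly aligned)
 * entries are therefore emitted first by tcg_out_pool_finalize(), and exact
 * duplicates end up adjacent so that they can be merged into one pool slot.
 */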
704 
705 /* The "usual" for generic integer code.  */
706 __attribute__((unused))
707 static void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype,
708                            tcg_insn_unit *label, intptr_t addend)
709 {
710     TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend);
711     n->data[0] = d;
712     new_pool_insert(s, n);
713 }
714 
715 /* For v64 or v128, depending on the host.  */
716 __attribute__((unused))
717 static void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label,
718                         intptr_t addend, tcg_target_ulong d0,
719                         tcg_target_ulong d1)
720 {
721     TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend);
722     n->data[0] = d0;
723     n->data[1] = d1;
724     new_pool_insert(s, n);
725 }
726 
727 /* For v128 or v256, depending on the host.  */
728 __attribute__((unused))
729 static void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label,
730                         intptr_t addend, tcg_target_ulong d0,
731                         tcg_target_ulong d1, tcg_target_ulong d2,
732                         tcg_target_ulong d3)
733 {
734     TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend);
735     n->data[0] = d0;
736     n->data[1] = d1;
737     n->data[2] = d2;
738     n->data[3] = d3;
739     new_pool_insert(s, n);
740 }
741 
742 /* For v256, for 32-bit host.  */
743 __attribute__((unused))
744 static void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label,
745                         intptr_t addend, tcg_target_ulong d0,
746                         tcg_target_ulong d1, tcg_target_ulong d2,
747                         tcg_target_ulong d3, tcg_target_ulong d4,
748                         tcg_target_ulong d5, tcg_target_ulong d6,
749                         tcg_target_ulong d7)
750 {
751     TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend);
752     n->data[0] = d0;
753     n->data[1] = d1;
754     n->data[2] = d2;
755     n->data[3] = d3;
756     n->data[4] = d4;
757     n->data[5] = d5;
758     n->data[6] = d6;
759     n->data[7] = d7;
760     new_pool_insert(s, n);
761 }
762 
763 /*
764  * Generate TB finalization at the end of block
765  */
766 
767 static int tcg_out_ldst_finalize(TCGContext *s)
768 {
769     TCGLabelQemuLdst *lb;
770 
771     /* qemu_ld/st slow paths */
772     QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) {
773         if (lb->is_ld
774             ? !tcg_out_qemu_ld_slow_path(s, lb)
775             : !tcg_out_qemu_st_slow_path(s, lb)) {
776             return -2;
777         }
778 
779         /*
780          * Test for (pending) buffer overflow.  The assumption is that any
781          * one operation beginning below the high water mark cannot overrun
782          * the buffer completely.  Thus we can test for overflow after
783          * generating code without having to check during generation.
784          */
785         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
786             return -1;
787         }
788     }
789     return 0;
790 }
791 
792 static int tcg_out_pool_finalize(TCGContext *s)
793 {
794     TCGLabelPoolData *p = s->pool_labels;
795     TCGLabelPoolData *l = NULL;
796     void *a;
797 
798     if (p == NULL) {
799         return 0;
800     }
801 
802     /*
803      * ??? Round up to qemu_icache_linesize, but then do not round
804      * again when allocating the next TranslationBlock structure.
805      */
806     a = (void *)ROUND_UP((uintptr_t)s->code_ptr,
807                          sizeof(tcg_target_ulong) * p->nlong);
808     tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr);
809     s->data_gen_ptr = a;
810 
811     for (; p != NULL; p = p->next) {
812         size_t size = sizeof(tcg_target_ulong) * p->nlong;
813         uintptr_t value;
814 
815         if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) {
816             if (unlikely(a > s->code_gen_highwater)) {
817                 return -1;
818             }
819             memcpy(a, p->data, size);
820             a += size;
821             l = p;
822         }
823 
824         value = (uintptr_t)tcg_splitwx_to_rx(a) - size;
825         if (!patch_reloc(p->label, p->rtype, value, p->addend)) {
826             return -2;
827         }
828     }
829 
830     s->code_ptr = a;
831     return 0;
832 }
833 
834 #define C_PFX1(P, A)                    P##A
835 #define C_PFX2(P, A, B)                 P##A##_##B
836 #define C_PFX3(P, A, B, C)              P##A##_##B##_##C
837 #define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
838 #define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
839 #define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
840 
841 /* Define an enumeration for the various combinations. */
842 
843 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
844 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
845 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
846 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),
847 
848 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
849 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
850 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
851 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),
852 
853 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
854 #define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1),
855 #define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),
856 
857 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
858 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
859 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
860 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
861 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),
862 
863 typedef enum {
864     C_NotImplemented = -1,
865 #include "tcg-target-con-set.h"
866 } TCGConstraintSetIndex;
867 
868 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode, TCGType, unsigned);
869 
870 #undef C_O0_I1
871 #undef C_O0_I2
872 #undef C_O0_I3
873 #undef C_O0_I4
874 #undef C_O1_I1
875 #undef C_O1_I2
876 #undef C_O1_I3
877 #undef C_O1_I4
878 #undef C_N1_I2
879 #undef C_N1O1_I1
880 #undef C_N2_I1
881 #undef C_O2_I1
882 #undef C_O2_I2
883 #undef C_O2_I3
884 #undef C_O2_I4
885 #undef C_N1_O1_I4
886 
887 /* Put all of the constraint sets into an array, indexed by the enum. */
888 
889 typedef struct TCGConstraintSet {
890     uint8_t nb_oargs, nb_iargs;
891     const char *args_ct_str[TCG_MAX_OP_ARGS];
892 } TCGConstraintSet;
893 
894 #define C_O0_I1(I1)                     { 0, 1, { #I1 } },
895 #define C_O0_I2(I1, I2)                 { 0, 2, { #I1, #I2 } },
896 #define C_O0_I3(I1, I2, I3)             { 0, 3, { #I1, #I2, #I3 } },
897 #define C_O0_I4(I1, I2, I3, I4)         { 0, 4, { #I1, #I2, #I3, #I4 } },
898 
899 #define C_O1_I1(O1, I1)                 { 1, 1, { #O1, #I1 } },
900 #define C_O1_I2(O1, I1, I2)             { 1, 2, { #O1, #I1, #I2 } },
901 #define C_O1_I3(O1, I1, I2, I3)         { 1, 3, { #O1, #I1, #I2, #I3 } },
902 #define C_O1_I4(O1, I1, I2, I3, I4)     { 1, 4, { #O1, #I1, #I2, #I3, #I4 } },
903 
904 #define C_N1_I2(O1, I1, I2)             { 1, 2, { "&" #O1, #I1, #I2 } },
905 #define C_N1O1_I1(O1, O2, I1)           { 2, 1, { "&" #O1, #O2, #I1 } },
906 #define C_N2_I1(O1, O2, I1)             { 2, 1, { "&" #O1, "&" #O2, #I1 } },
907 
908 #define C_O2_I1(O1, O2, I1)             { 2, 1, { #O1, #O2, #I1 } },
909 #define C_O2_I2(O1, O2, I1, I2)         { 2, 2, { #O1, #O2, #I1, #I2 } },
910 #define C_O2_I3(O1, O2, I1, I2, I3)     { 2, 3, { #O1, #O2, #I1, #I2, #I3 } },
911 #define C_O2_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { #O1, #O2, #I1, #I2, #I3, #I4 } },
912 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },
913 
914 static const TCGConstraintSet constraint_sets[] = {
915 #include "tcg-target-con-set.h"
916 };
917 
918 #undef C_O0_I1
919 #undef C_O0_I2
920 #undef C_O0_I3
921 #undef C_O0_I4
922 #undef C_O1_I1
923 #undef C_O1_I2
924 #undef C_O1_I3
925 #undef C_O1_I4
926 #undef C_N1_I2
927 #undef C_N1O1_I1
928 #undef C_N2_I1
929 #undef C_O2_I1
930 #undef C_O2_I2
931 #undef C_O2_I3
932 #undef C_O2_I4
933 #undef C_N1_O1_I4
934 
935 /* Expand the enumerator to be returned from tcg_target_op_def(). */
936 
937 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
938 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
939 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
940 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)
941 
942 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
943 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
944 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
945 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)
946 
947 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
948 #define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1)
949 #define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)
950 
951 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
952 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
953 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
954 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
955 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)
956 
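/*
 * Editorial worked example: a line such as
 *
 *     C_O1_I2(r, r, ri)
 *
 * in tcg-target-con-set.h expands three times during this compilation:
 * first to the enumerator c_o1_i2_r_r_ri in TCGConstraintSetIndex, then to
 * the entry { 1, 2, { "r", "r", "ri" } } in constraint_sets[], and finally,
 * via the definitions just above, back to c_o1_i2_r_r_ri so that the
 * backend's tcg_target_op_def() can return it from the file included below.
 */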
957 #include "tcg-target.c.inc"
958 
959 #ifndef CONFIG_TCG_INTERPRETER
960 /* Validate CPUTLBDescFast placement. */
961 QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
962                         sizeof(CPUNegativeOffsetState))
963                   < MIN_TLB_MASK_TABLE_OFS);
964 #endif
965 
966 /*
967  * All TCG threads except the parent (i.e. the one that called tcg_context_init
968  * and registered the target's TCG globals) must register with this function
969  * before initiating translation.
970  *
971  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
972  * of tcg_region_init() for the reasoning behind this.
973  *
974  * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
975  * system-mode tcg_ctxs[] does not track tcg_ctx_init, since the initial context
976  * is not used anymore for translation once this function is called.
977  *
978  * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
979  * iterates over the array (e.g. tcg_code_size()) the same for both
980  * system/user modes.
981  */
982 #ifdef CONFIG_USER_ONLY
983 void tcg_register_thread(void)
984 {
985     tcg_ctx = &tcg_init_ctx;
986 }
987 #else
988 void tcg_register_thread(void)
989 {
990     TCGContext *s = g_malloc(sizeof(*s));
991     unsigned int i, n;
992 
993     *s = tcg_init_ctx;
994 
995     /* Relink mem_base.  */
996     for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
997         if (tcg_init_ctx.temps[i].mem_base) {
998             ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
999             tcg_debug_assert(b >= 0 && b < n);
1000             s->temps[i].mem_base = &s->temps[b];
1001         }
1002     }
1003 
1004     /* Claim an entry in tcg_ctxs */
1005     n = qatomic_fetch_inc(&tcg_cur_ctxs);
1006     g_assert(n < tcg_max_ctxs);
1007     qatomic_set(&tcg_ctxs[n], s);
1008 
1009     if (n > 0) {
1010         tcg_region_initial_alloc(s);
1011     }
1012 
1013     tcg_ctx = s;
1014 }
1015 #endif /* !CONFIG_USER_ONLY */
1016 
1017 /* pool based memory allocation */
1018 void *tcg_malloc_internal(TCGContext *s, int size)
1019 {
1020     TCGPool *p;
1021     int pool_size;
1022 
1023     if (size > TCG_POOL_CHUNK_SIZE) {
1024         /* big malloc: insert a new pool (XXX: could optimize) */
1025         p = g_malloc(sizeof(TCGPool) + size);
1026         p->size = size;
1027         p->next = s->pool_first_large;
1028         s->pool_first_large = p;
1029         return p->data;
1030     } else {
1031         p = s->pool_current;
1032         if (!p) {
1033             p = s->pool_first;
1034             if (!p)
1035                 goto new_pool;
1036         } else {
1037             if (!p->next) {
1038             new_pool:
1039                 pool_size = TCG_POOL_CHUNK_SIZE;
1040                 p = g_malloc(sizeof(TCGPool) + pool_size);
1041                 p->size = pool_size;
1042                 p->next = NULL;
1043                 if (s->pool_current) {
1044                     s->pool_current->next = p;
1045                 } else {
1046                     s->pool_first = p;
1047                 }
1048             } else {
1049                 p = p->next;
1050             }
1051         }
1052     }
1053     s->pool_current = p;
1054     s->pool_cur = p->data + size;
1055     s->pool_end = p->data + p->size;
1056     return p->data;
1057 }
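/*
 * Editorial note: callers use the tcg_malloc() inline wrapper, which
 * bump-allocates from the current chunk and only falls back here when the
 * chunk is exhausted, e.g.:
 *
 *     TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));
 *
 * There is no per-object free; tcg_pool_reset() below reclaims everything
 * at once when the context is reset between translations.
 */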
1058 
1059 void tcg_pool_reset(TCGContext *s)
1060 {
1061     TCGPool *p, *t;
1062     for (p = s->pool_first_large; p; p = t) {
1063         t = p->next;
1064         g_free(p);
1065     }
1066     s->pool_first_large = NULL;
1067     s->pool_cur = s->pool_end = NULL;
1068     s->pool_current = NULL;
1069 }
1070 
1071 /*
1072  * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
1073  * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
1074  * We only use these for layout in tcg_out_ld_helper_ret and
1075  * tcg_out_st_helper_args, and share them between several of
1076  * the helpers, with the end result that it's easier to build manually.
1077  */
1078 
1079 #if TCG_TARGET_REG_BITS == 32
1080 # define dh_typecode_ttl  dh_typecode_i32
1081 #else
1082 # define dh_typecode_ttl  dh_typecode_i64
1083 #endif
1084 
1085 static TCGHelperInfo info_helper_ld32_mmu = {
1086     .flags = TCG_CALL_NO_WG,
1087     .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
1088               | dh_typemask(env, 1)
1089               | dh_typemask(i64, 2)  /* uint64_t addr */
1090               | dh_typemask(i32, 3)  /* unsigned oi */
1091               | dh_typemask(ptr, 4)  /* uintptr_t ra */
1092 };
1093 
1094 static TCGHelperInfo info_helper_ld64_mmu = {
1095     .flags = TCG_CALL_NO_WG,
1096     .typemask = dh_typemask(i64, 0)  /* return uint64_t */
1097               | dh_typemask(env, 1)
1098               | dh_typemask(i64, 2)  /* uint64_t addr */
1099               | dh_typemask(i32, 3)  /* unsigned oi */
1100               | dh_typemask(ptr, 4)  /* uintptr_t ra */
1101 };
1102 
1103 static TCGHelperInfo info_helper_ld128_mmu = {
1104     .flags = TCG_CALL_NO_WG,
1105     .typemask = dh_typemask(i128, 0) /* return Int128 */
1106               | dh_typemask(env, 1)
1107               | dh_typemask(i64, 2)  /* uint64_t addr */
1108               | dh_typemask(i32, 3)  /* unsigned oi */
1109               | dh_typemask(ptr, 4)  /* uintptr_t ra */
1110 };
1111 
1112 static TCGHelperInfo info_helper_st32_mmu = {
1113     .flags = TCG_CALL_NO_WG,
1114     .typemask = dh_typemask(void, 0)
1115               | dh_typemask(env, 1)
1116               | dh_typemask(i64, 2)  /* uint64_t addr */
1117               | dh_typemask(i32, 3)  /* uint32_t data */
1118               | dh_typemask(i32, 4)  /* unsigned oi */
1119               | dh_typemask(ptr, 5)  /* uintptr_t ra */
1120 };
1121 
1122 static TCGHelperInfo info_helper_st64_mmu = {
1123     .flags = TCG_CALL_NO_WG,
1124     .typemask = dh_typemask(void, 0)
1125               | dh_typemask(env, 1)
1126               | dh_typemask(i64, 2)  /* uint64_t addr */
1127               | dh_typemask(i64, 3)  /* uint64_t data */
1128               | dh_typemask(i32, 4)  /* unsigned oi */
1129               | dh_typemask(ptr, 5)  /* uintptr_t ra */
1130 };
1131 
1132 static TCGHelperInfo info_helper_st128_mmu = {
1133     .flags = TCG_CALL_NO_WG,
1134     .typemask = dh_typemask(void, 0)
1135               | dh_typemask(env, 1)
1136               | dh_typemask(i64, 2)  /* uint64_t addr */
1137               | dh_typemask(i128, 3) /* Int128 data */
1138               | dh_typemask(i32, 4)  /* unsigned oi */
1139               | dh_typemask(ptr, 5)  /* uintptr_t ra */
1140 };
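/*
 * Editorial note: each dh_typemask() operand occupies a 3-bit field of
 * .typemask, field 0 describing the return value.  For info_helper_ld32_mmu
 * above, the fields decode as (ttl, env, i64, i32, ptr), corresponding
 * roughly to the prototype
 *
 *     tcg_target_ulong helper_ldul_mmu(CPUArchState *env, uint64_t addr,
 *                                      MemOpIdx oi, uintptr_t ra);
 */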
1141 
1142 #ifdef CONFIG_TCG_INTERPRETER
1143 static ffi_type *typecode_to_ffi(int argmask)
1144 {
1145     /*
1146      * libffi does not support __int128_t, so we have forced Int128
1147      * to use the structure definition instead of the builtin type.
1148      */
1149     static ffi_type *ffi_type_i128_elements[3] = {
1150         &ffi_type_uint64,
1151         &ffi_type_uint64,
1152         NULL
1153     };
1154     static ffi_type ffi_type_i128 = {
1155         .size = 16,
1156         .alignment = __alignof__(Int128),
1157         .type = FFI_TYPE_STRUCT,
1158         .elements = ffi_type_i128_elements,
1159     };
1160 
1161     switch (argmask) {
1162     case dh_typecode_void:
1163         return &ffi_type_void;
1164     case dh_typecode_i32:
1165         return &ffi_type_uint32;
1166     case dh_typecode_s32:
1167         return &ffi_type_sint32;
1168     case dh_typecode_i64:
1169         return &ffi_type_uint64;
1170     case dh_typecode_s64:
1171         return &ffi_type_sint64;
1172     case dh_typecode_ptr:
1173         return &ffi_type_pointer;
1174     case dh_typecode_i128:
1175         return &ffi_type_i128;
1176     }
1177     g_assert_not_reached();
1178 }
1179 
1180 static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
1181 {
1182     unsigned typemask = info->typemask;
1183     struct {
1184         ffi_cif cif;
1185         ffi_type *args[];
1186     } *ca;
1187     ffi_status status;
1188     int nargs;
1189 
1190     /* Ignoring the return type, find the last non-zero field. */
1191     nargs = 32 - clz32(typemask >> 3);
1192     nargs = DIV_ROUND_UP(nargs, 3);
1193     assert(nargs <= MAX_CALL_IARGS);
1194 
1195     ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
1196     ca->cif.rtype = typecode_to_ffi(typemask & 7);
1197     ca->cif.nargs = nargs;
1198 
1199     if (nargs != 0) {
1200         ca->cif.arg_types = ca->args;
1201         for (int j = 0; j < nargs; ++j) {
1202             int typecode = extract32(typemask, (j + 1) * 3, 3);
1203             ca->args[j] = typecode_to_ffi(typecode);
1204         }
1205     }
1206 
1207     status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
1208                           ca->cif.rtype, ca->cif.arg_types);
1209     assert(status == FFI_OK);
1210 
1211     return &ca->cif;
1212 }
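/*
 * Editorial worked example for the nargs computation above: for
 * info_helper_st32_mmu the last argument (ptr ra) sits in 3-bit field 5 of
 * the typemask, i.e. in field 4 of typemask >> 3, occupying bits 12-14.
 * Whatever nonzero typecode is stored there, 32 - clz32() yields a value in
 * [13, 15], and DIV_ROUND_UP(..., 3) gives the expected 5 arguments.
 */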
1213 
1214 #define HELPER_INFO_INIT(I)      (&(I)->cif)
1215 #define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
1216 #else
1217 #define HELPER_INFO_INIT(I)      (&(I)->init)
1218 #define HELPER_INFO_INIT_VAL(I)  1
1219 #endif /* CONFIG_TCG_INTERPRETER */
1220 
1221 static inline bool arg_slot_reg_p(unsigned arg_slot)
1222 {
1223     /*
1224      * Split the sizeof away from the comparison to avoid Werror from
1225      * "unsigned < 0 is always false", when iarg_regs is empty.
1226      */
1227     unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
1228     return arg_slot < nreg;
1229 }
1230 
1231 static inline int arg_slot_stk_ofs(unsigned arg_slot)
1232 {
1233     unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1234     unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);
1235 
1236     tcg_debug_assert(stk_slot < max);
1237     return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
1238 }
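/*
 * Editorial example: on a host with 8 integer argument registers
 * (ARRAY_SIZE(tcg_target_call_iarg_regs) == 8), arg_slot 10 is not a
 * register; arg_slot_stk_ofs(10) places it at stk_slot 2, i.e. at byte
 * offset TCG_TARGET_CALL_STACK_OFFSET + 2 * sizeof(tcg_target_long).
 */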
1239 
1240 typedef struct TCGCumulativeArgs {
1241     int arg_idx;                /* tcg_gen_callN args[] */
1242     int info_in_idx;            /* TCGHelperInfo in[] */
1243     int arg_slot;               /* regs+stack slot */
1244     int ref_slot;               /* stack slots for references */
1245 } TCGCumulativeArgs;
1246 
1247 static void layout_arg_even(TCGCumulativeArgs *cum)
1248 {
1249     cum->arg_slot += cum->arg_slot & 1;
1250 }
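/*
 * Editorial note: TCG_CALL_ARG_EVEN serves ABIs such as 32-bit Arm AAPCS,
 * where a 64-bit argument must start in an even register pair; adding
 * (arg_slot & 1) rounds an odd slot index up to the next even one.
 */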
1251 
1252 static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
1253                          TCGCallArgumentKind kind)
1254 {
1255     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1256 
1257     *loc = (TCGCallArgumentLoc){
1258         .kind = kind,
1259         .arg_idx = cum->arg_idx,
1260         .arg_slot = cum->arg_slot,
1261     };
1262     cum->info_in_idx++;
1263     cum->arg_slot++;
1264 }
1265 
1266 static void layout_arg_normal_n(TCGCumulativeArgs *cum,
1267                                 TCGHelperInfo *info, int n)
1268 {
1269     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1270 
1271     for (int i = 0; i < n; ++i) {
1272         /* Layout all using the same arg_idx, adjusting the subindex. */
1273         loc[i] = (TCGCallArgumentLoc){
1274             .kind = TCG_CALL_ARG_NORMAL,
1275             .arg_idx = cum->arg_idx,
1276             .tmp_subindex = i,
1277             .arg_slot = cum->arg_slot + i,
1278         };
1279     }
1280     cum->info_in_idx += n;
1281     cum->arg_slot += n;
1282 }
1283 
1284 static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
1285 {
1286     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1287     int n = 128 / TCG_TARGET_REG_BITS;
1288 
1289     /* The first subindex carries the pointer. */
1290     layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);
1291 
1292     /*
1293      * The callee is allowed to clobber memory associated with
1294      * structures passed by reference.  Therefore we must make copies.
1295      * Allocate space from "ref_slot", which will be adjusted to
1296      * follow the parameters on the stack.
1297      */
1298     loc[0].ref_slot = cum->ref_slot;
1299 
1300     /*
1301      * Subsequent words also go into the reference slot, but
1302      * do not accumulate into the regular arguments.
1303      */
1304     for (int i = 1; i < n; ++i) {
1305         loc[i] = (TCGCallArgumentLoc){
1306             .kind = TCG_CALL_ARG_BY_REF_N,
1307             .arg_idx = cum->arg_idx,
1308             .tmp_subindex = i,
1309             .ref_slot = cum->ref_slot + i,
1310         };
1311     }
1312     cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
1313     cum->ref_slot += n;
1314 }
1315 
1316 static void init_call_layout(TCGHelperInfo *info)
1317 {
1318     int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
1319     int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1320     unsigned typemask = info->typemask;
1321     unsigned typecode;
1322     TCGCumulativeArgs cum = { };
1323 
1324     /*
1325      * Parse and place any function return value.
1326      */
1327     typecode = typemask & 7;
1328     switch (typecode) {
1329     case dh_typecode_void:
1330         info->nr_out = 0;
1331         break;
1332     case dh_typecode_i32:
1333     case dh_typecode_s32:
1334     case dh_typecode_ptr:
1335         info->nr_out = 1;
1336         info->out_kind = TCG_CALL_RET_NORMAL;
1337         break;
1338     case dh_typecode_i64:
1339     case dh_typecode_s64:
1340         info->nr_out = 64 / TCG_TARGET_REG_BITS;
1341         info->out_kind = TCG_CALL_RET_NORMAL;
1342         /* Query the last register now to trigger any assert early. */
1343         tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1344         break;
1345     case dh_typecode_i128:
1346         info->nr_out = 128 / TCG_TARGET_REG_BITS;
1347         info->out_kind = TCG_TARGET_CALL_RET_I128;
1348         switch (TCG_TARGET_CALL_RET_I128) {
1349         case TCG_CALL_RET_NORMAL:
1350             /* Query the last register now to trigger any assert early. */
1351             tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1352             break;
1353         case TCG_CALL_RET_BY_VEC:
1354             /* Query the single register now to trigger any assert early. */
1355             tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
1356             break;
1357         case TCG_CALL_RET_BY_REF:
1358             /*
1359              * Allocate the first argument to the output.
1360              * We don't need to store this anywhere, just make it
1361              * unavailable for use in the input loop below.
1362              */
1363             cum.arg_slot = 1;
1364             break;
1365         default:
1366             qemu_build_not_reached();
1367         }
1368         break;
1369     default:
1370         g_assert_not_reached();
1371     }
1372 
1373     /*
1374      * Parse and place function arguments.
1375      */
1376     for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
1377         TCGCallArgumentKind kind;
1378         TCGType type;
1379 
1380         typecode = typemask & 7;
1381         switch (typecode) {
1382         case dh_typecode_i32:
1383         case dh_typecode_s32:
1384             type = TCG_TYPE_I32;
1385             break;
1386         case dh_typecode_i64:
1387         case dh_typecode_s64:
1388             type = TCG_TYPE_I64;
1389             break;
1390         case dh_typecode_ptr:
1391             type = TCG_TYPE_PTR;
1392             break;
1393         case dh_typecode_i128:
1394             type = TCG_TYPE_I128;
1395             break;
1396         default:
1397             g_assert_not_reached();
1398         }
1399 
1400         switch (type) {
1401         case TCG_TYPE_I32:
1402             switch (TCG_TARGET_CALL_ARG_I32) {
1403             case TCG_CALL_ARG_EVEN:
1404                 layout_arg_even(&cum);
1405                 /* fall through */
1406             case TCG_CALL_ARG_NORMAL:
1407                 layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1408                 break;
1409             case TCG_CALL_ARG_EXTEND:
1410                 kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
1411                 layout_arg_1(&cum, info, kind);
1412                 break;
1413             default:
1414                 qemu_build_not_reached();
1415             }
1416             break;
1417 
1418         case TCG_TYPE_I64:
1419             switch (TCG_TARGET_CALL_ARG_I64) {
1420             case TCG_CALL_ARG_EVEN:
1421                 layout_arg_even(&cum);
1422                 /* fall through */
1423             case TCG_CALL_ARG_NORMAL:
1424                 if (TCG_TARGET_REG_BITS == 32) {
1425                     layout_arg_normal_n(&cum, info, 2);
1426                 } else {
1427                     layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1428                 }
1429                 break;
1430             default:
1431                 qemu_build_not_reached();
1432             }
1433             break;
1434 
1435         case TCG_TYPE_I128:
1436             switch (TCG_TARGET_CALL_ARG_I128) {
1437             case TCG_CALL_ARG_EVEN:
1438                 layout_arg_even(&cum);
1439                 /* fall through */
1440             case TCG_CALL_ARG_NORMAL:
1441                 layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
1442                 break;
1443             case TCG_CALL_ARG_BY_REF:
1444                 layout_arg_by_ref(&cum, info);
1445                 break;
1446             default:
1447                 qemu_build_not_reached();
1448             }
1449             break;
1450 
1451         default:
1452             g_assert_not_reached();
1453         }
1454     }
1455     info->nr_in = cum.info_in_idx;
1456 
1457     /* Validate that we didn't overrun the input array. */
1458     assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
1459     /* Validate the backend has enough argument space. */
1460     assert(cum.arg_slot <= max_reg_slots + max_stk_slots);
1461 
1462     /*
1463      * Relocate the "ref_slot" area to the end of the parameters.
1464      * Minimizing this stack offset helps code size for x86,
1465      * which has a signed 8-bit offset encoding.
1466      */
1467     if (cum.ref_slot != 0) {
1468         int ref_base = 0;
1469 
1470         if (cum.arg_slot > max_reg_slots) {
1471             int align = __alignof(Int128) / sizeof(tcg_target_long);
1472 
1473             ref_base = cum.arg_slot - max_reg_slots;
1474             if (align > 1) {
1475                 ref_base = ROUND_UP(ref_base, align);
1476             }
1477         }
1478         assert(ref_base + cum.ref_slot <= max_stk_slots);
1479         ref_base += max_reg_slots;
1480 
1481         if (ref_base != 0) {
1482             for (int i = cum.info_in_idx - 1; i >= 0; --i) {
1483                 TCGCallArgumentLoc *loc = &info->in[i];
1484                 switch (loc->kind) {
1485                 case TCG_CALL_ARG_BY_REF:
1486                 case TCG_CALL_ARG_BY_REF_N:
1487                     loc->ref_slot += ref_base;
1488                     break;
1489                 default:
1490                     break;
1491                 }
1492             }
1493         }
1494     }
1495 }
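/*
 * Editorial worked example: on a 32-bit host with TCG_CALL_ARG_EVEN for
 * I64 and TCG_CALL_ARG_NORMAL for I32 (assumptions matching e.g. 32-bit
 * Arm), helper_st64_mmu's arguments (env, i64 addr, i64 data, i32 oi,
 * ptr ra) land in slots 0, 2-3, 4-5, 6 and 7: each i64 is first rounded
 * up to an even slot by layout_arg_even().
 */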
1496 
1497 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
1498 static void process_constraint_sets(void);
1499 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1500                                             TCGReg reg, const char *name);
1501 
1502 static void tcg_context_init(unsigned max_cpus)
1503 {
1504     TCGContext *s = &tcg_init_ctx;
1505     int n, i;
1506     TCGTemp *ts;
1507 
1508     memset(s, 0, sizeof(*s));
1509     s->nb_globals = 0;
1510 
1511     init_call_layout(&info_helper_ld32_mmu);
1512     init_call_layout(&info_helper_ld64_mmu);
1513     init_call_layout(&info_helper_ld128_mmu);
1514     init_call_layout(&info_helper_st32_mmu);
1515     init_call_layout(&info_helper_st64_mmu);
1516     init_call_layout(&info_helper_st128_mmu);
1517 
1518     tcg_target_init(s);
1519     process_constraint_sets();
1520 
1521     /* Reverse the order of the saved registers, assuming they're all at
1522        the start of tcg_target_reg_alloc_order.  */
1523     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
1524         int r = tcg_target_reg_alloc_order[n];
1525         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
1526             break;
1527         }
1528     }
1529     for (i = 0; i < n; ++i) {
1530         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
1531     }
1532     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
1533         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
1534     }
1535 
1536     tcg_ctx = s;
1537     /*
1538      * In user-mode we simply share the init context among threads, since we
1539      * use a single region. See the documentation of tcg_region_init() for the
1540      * reasoning behind this.
1541      * In system-mode we will have at most max_cpus TCG threads.
1542      */
1543 #ifdef CONFIG_USER_ONLY
1544     tcg_ctxs = &tcg_ctx;
1545     tcg_cur_ctxs = 1;
1546     tcg_max_ctxs = 1;
1547 #else
1548     tcg_max_ctxs = max_cpus;
1549     tcg_ctxs = g_new0(TCGContext *, max_cpus);
1550 #endif
1551 
1552     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1553     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1554     tcg_env = temp_tcgv_ptr(ts);
1555 }
1556 
1557 void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
1558 {
1559     tcg_context_init(max_cpus);
1560     tcg_region_init(tb_size, splitwx, max_cpus);
1561 }
1562 
1563 /*
1564  * Allocate TBs right before their corresponding translated code, making
1565  * sure that TBs and code are on different cache lines.
1566  */
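/* Returns NULL if a new region cannot be allocated. */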
1567 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1568 {
1569     uintptr_t align = qemu_icache_linesize;
1570     TranslationBlock *tb;
1571     void *next;
1572 
1573  retry:
1574     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1575     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1576 
1577     if (unlikely(next > s->code_gen_highwater)) {
1578         if (tcg_region_alloc(s)) {
1579             return NULL;
1580         }
1581         goto retry;
1582     }
1583     qatomic_set(&s->code_gen_ptr, next);
1584     return tb;
1585 }
1586 
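/*
 * Generate the host prologue and epilogue at the start of the code
 * region, point tcg_qemu_tb_exec at it (native backends only), and
 * flush the generated range from the instruction cache.
 */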
1587 void tcg_prologue_init(void)
1588 {
1589     TCGContext *s = tcg_ctx;
1590     size_t prologue_size;
1591 
1592     s->code_ptr = s->code_gen_ptr;
1593     s->code_buf = s->code_gen_ptr;
1594     s->data_gen_ptr = NULL;
1595 
1596 #ifndef CONFIG_TCG_INTERPRETER
1597     tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
1598 #endif
1599 
1600     s->pool_labels = NULL;
1601 
1602     qemu_thread_jit_write();
1603     /* Generate the prologue.  */
1604     tcg_target_qemu_prologue(s);
1605 
1606     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1607     {
1608         int result = tcg_out_pool_finalize(s);
1609         tcg_debug_assert(result == 0);
1610     }
1611 
1612     prologue_size = tcg_current_code_size(s);
1613     perf_report_prologue(s->code_gen_ptr, prologue_size);
1614 
1615 #ifndef CONFIG_TCG_INTERPRETER
1616     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
1617                         (uintptr_t)s->code_buf, prologue_size);
1618 #endif
1619 
1620     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1621         FILE *logfile = qemu_log_trylock();
1622         if (logfile) {
1623             fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
1624             if (s->data_gen_ptr) {
1625                 size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
1626                 size_t data_size = prologue_size - code_size;
1627                 size_t i;
1628 
1629                 disas(logfile, s->code_gen_ptr, code_size);
1630 
1631                 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1632                     if (sizeof(tcg_target_ulong) == 8) {
1633                         fprintf(logfile,
1634                                 "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1635                                 (uintptr_t)s->data_gen_ptr + i,
1636                                 *(uint64_t *)(s->data_gen_ptr + i));
1637                     } else {
1638                         fprintf(logfile,
1639                                 "0x%08" PRIxPTR ":  .long  0x%08x\n",
1640                                 (uintptr_t)s->data_gen_ptr + i,
1641                                 *(uint32_t *)(s->data_gen_ptr + i));
1642                     }
1643                 }
1644             } else {
1645                 disas(logfile, s->code_gen_ptr, prologue_size);
1646             }
1647             fprintf(logfile, "\n");
1648             qemu_log_unlock(logfile);
1649         }
1650     }
1651 
1652 #ifndef CONFIG_TCG_INTERPRETER
1653     /*
1654      * Assert that goto_ptr is implemented completely, setting an epilogue.
1655      * For tci, we use NULL as the signal to return from the interpreter,
1656      * so skip this check.
1657      */
1658     tcg_debug_assert(tcg_code_gen_epilogue != NULL);
1659 #endif
1660 
1661     tcg_region_prologue_set(s);
1662 }
1663 
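/*
 * Reset the context to begin translating a new TB: drop all non-global
 * temps, clear the interned constants, and reinitialize the op,
 * free-op and label lists.
 */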
1664 void tcg_func_start(TCGContext *s)
1665 {
1666     tcg_pool_reset(s);
1667     s->nb_temps = s->nb_globals;
1668 
1669     /* No freed temps carry over; reset the EBB free-temp bitmaps.  */
1670     tcg_temp_ebb_reset_freed(s);
1671 
1672     /* No constant temps have been previously allocated. */
1673     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1674         if (s->const_table[i]) {
1675             g_hash_table_remove_all(s->const_table[i]);
1676         }
1677     }
1678 
1679     s->nb_ops = 0;
1680     s->nb_labels = 0;
1681     s->current_frame_offset = s->frame_start;
1682 
1683 #ifdef CONFIG_DEBUG_TCG
1684     s->goto_tb_issue_mask = 0;
1685 #endif
1686 
1687     QTAILQ_INIT(&s->ops);
1688     QTAILQ_INIT(&s->free_ops);
1689     s->emit_before_op = NULL;
1690     QSIMPLEQ_INIT(&s->labels);
1691 
1692     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
1693     tcg_debug_assert(s->insn_start_words > 0);
1694 }
1695 
1696 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1697 {
1698     int n = s->nb_temps++;
1699 
1700     if (n >= TCG_MAX_TEMPS) {
1701         tcg_raise_tb_overflow(s);
1702     }
1703     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1704 }
1705 
1706 static TCGTemp *tcg_global_alloc(TCGContext *s)
1707 {
1708     TCGTemp *ts;
1709 
1710     tcg_debug_assert(s->nb_globals == s->nb_temps);
1711     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1712     s->nb_globals++;
1713     ts = tcg_temp_alloc(s);
1714     ts->kind = TEMP_GLOBAL;
1715 
1716     return ts;
1717 }
1718 
1719 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1720                                             TCGReg reg, const char *name)
1721 {
1722     TCGTemp *ts;
1723 
1724     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1725 
1726     ts = tcg_global_alloc(s);
1727     ts->base_type = type;
1728     ts->type = type;
1729     ts->kind = TEMP_FIXED;
1730     ts->reg = reg;
1731     ts->name = name;
1732     tcg_regset_set_reg(s->reserved_regs, reg);
1733 
1734     return ts;
1735 }
1736 
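/*
 * Describe the spill frame: REG points at it and [START, START + SIZE)
 * bounds the bytes available.  REG itself becomes the fixed global
 * "_frame".
 */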
1737 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1738 {
1739     s->frame_start = start;
1740     s->frame_end = start + size;
1741     s->frame_temp
1742         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1743 }
1744 
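/*
 * Create a TEMP_GLOBAL that lives in memory at BASE + OFFSET.  On a
 * 32-bit host a TCG_TYPE_I64 global becomes two consecutive I32 temps
 * named NAME_0 and NAME_1, covering OFFSET and OFFSET + 4.  A non-fixed
 * base (i.e. another global) marks the temp indirect.
 */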
1745 static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset,
1746                                             const char *name, TCGType type)
1747 {
1748     TCGContext *s = tcg_ctx;
1749     TCGTemp *base_ts = tcgv_ptr_temp(base);
1750     TCGTemp *ts = tcg_global_alloc(s);
1751     int indirect_reg = 0;
1752 
1753     switch (base_ts->kind) {
1754     case TEMP_FIXED:
1755         break;
1756     case TEMP_GLOBAL:
1757         /* We do not support double-indirect registers.  */
1758         tcg_debug_assert(!base_ts->indirect_reg);
1759         base_ts->indirect_base = 1;
1760         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1761                             ? 2 : 1);
1762         indirect_reg = 1;
1763         break;
1764     default:
1765         g_assert_not_reached();
1766     }
1767 
1768     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1769         TCGTemp *ts2 = tcg_global_alloc(s);
1770         char buf[64];
1771 
1772         ts->base_type = TCG_TYPE_I64;
1773         ts->type = TCG_TYPE_I32;
1774         ts->indirect_reg = indirect_reg;
1775         ts->mem_allocated = 1;
1776         ts->mem_base = base_ts;
1777         ts->mem_offset = offset;
1778         pstrcpy(buf, sizeof(buf), name);
1779         pstrcat(buf, sizeof(buf), "_0");
1780         ts->name = strdup(buf);
1781 
1782         tcg_debug_assert(ts2 == ts + 1);
1783         ts2->base_type = TCG_TYPE_I64;
1784         ts2->type = TCG_TYPE_I32;
1785         ts2->indirect_reg = indirect_reg;
1786         ts2->mem_allocated = 1;
1787         ts2->mem_base = base_ts;
1788         ts2->mem_offset = offset + 4;
1789         ts2->temp_subindex = 1;
1790         pstrcpy(buf, sizeof(buf), name);
1791         pstrcat(buf, sizeof(buf), "_1");
1792         ts2->name = strdup(buf);
1793     } else {
1794         ts->base_type = type;
1795         ts->type = type;
1796         ts->indirect_reg = indirect_reg;
1797         ts->mem_allocated = 1;
1798         ts->mem_base = base_ts;
1799         ts->mem_offset = offset;
1800         ts->name = name;
1801     }
1802     return ts;
1803 }
1804 
1805 TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name)
1806 {
1807     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32);
1808     return temp_tcgv_i32(ts);
1809 }
1810 
1811 TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name)
1812 {
1813     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64);
1814     return temp_tcgv_i64(ts);
1815 }
1816 
1817 TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name)
1818 {
1819     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR);
1820     return temp_tcgv_ptr(ts);
1821 }
1822 
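/*
 * Allocate a temporary of TYPE.  TEMP_EBB temps are recycled through the
 * free_temps bitmaps; TEMP_TB temps are only reclaimed when tcg_func_start
 * resets the context.  Types wider than the host register are represented
 * by consecutive TCGTemps, e.g. I128 on a 32-bit host takes four temps
 * distinguished by temp_subindex.
 */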
1823 TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
1824 {
1825     TCGContext *s = tcg_ctx;
1826     TCGTemp *ts;
1827     int n;
1828 
1829     if (kind == TEMP_EBB) {
1830         int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);
1831 
1832         if (idx < TCG_MAX_TEMPS) {
1833             /* There is already an available temp with the right type.  */
1834             clear_bit(idx, s->free_temps[type].l);
1835 
1836             ts = &s->temps[idx];
1837             ts->temp_allocated = 1;
1838             tcg_debug_assert(ts->base_type == type);
1839             tcg_debug_assert(ts->kind == kind);
1840             return ts;
1841         }
1842     } else {
1843         tcg_debug_assert(kind == TEMP_TB);
1844     }
1845 
1846     switch (type) {
1847     case TCG_TYPE_I32:
1848     case TCG_TYPE_V64:
1849     case TCG_TYPE_V128:
1850     case TCG_TYPE_V256:
1851         n = 1;
1852         break;
1853     case TCG_TYPE_I64:
1854         n = 64 / TCG_TARGET_REG_BITS;
1855         break;
1856     case TCG_TYPE_I128:
1857         n = 128 / TCG_TARGET_REG_BITS;
1858         break;
1859     default:
1860         g_assert_not_reached();
1861     }
1862 
1863     ts = tcg_temp_alloc(s);
1864     ts->base_type = type;
1865     ts->temp_allocated = 1;
1866     ts->kind = kind;
1867 
1868     if (n == 1) {
1869         ts->type = type;
1870     } else {
1871         ts->type = TCG_TYPE_REG;
1872 
1873         for (int i = 1; i < n; ++i) {
1874             TCGTemp *ts2 = tcg_temp_alloc(s);
1875 
1876             tcg_debug_assert(ts2 == ts + i);
1877             ts2->base_type = type;
1878             ts2->type = TCG_TYPE_REG;
1879             ts2->temp_allocated = 1;
1880             ts2->temp_subindex = i;
1881             ts2->kind = kind;
1882         }
1883     }
1884     return ts;
1885 }
1886 
1887 TCGv_i32 tcg_temp_new_i32(void)
1888 {
1889     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB));
1890 }
1891 
1892 TCGv_i32 tcg_temp_ebb_new_i32(void)
1893 {
1894     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB));
1895 }
1896 
1897 TCGv_i64 tcg_temp_new_i64(void)
1898 {
1899     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB));
1900 }
1901 
1902 TCGv_i64 tcg_temp_ebb_new_i64(void)
1903 {
1904     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB));
1905 }
1906 
1907 TCGv_ptr tcg_temp_new_ptr(void)
1908 {
1909     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB));
1910 }
1911 
1912 TCGv_ptr tcg_temp_ebb_new_ptr(void)
1913 {
1914     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB));
1915 }
1916 
1917 TCGv_i128 tcg_temp_new_i128(void)
1918 {
1919     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB));
1920 }
1921 
1922 TCGv_i128 tcg_temp_ebb_new_i128(void)
1923 {
1924     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB));
1925 }
1926 
1927 TCGv_vec tcg_temp_new_vec(TCGType type)
1928 {
1929     TCGTemp *t;
1930 
1931 #ifdef CONFIG_DEBUG_TCG
1932     switch (type) {
1933     case TCG_TYPE_V64:
1934         assert(TCG_TARGET_HAS_v64);
1935         break;
1936     case TCG_TYPE_V128:
1937         assert(TCG_TARGET_HAS_v128);
1938         break;
1939     case TCG_TYPE_V256:
1940         assert(TCG_TARGET_HAS_v256);
1941         break;
1942     default:
1943         g_assert_not_reached();
1944     }
1945 #endif
1946 
1947     t = tcg_temp_new_internal(type, TEMP_EBB);
1948     return temp_tcgv_vec(t);
1949 }
1950 
1951 /* Create a new temp of the same type as an existing temp.  */
1952 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1953 {
1954     TCGTemp *t = tcgv_vec_temp(match);
1955 
1956     tcg_debug_assert(t->temp_allocated != 0);
1957 
1958     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
1959     return temp_tcgv_vec(t);
1960 }
1961 
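/*
 * Release a temporary.  Only TEMP_EBB temps are recycled; freeing a
 * TEMP_CONST or TEMP_TB temp is a deliberate no-op.
 */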
1962 void tcg_temp_free_internal(TCGTemp *ts)
1963 {
1964     TCGContext *s = tcg_ctx;
1965 
1966     switch (ts->kind) {
1967     case TEMP_CONST:
1968     case TEMP_TB:
1969         /* Silently ignore free. */
1970         break;
1971     case TEMP_EBB:
1972         tcg_debug_assert(ts->temp_allocated != 0);
1973         ts->temp_allocated = 0;
1974         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
1975         break;
1976     default:
1977         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
1978         g_assert_not_reached();
1979     }
1980 }
1981 
1982 void tcg_temp_free_i32(TCGv_i32 arg)
1983 {
1984     tcg_temp_free_internal(tcgv_i32_temp(arg));
1985 }
1986 
1987 void tcg_temp_free_i64(TCGv_i64 arg)
1988 {
1989     tcg_temp_free_internal(tcgv_i64_temp(arg));
1990 }
1991 
1992 void tcg_temp_free_i128(TCGv_i128 arg)
1993 {
1994     tcg_temp_free_internal(tcgv_i128_temp(arg));
1995 }
1996 
1997 void tcg_temp_free_ptr(TCGv_ptr arg)
1998 {
1999     tcg_temp_free_internal(tcgv_ptr_temp(arg));
2000 }
2001 
2002 void tcg_temp_free_vec(TCGv_vec arg)
2003 {
2004     tcg_temp_free_internal(tcgv_vec_temp(arg));
2005 }
2006 
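/*
 * Return the interned TEMP_CONST for (TYPE, VAL), creating it on first
 * use.  The per-type hash tables are keyed by a pointer to the int64_t
 * value stored in the temp itself.  Typical front-end usage, as a
 * sketch with hypothetical dst/src temps:
 *
 *     TCGv_i32 two = tcg_constant_i32(2);
 *     tcg_gen_add_i32(dst, src, two);   // constant temps are never freed
 */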
2007 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
2008 {
2009     TCGContext *s = tcg_ctx;
2010     GHashTable *h = s->const_table[type];
2011     TCGTemp *ts;
2012 
2013     if (h == NULL) {
2014         h = g_hash_table_new(g_int64_hash, g_int64_equal);
2015         s->const_table[type] = h;
2016     }
2017 
2018     ts = g_hash_table_lookup(h, &val);
2019     if (ts == NULL) {
2020         int64_t *val_ptr;
2021 
2022         ts = tcg_temp_alloc(s);
2023 
2024         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
2025             TCGTemp *ts2 = tcg_temp_alloc(s);
2026 
2027             tcg_debug_assert(ts2 == ts + 1);
2028 
2029             ts->base_type = TCG_TYPE_I64;
2030             ts->type = TCG_TYPE_I32;
2031             ts->kind = TEMP_CONST;
2032             ts->temp_allocated = 1;
2033 
2034             ts2->base_type = TCG_TYPE_I64;
2035             ts2->type = TCG_TYPE_I32;
2036             ts2->kind = TEMP_CONST;
2037             ts2->temp_allocated = 1;
2038             ts2->temp_subindex = 1;
2039 
2040             /*
2041              * Retain the full value of the 64-bit constant in the low
2042              * part, so that the hash table works.  Actual uses will
2043              * truncate the value to the low part.
2044              */
2045             ts[HOST_BIG_ENDIAN].val = val;
2046             ts[!HOST_BIG_ENDIAN].val = val >> 32;
2047             val_ptr = &ts[HOST_BIG_ENDIAN].val;
2048         } else {
2049             ts->base_type = type;
2050             ts->type = type;
2051             ts->kind = TEMP_CONST;
2052             ts->temp_allocated = 1;
2053             ts->val = val;
2054             val_ptr = &ts->val;
2055         }
2056         g_hash_table_insert(h, val_ptr, ts);
2057     }
2058 
2059     return ts;
2060 }
2061 
2062 TCGv_i32 tcg_constant_i32(int32_t val)
2063 {
2064     return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val));
2065 }
2066 
2067 TCGv_i64 tcg_constant_i64(int64_t val)
2068 {
2069     return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val));
2070 }
2071 
2072 TCGv_ptr tcg_constant_ptr_int(intptr_t val)
2073 {
2074     return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val));
2075 }
2076 
2077 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
2078 {
2079     val = dup_const(vece, val);
2080     return temp_tcgv_vec(tcg_constant_internal(type, val));
2081 }
2082 
2083 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
2084 {
2085     TCGTemp *t = tcgv_vec_temp(match);
2086 
2087     tcg_debug_assert(t->temp_allocated != 0);
2088     return tcg_constant_vec(t->base_type, vece, val);
2089 }
2090 
2091 #ifdef CONFIG_DEBUG_TCG
2092 size_t temp_idx(TCGTemp *ts)
2093 {
2094     ptrdiff_t n = ts - tcg_ctx->temps;
2095     assert(n >= 0 && n < tcg_ctx->nb_temps);
2096     return n;
2097 }
2098 
2099 TCGTemp *tcgv_i32_temp(TCGv_i32 v)
2100 {
2101     uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);
2102 
2103     assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
2104     assert(o % sizeof(TCGTemp) == 0);
2105 
2106     return (void *)tcg_ctx + (uintptr_t)v;
2107 }
2108 #endif /* CONFIG_DEBUG_TCG */
2109 
2110 /*
2111  * Return true if OP may appear in the opcode stream with TYPE.
2112  * Test the runtime variable that controls each opcode.
2113  */
2114 bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
2115 {
2116     bool has_type;
2117 
2118     switch (type) {
2119     case TCG_TYPE_I32:
2120         has_type = true;
2121         break;
2122     case TCG_TYPE_I64:
2123         has_type = TCG_TARGET_REG_BITS == 64;
2124         break;
2125     case TCG_TYPE_V64:
2126         has_type = TCG_TARGET_HAS_v64;
2127         break;
2128     case TCG_TYPE_V128:
2129         has_type = TCG_TARGET_HAS_v128;
2130         break;
2131     case TCG_TYPE_V256:
2132         has_type = TCG_TARGET_HAS_v256;
2133         break;
2134     default:
2135         has_type = false;
2136         break;
2137     }
2138 
2139     switch (op) {
2140     case INDEX_op_discard:
2141     case INDEX_op_set_label:
2142     case INDEX_op_call:
2143     case INDEX_op_br:
2144     case INDEX_op_mb:
2145     case INDEX_op_insn_start:
2146     case INDEX_op_exit_tb:
2147     case INDEX_op_goto_tb:
2148     case INDEX_op_goto_ptr:
2149     case INDEX_op_qemu_ld_i32:
2150     case INDEX_op_qemu_st_i32:
2151     case INDEX_op_qemu_ld_i64:
2152     case INDEX_op_qemu_st_i64:
2153         return true;
2154 
2155     case INDEX_op_qemu_st8_i32:
2156         return TCG_TARGET_HAS_qemu_st8_i32;
2157 
2158     case INDEX_op_qemu_ld_i128:
2159     case INDEX_op_qemu_st_i128:
2160         return TCG_TARGET_HAS_qemu_ldst_i128;
2161 
2162     case INDEX_op_mov_i32:
2163     case INDEX_op_setcond_i32:
2164     case INDEX_op_brcond_i32:
2165     case INDEX_op_movcond_i32:
2166     case INDEX_op_ld8u_i32:
2167     case INDEX_op_ld8s_i32:
2168     case INDEX_op_ld16u_i32:
2169     case INDEX_op_ld16s_i32:
2170     case INDEX_op_ld_i32:
2171     case INDEX_op_st8_i32:
2172     case INDEX_op_st16_i32:
2173     case INDEX_op_st_i32:
2174     case INDEX_op_add_i32:
2175     case INDEX_op_sub_i32:
2176     case INDEX_op_neg_i32:
2177     case INDEX_op_mul_i32:
2178     case INDEX_op_and_i32:
2179     case INDEX_op_or_i32:
2180     case INDEX_op_xor_i32:
2181     case INDEX_op_shl_i32:
2182     case INDEX_op_shr_i32:
2183     case INDEX_op_sar_i32:
2184     case INDEX_op_extract_i32:
2185     case INDEX_op_sextract_i32:
2186     case INDEX_op_deposit_i32:
2187         return true;
2188 
2189     case INDEX_op_negsetcond_i32:
2190         return TCG_TARGET_HAS_negsetcond_i32;
2191     case INDEX_op_div_i32:
2192     case INDEX_op_divu_i32:
2193         return TCG_TARGET_HAS_div_i32;
2194     case INDEX_op_rem_i32:
2195     case INDEX_op_remu_i32:
2196         return TCG_TARGET_HAS_rem_i32;
2197     case INDEX_op_div2_i32:
2198     case INDEX_op_divu2_i32:
2199         return TCG_TARGET_HAS_div2_i32;
2200     case INDEX_op_rotl_i32:
2201     case INDEX_op_rotr_i32:
2202         return TCG_TARGET_HAS_rot_i32;
2203     case INDEX_op_extract2_i32:
2204         return TCG_TARGET_HAS_extract2_i32;
2205     case INDEX_op_add2_i32:
2206         return TCG_TARGET_HAS_add2_i32;
2207     case INDEX_op_sub2_i32:
2208         return TCG_TARGET_HAS_sub2_i32;
2209     case INDEX_op_mulu2_i32:
2210         return TCG_TARGET_HAS_mulu2_i32;
2211     case INDEX_op_muls2_i32:
2212         return TCG_TARGET_HAS_muls2_i32;
2213     case INDEX_op_muluh_i32:
2214         return TCG_TARGET_HAS_muluh_i32;
2215     case INDEX_op_mulsh_i32:
2216         return TCG_TARGET_HAS_mulsh_i32;
2217     case INDEX_op_ext8s_i32:
2218         return TCG_TARGET_HAS_ext8s_i32;
2219     case INDEX_op_ext16s_i32:
2220         return TCG_TARGET_HAS_ext16s_i32;
2221     case INDEX_op_ext8u_i32:
2222         return TCG_TARGET_HAS_ext8u_i32;
2223     case INDEX_op_ext16u_i32:
2224         return TCG_TARGET_HAS_ext16u_i32;
2225     case INDEX_op_bswap16_i32:
2226         return TCG_TARGET_HAS_bswap16_i32;
2227     case INDEX_op_bswap32_i32:
2228         return TCG_TARGET_HAS_bswap32_i32;
2229     case INDEX_op_not_i32:
2230         return TCG_TARGET_HAS_not_i32;
2231     case INDEX_op_andc_i32:
2232         return TCG_TARGET_HAS_andc_i32;
2233     case INDEX_op_orc_i32:
2234         return TCG_TARGET_HAS_orc_i32;
2235     case INDEX_op_eqv_i32:
2236         return TCG_TARGET_HAS_eqv_i32;
2237     case INDEX_op_nand_i32:
2238         return TCG_TARGET_HAS_nand_i32;
2239     case INDEX_op_nor_i32:
2240         return TCG_TARGET_HAS_nor_i32;
2241     case INDEX_op_clz_i32:
2242         return TCG_TARGET_HAS_clz_i32;
2243     case INDEX_op_ctz_i32:
2244         return TCG_TARGET_HAS_ctz_i32;
2245     case INDEX_op_ctpop_i32:
2246         return TCG_TARGET_HAS_ctpop_i32;
2247 
2248     case INDEX_op_brcond2_i32:
2249     case INDEX_op_setcond2_i32:
2250         return TCG_TARGET_REG_BITS == 32;
2251 
2252     case INDEX_op_mov_i64:
2253     case INDEX_op_setcond_i64:
2254     case INDEX_op_brcond_i64:
2255     case INDEX_op_movcond_i64:
2256     case INDEX_op_ld8u_i64:
2257     case INDEX_op_ld8s_i64:
2258     case INDEX_op_ld16u_i64:
2259     case INDEX_op_ld16s_i64:
2260     case INDEX_op_ld32u_i64:
2261     case INDEX_op_ld32s_i64:
2262     case INDEX_op_ld_i64:
2263     case INDEX_op_st8_i64:
2264     case INDEX_op_st16_i64:
2265     case INDEX_op_st32_i64:
2266     case INDEX_op_st_i64:
2267     case INDEX_op_add_i64:
2268     case INDEX_op_sub_i64:
2269     case INDEX_op_neg_i64:
2270     case INDEX_op_mul_i64:
2271     case INDEX_op_and_i64:
2272     case INDEX_op_or_i64:
2273     case INDEX_op_xor_i64:
2274     case INDEX_op_shl_i64:
2275     case INDEX_op_shr_i64:
2276     case INDEX_op_sar_i64:
2277     case INDEX_op_ext_i32_i64:
2278     case INDEX_op_extu_i32_i64:
2279     case INDEX_op_extract_i64:
2280     case INDEX_op_sextract_i64:
2281     case INDEX_op_deposit_i64:
2282         return TCG_TARGET_REG_BITS == 64;
2283 
2284     case INDEX_op_negsetcond_i64:
2285         return TCG_TARGET_HAS_negsetcond_i64;
2286     case INDEX_op_div_i64:
2287     case INDEX_op_divu_i64:
2288         return TCG_TARGET_HAS_div_i64;
2289     case INDEX_op_rem_i64:
2290     case INDEX_op_remu_i64:
2291         return TCG_TARGET_HAS_rem_i64;
2292     case INDEX_op_div2_i64:
2293     case INDEX_op_divu2_i64:
2294         return TCG_TARGET_HAS_div2_i64;
2295     case INDEX_op_rotl_i64:
2296     case INDEX_op_rotr_i64:
2297         return TCG_TARGET_HAS_rot_i64;
2298     case INDEX_op_extract2_i64:
2299         return TCG_TARGET_HAS_extract2_i64;
2300     case INDEX_op_extrl_i64_i32:
2301     case INDEX_op_extrh_i64_i32:
2302         return TCG_TARGET_HAS_extr_i64_i32;
2303     case INDEX_op_ext8s_i64:
2304         return TCG_TARGET_HAS_ext8s_i64;
2305     case INDEX_op_ext16s_i64:
2306         return TCG_TARGET_HAS_ext16s_i64;
2307     case INDEX_op_ext32s_i64:
2308         return TCG_TARGET_HAS_ext32s_i64;
2309     case INDEX_op_ext8u_i64:
2310         return TCG_TARGET_HAS_ext8u_i64;
2311     case INDEX_op_ext16u_i64:
2312         return TCG_TARGET_HAS_ext16u_i64;
2313     case INDEX_op_ext32u_i64:
2314         return TCG_TARGET_HAS_ext32u_i64;
2315     case INDEX_op_bswap16_i64:
2316         return TCG_TARGET_HAS_bswap16_i64;
2317     case INDEX_op_bswap32_i64:
2318         return TCG_TARGET_HAS_bswap32_i64;
2319     case INDEX_op_bswap64_i64:
2320         return TCG_TARGET_HAS_bswap64_i64;
2321     case INDEX_op_not_i64:
2322         return TCG_TARGET_HAS_not_i64;
2323     case INDEX_op_andc_i64:
2324         return TCG_TARGET_HAS_andc_i64;
2325     case INDEX_op_orc_i64:
2326         return TCG_TARGET_HAS_orc_i64;
2327     case INDEX_op_eqv_i64:
2328         return TCG_TARGET_HAS_eqv_i64;
2329     case INDEX_op_nand_i64:
2330         return TCG_TARGET_HAS_nand_i64;
2331     case INDEX_op_nor_i64:
2332         return TCG_TARGET_HAS_nor_i64;
2333     case INDEX_op_clz_i64:
2334         return TCG_TARGET_HAS_clz_i64;
2335     case INDEX_op_ctz_i64:
2336         return TCG_TARGET_HAS_ctz_i64;
2337     case INDEX_op_ctpop_i64:
2338         return TCG_TARGET_HAS_ctpop_i64;
2339     case INDEX_op_add2_i64:
2340         return TCG_TARGET_HAS_add2_i64;
2341     case INDEX_op_sub2_i64:
2342         return TCG_TARGET_HAS_sub2_i64;
2343     case INDEX_op_mulu2_i64:
2344         return TCG_TARGET_HAS_mulu2_i64;
2345     case INDEX_op_muls2_i64:
2346         return TCG_TARGET_HAS_muls2_i64;
2347     case INDEX_op_muluh_i64:
2348         return TCG_TARGET_HAS_muluh_i64;
2349     case INDEX_op_mulsh_i64:
2350         return TCG_TARGET_HAS_mulsh_i64;
2351 
2352     case INDEX_op_mov_vec:
2353     case INDEX_op_dup_vec:
2354     case INDEX_op_dupm_vec:
2355     case INDEX_op_ld_vec:
2356     case INDEX_op_st_vec:
2357     case INDEX_op_add_vec:
2358     case INDEX_op_sub_vec:
2359     case INDEX_op_and_vec:
2360     case INDEX_op_or_vec:
2361     case INDEX_op_xor_vec:
2362     case INDEX_op_cmp_vec:
2363         return has_type;
2364     case INDEX_op_dup2_vec:
2365         return has_type && TCG_TARGET_REG_BITS == 32;
2366     case INDEX_op_not_vec:
2367         return has_type && TCG_TARGET_HAS_not_vec;
2368     case INDEX_op_neg_vec:
2369         return has_type && TCG_TARGET_HAS_neg_vec;
2370     case INDEX_op_abs_vec:
2371         return has_type && TCG_TARGET_HAS_abs_vec;
2372     case INDEX_op_andc_vec:
2373         return has_type && TCG_TARGET_HAS_andc_vec;
2374     case INDEX_op_orc_vec:
2375         return has_type && TCG_TARGET_HAS_orc_vec;
2376     case INDEX_op_nand_vec:
2377         return has_type && TCG_TARGET_HAS_nand_vec;
2378     case INDEX_op_nor_vec:
2379         return has_type && TCG_TARGET_HAS_nor_vec;
2380     case INDEX_op_eqv_vec:
2381         return has_type && TCG_TARGET_HAS_eqv_vec;
2382     case INDEX_op_mul_vec:
2383         return has_type && TCG_TARGET_HAS_mul_vec;
2384     case INDEX_op_shli_vec:
2385     case INDEX_op_shri_vec:
2386     case INDEX_op_sari_vec:
2387         return has_type && TCG_TARGET_HAS_shi_vec;
2388     case INDEX_op_shls_vec:
2389     case INDEX_op_shrs_vec:
2390     case INDEX_op_sars_vec:
2391         return has_type && TCG_TARGET_HAS_shs_vec;
2392     case INDEX_op_shlv_vec:
2393     case INDEX_op_shrv_vec:
2394     case INDEX_op_sarv_vec:
2395         return has_type && TCG_TARGET_HAS_shv_vec;
2396     case INDEX_op_rotli_vec:
2397         return has_type && TCG_TARGET_HAS_roti_vec;
2398     case INDEX_op_rotls_vec:
2399         return has_type && TCG_TARGET_HAS_rots_vec;
2400     case INDEX_op_rotlv_vec:
2401     case INDEX_op_rotrv_vec:
2402         return has_type && TCG_TARGET_HAS_rotv_vec;
2403     case INDEX_op_ssadd_vec:
2404     case INDEX_op_usadd_vec:
2405     case INDEX_op_sssub_vec:
2406     case INDEX_op_ussub_vec:
2407         return has_type && TCG_TARGET_HAS_sat_vec;
2408     case INDEX_op_smin_vec:
2409     case INDEX_op_umin_vec:
2410     case INDEX_op_smax_vec:
2411     case INDEX_op_umax_vec:
2412         return has_type && TCG_TARGET_HAS_minmax_vec;
2413     case INDEX_op_bitsel_vec:
2414         return has_type && TCG_TARGET_HAS_bitsel_vec;
2415     case INDEX_op_cmpsel_vec:
2416         return has_type && TCG_TARGET_HAS_cmpsel_vec;
2417 
2418     default:
2419         tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
2420         return true;
2421     }
2422 }
2423 
2424 bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len)
2425 {
2426     unsigned width;
2427 
2428     tcg_debug_assert(type == TCG_TYPE_I32 || type == TCG_TYPE_I64);
2429     width = (type == TCG_TYPE_I32 ? 32 : 64);
2430 
2431     tcg_debug_assert(ofs < width);
2432     tcg_debug_assert(len > 0);
2433     tcg_debug_assert(len <= width - ofs);
2434 
2435     return TCG_TARGET_deposit_valid(type, ofs, len);
2436 }
2437 
2438 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2439 
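/*
 * Emit an INDEX_op_call.  Outputs are placed first in op->args, then the
 * inputs as described by info->in[]; 32-bit arguments that the ABI
 * requires extending are first widened into EBB temps, which are freed
 * once the op has been queued.
 */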
2440 static void tcg_gen_callN(void *func, TCGHelperInfo *info,
2441                           TCGTemp *ret, TCGTemp **args)
2442 {
2443     TCGv_i64 extend_free[MAX_CALL_IARGS];
2444     int n_extend = 0;
2445     TCGOp *op;
2446     int i, n, pi = 0, total_args;
2447 
2448     if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
2449         init_call_layout(info);
2450         g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
2451     }
2452 
2453     total_args = info->nr_out + info->nr_in + 2;
2454     op = tcg_op_alloc(INDEX_op_call, total_args);
2455 
2456 #ifdef CONFIG_PLUGIN
2457     /* Flag helpers that may affect guest state */
2458     if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
2459         tcg_ctx->plugin_insn->calls_helpers = true;
2460     }
2461 #endif
2462 
2463     TCGOP_CALLO(op) = n = info->nr_out;
2464     switch (n) {
2465     case 0:
2466         tcg_debug_assert(ret == NULL);
2467         break;
2468     case 1:
2469         tcg_debug_assert(ret != NULL);
2470         op->args[pi++] = temp_arg(ret);
2471         break;
2472     case 2:
2473     case 4:
2474         tcg_debug_assert(ret != NULL);
2475         tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
2476         tcg_debug_assert(ret->temp_subindex == 0);
2477         for (i = 0; i < n; ++i) {
2478             op->args[pi++] = temp_arg(ret + i);
2479         }
2480         break;
2481     default:
2482         g_assert_not_reached();
2483     }
2484 
2485     TCGOP_CALLI(op) = n = info->nr_in;
2486     for (i = 0; i < n; i++) {
2487         const TCGCallArgumentLoc *loc = &info->in[i];
2488         TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;
2489 
2490         switch (loc->kind) {
2491         case TCG_CALL_ARG_NORMAL:
2492         case TCG_CALL_ARG_BY_REF:
2493         case TCG_CALL_ARG_BY_REF_N:
2494             op->args[pi++] = temp_arg(ts);
2495             break;
2496 
2497         case TCG_CALL_ARG_EXTEND_U:
2498         case TCG_CALL_ARG_EXTEND_S:
2499             {
2500                 TCGv_i64 temp = tcg_temp_ebb_new_i64();
2501                 TCGv_i32 orig = temp_tcgv_i32(ts);
2502 
2503                 if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
2504                     tcg_gen_ext_i32_i64(temp, orig);
2505                 } else {
2506                     tcg_gen_extu_i32_i64(temp, orig);
2507                 }
2508                 op->args[pi++] = tcgv_i64_arg(temp);
2509                 extend_free[n_extend++] = temp;
2510             }
2511             break;
2512 
2513         default:
2514             g_assert_not_reached();
2515         }
2516     }
2517     op->args[pi++] = (uintptr_t)func;
2518     op->args[pi++] = (uintptr_t)info;
2519     tcg_debug_assert(pi == total_args);
2520 
2521     if (tcg_ctx->emit_before_op) {
2522         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
2523     } else {
2524         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2525     }
2526 
2527     tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
2528     for (i = 0; i < n_extend; ++i) {
2529         tcg_temp_free_i64(extend_free[i]);
2530     }
2531 }
2532 
2533 void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret)
2534 {
2535     tcg_gen_callN(func, info, ret, NULL);
2536 }
2537 
2538 void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2539 {
2540     tcg_gen_callN(func, info, ret, &t1);
2541 }
2542 
2543 void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret,
2544                    TCGTemp *t1, TCGTemp *t2)
2545 {
2546     TCGTemp *args[2] = { t1, t2 };
2547     tcg_gen_callN(func, info, ret, args);
2548 }
2549 
2550 void tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret,
2551                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3)
2552 {
2553     TCGTemp *args[3] = { t1, t2, t3 };
2554     tcg_gen_callN(func, info, ret, args);
2555 }
2556 
2557 void tcg_gen_call4(void *func, TCGHelperInfo *info, TCGTemp *ret,
2558                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2559 {
2560     TCGTemp *args[4] = { t1, t2, t3, t4 };
2561     tcg_gen_callN(func, info, ret, args);
2562 }
2563 
2564 void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2565                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2566 {
2567     TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2568     tcg_gen_callN(func, info, ret, args);
2569 }
2570 
2571 void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret,
2572                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3,
2573                    TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2574 {
2575     TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2576     tcg_gen_callN(func, info, ret, args);
2577 }
2578 
2579 void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2580                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2581                    TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2582 {
2583     TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2584     tcg_gen_callN(func, info, ret, args);
2585 }
2586 
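/*
 * Reset value tracking before register allocation: constants start as
 * TEMP_VAL_CONST, fixed temps as TEMP_VAL_REG, globals and TB temps in
 * memory, and EBB temps dead.
 */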
2587 static void tcg_reg_alloc_start(TCGContext *s)
2588 {
2589     int i, n;
2590 
2591     for (i = 0, n = s->nb_temps; i < n; i++) {
2592         TCGTemp *ts = &s->temps[i];
2593         TCGTempVal val = TEMP_VAL_MEM;
2594 
2595         switch (ts->kind) {
2596         case TEMP_CONST:
2597             val = TEMP_VAL_CONST;
2598             break;
2599         case TEMP_FIXED:
2600             val = TEMP_VAL_REG;
2601             break;
2602         case TEMP_GLOBAL:
2603             break;
2604         case TEMP_EBB:
2605             val = TEMP_VAL_DEAD;
2606             /* fall through */
2607         case TEMP_TB:
2608             ts->mem_allocated = 0;
2609             break;
2610         default:
2611             g_assert_not_reached();
2612         }
2613         ts->val_type = val;
2614     }
2615 
2616     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2617 }
2618 
2619 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2620                                  TCGTemp *ts)
2621 {
2622     int idx = temp_idx(ts);
2623 
2624     switch (ts->kind) {
2625     case TEMP_FIXED:
2626     case TEMP_GLOBAL:
2627         pstrcpy(buf, buf_size, ts->name);
2628         break;
2629     case TEMP_TB:
2630         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2631         break;
2632     case TEMP_EBB:
2633         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2634         break;
2635     case TEMP_CONST:
2636         switch (ts->type) {
2637         case TCG_TYPE_I32:
2638             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2639             break;
2640 #if TCG_TARGET_REG_BITS > 32
2641         case TCG_TYPE_I64:
2642             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2643             break;
2644 #endif
2645         case TCG_TYPE_V64:
2646         case TCG_TYPE_V128:
2647         case TCG_TYPE_V256:
2648             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2649                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2650             break;
2651         default:
2652             g_assert_not_reached();
2653         }
2654         break;
2655     }
2656     return buf;
2657 }
2658 
2659 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2660                              int buf_size, TCGArg arg)
2661 {
2662     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2663 }
2664 
2665 static const char * const cond_name[] =
2666 {
2667     [TCG_COND_NEVER] = "never",
2668     [TCG_COND_ALWAYS] = "always",
2669     [TCG_COND_EQ] = "eq",
2670     [TCG_COND_NE] = "ne",
2671     [TCG_COND_LT] = "lt",
2672     [TCG_COND_GE] = "ge",
2673     [TCG_COND_LE] = "le",
2674     [TCG_COND_GT] = "gt",
2675     [TCG_COND_LTU] = "ltu",
2676     [TCG_COND_GEU] = "geu",
2677     [TCG_COND_LEU] = "leu",
2678     [TCG_COND_GTU] = "gtu",
2679     [TCG_COND_TSTEQ] = "tsteq",
2680     [TCG_COND_TSTNE] = "tstne",
2681 };
2682 
2683 static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
2684 {
2685     [MO_UB]   = "ub",
2686     [MO_SB]   = "sb",
2687     [MO_LEUW] = "leuw",
2688     [MO_LESW] = "lesw",
2689     [MO_LEUL] = "leul",
2690     [MO_LESL] = "lesl",
2691     [MO_LEUQ] = "leq",
2692     [MO_BEUW] = "beuw",
2693     [MO_BESW] = "besw",
2694     [MO_BEUL] = "beul",
2695     [MO_BESL] = "besl",
2696     [MO_BEUQ] = "beq",
2697     [MO_128 + MO_BE] = "beo",
2698     [MO_128 + MO_LE] = "leo",
2699 };
2700 
2701 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2702     [MO_UNALN >> MO_ASHIFT]    = "un+",
2703     [MO_ALIGN >> MO_ASHIFT]    = "al+",
2704     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2705     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2706     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2707     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2708     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2709     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2710 };
2711 
2712 static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
2713     [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
2714     [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
2715     [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
2716     [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
2717     [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
2718     [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
2719 };
2720 
2721 static const char bswap_flag_name[][6] = {
2722     [TCG_BSWAP_IZ] = "iz",
2723     [TCG_BSWAP_OZ] = "oz",
2724     [TCG_BSWAP_OS] = "os",
2725     [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
2726     [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
2727 };
2728 
2729 #ifdef CONFIG_PLUGIN
2730 static const char * const plugin_from_name[] = {
2731     "from-tb",
2732     "from-insn",
2733     "after-insn",
2734     "after-tb",
2735 };
2736 #endif
2737 
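/* True iff D has at most one bit set (the classic d & (d - 1) test). */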
2738 static inline bool tcg_regset_single(TCGRegSet d)
2739 {
2740     return (d & (d - 1)) == 0;
2741 }
2742 
2743 static inline TCGReg tcg_regset_first(TCGRegSet d)
2744 {
2745     if (TCG_TARGET_NB_REGS <= 32) {
2746         return ctz32(d);
2747     } else {
2748         return ctz64(d);
2749     }
2750 }
2751 
2752 /* Return only the number of characters output -- no error return. */
2753 #define ne_fprintf(...) \
2754     ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2755 
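/* Pretty-print the current opcode stream to F, one op per line. */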
2756 void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2757 {
2758     char buf[128];
2759     TCGOp *op;
2760 
2761     QTAILQ_FOREACH(op, &s->ops, link) {
2762         int i, k, nb_oargs, nb_iargs, nb_cargs;
2763         const TCGOpDef *def;
2764         TCGOpcode c;
2765         int col = 0;
2766 
2767         c = op->opc;
2768         def = &tcg_op_defs[c];
2769 
2770         if (c == INDEX_op_insn_start) {
2771             nb_oargs = 0;
2772             col += ne_fprintf(f, "\n ----");
2773 
2774             for (i = 0, k = s->insn_start_words; i < k; ++i) {
2775                 col += ne_fprintf(f, " %016" PRIx64,
2776                                   tcg_get_insn_start_param(op, i));
2777             }
2778         } else if (c == INDEX_op_call) {
2779             const TCGHelperInfo *info = tcg_call_info(op);
2780             void *func = tcg_call_func(op);
2781 
2782             /* variable number of arguments */
2783             nb_oargs = TCGOP_CALLO(op);
2784             nb_iargs = TCGOP_CALLI(op);
2785             nb_cargs = def->nb_cargs;
2786 
2787             col += ne_fprintf(f, " %s ", def->name);
2788 
2789             /*
2790              * Print the function name from TCGHelperInfo, if available.
2791              * Note that plugins have a template function for the info,
2792              * but the actual function pointer comes from the plugin.
2793              */
2794             if (func == info->func) {
2795                 col += ne_fprintf(f, "%s", info->name);
2796             } else {
2797                 col += ne_fprintf(f, "plugin(%p)", func);
2798             }
2799 
2800             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2801             for (i = 0; i < nb_oargs; i++) {
2802                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2803                                                             op->args[i]));
2804             }
2805             for (i = 0; i < nb_iargs; i++) {
2806                 TCGArg arg = op->args[nb_oargs + i];
2807                 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2808                 col += ne_fprintf(f, ",%s", t);
2809             }
2810         } else {
2811             col += ne_fprintf(f, " %s ", def->name);
2812 
2813             nb_oargs = def->nb_oargs;
2814             nb_iargs = def->nb_iargs;
2815             nb_cargs = def->nb_cargs;
2816 
2817             if (def->flags & TCG_OPF_VECTOR) {
2818                 col += ne_fprintf(f, "v%d,e%d,",
2819                                   8 * tcg_type_size(TCGOP_TYPE(op)),
2820                                   8 << TCGOP_VECE(op));
2821             }
2822 
2823             k = 0;
2824             for (i = 0; i < nb_oargs; i++) {
2825             const char *sep = k ? "," : "";
2826                 col += ne_fprintf(f, "%s%s", sep,
2827                                   tcg_get_arg_str(s, buf, sizeof(buf),
2828                                                   op->args[k++]));
2829             }
2830             for (i = 0; i < nb_iargs; i++) {
2831             const char *sep = k ? "," : "";
2832                 col += ne_fprintf(f, "%s%s", sep,
2833                                   tcg_get_arg_str(s, buf, sizeof(buf),
2834                                                   op->args[k++]));
2835             }
2836             switch (c) {
2837             case INDEX_op_brcond_i32:
2838             case INDEX_op_setcond_i32:
2839             case INDEX_op_negsetcond_i32:
2840             case INDEX_op_movcond_i32:
2841             case INDEX_op_brcond2_i32:
2842             case INDEX_op_setcond2_i32:
2843             case INDEX_op_brcond_i64:
2844             case INDEX_op_setcond_i64:
2845             case INDEX_op_negsetcond_i64:
2846             case INDEX_op_movcond_i64:
2847             case INDEX_op_cmp_vec:
2848             case INDEX_op_cmpsel_vec:
2849                 if (op->args[k] < ARRAY_SIZE(cond_name)
2850                     && cond_name[op->args[k]]) {
2851                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2852                 } else {
2853                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2854                 }
2855                 i = 1;
2856                 break;
2857             case INDEX_op_qemu_ld_i32:
2858             case INDEX_op_qemu_st_i32:
2859             case INDEX_op_qemu_st8_i32:
2860             case INDEX_op_qemu_ld_i64:
2861             case INDEX_op_qemu_st_i64:
2862             case INDEX_op_qemu_ld_i128:
2863             case INDEX_op_qemu_st_i128:
2864                 {
2865                     const char *s_al, *s_op, *s_at;
2866                     MemOpIdx oi = op->args[k++];
2867                     MemOp mop = get_memop(oi);
2868                     unsigned ix = get_mmuidx(oi);
2869 
2870                     s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
2871                     s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
2872                     s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
2873                     mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
2874 
2875                     /* If all fields are accounted for, print symbolically. */
2876                     if (!mop && s_al && s_op && s_at) {
2877                         col += ne_fprintf(f, ",%s%s%s,%u",
2878                                           s_at, s_al, s_op, ix);
2879                     } else {
2880                         mop = get_memop(oi);
2881                         col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
2882                     }
2883                     i = 1;
2884                 }
2885                 break;
2886             case INDEX_op_bswap16_i32:
2887             case INDEX_op_bswap16_i64:
2888             case INDEX_op_bswap32_i32:
2889             case INDEX_op_bswap32_i64:
2890             case INDEX_op_bswap64_i64:
2891                 {
2892                     TCGArg flags = op->args[k];
2893                     const char *name = NULL;
2894 
2895                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
2896                         name = bswap_flag_name[flags];
2897                     }
2898                     if (name) {
2899                         col += ne_fprintf(f, ",%s", name);
2900                     } else {
2901                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2902                     }
2903                     i = k = 1;
2904                 }
2905                 break;
2906 #ifdef CONFIG_PLUGIN
2907             case INDEX_op_plugin_cb:
2908                 {
2909                     TCGArg from = op->args[k++];
2910                     const char *name = NULL;
2911 
2912                     if (from < ARRAY_SIZE(plugin_from_name)) {
2913                         name = plugin_from_name[from];
2914                     }
2915                     if (name) {
2916                         col += ne_fprintf(f, "%s", name);
2917                     } else {
2918                         col += ne_fprintf(f, "$0x%" TCG_PRIlx, from);
2919                     }
2920                     i = 1;
2921                 }
2922                 break;
2923 #endif
2924             default:
2925                 i = 0;
2926                 break;
2927             }
2928             switch (c) {
2929             case INDEX_op_set_label:
2930             case INDEX_op_br:
2931             case INDEX_op_brcond_i32:
2932             case INDEX_op_brcond_i64:
2933             case INDEX_op_brcond2_i32:
2934                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2935                                   arg_label(op->args[k])->id);
2936                 i++, k++;
2937                 break;
2938             case INDEX_op_mb:
2939                 {
2940                     TCGBar membar = op->args[k];
2941                     const char *b_op, *m_op;
2942 
2943                     switch (membar & TCG_BAR_SC) {
2944                     case 0:
2945                         b_op = "none";
2946                         break;
2947                     case TCG_BAR_LDAQ:
2948                         b_op = "acq";
2949                         break;
2950                     case TCG_BAR_STRL:
2951                         b_op = "rel";
2952                         break;
2953                     case TCG_BAR_SC:
2954                         b_op = "seq";
2955                         break;
2956                     default:
2957                         g_assert_not_reached();
2958                     }
2959 
2960                     switch (membar & TCG_MO_ALL) {
2961                     case 0:
2962                         m_op = "none";
2963                         break;
2964                     case TCG_MO_LD_LD:
2965                         m_op = "rr";
2966                         break;
2967                     case TCG_MO_LD_ST:
2968                         m_op = "rw";
2969                         break;
2970                     case TCG_MO_ST_LD:
2971                         m_op = "wr";
2972                         break;
2973                     case TCG_MO_ST_ST:
2974                         m_op = "ww";
2975                         break;
2976                     case TCG_MO_LD_LD | TCG_MO_LD_ST:
2977                         m_op = "rr+rw";
2978                         break;
2979                     case TCG_MO_LD_LD | TCG_MO_ST_LD:
2980                         m_op = "rr+wr";
2981                         break;
2982                     case TCG_MO_LD_LD | TCG_MO_ST_ST:
2983                         m_op = "rr+ww";
2984                         break;
2985                     case TCG_MO_LD_ST | TCG_MO_ST_LD:
2986                         m_op = "rw+wr";
2987                         break;
2988                     case TCG_MO_LD_ST | TCG_MO_ST_ST:
2989                         m_op = "rw+ww";
2990                         break;
2991                     case TCG_MO_ST_LD | TCG_MO_ST_ST:
2992                         m_op = "wr+ww";
2993                         break;
2994                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
2995                         m_op = "rr+rw+wr";
2996                         break;
2997                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
2998                         m_op = "rr+rw+ww";
2999                         break;
3000                     case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
3001                         m_op = "rr+wr+ww";
3002                         break;
3003                     case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
3004                         m_op = "rw+wr+ww";
3005                         break;
3006                     case TCG_MO_ALL:
3007                         m_op = "all";
3008                         break;
3009                     default:
3010                         g_assert_not_reached();
3011                     }
3012 
3013                     col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
3014                     i++, k++;
3015                 }
3016                 break;
3017             default:
3018                 break;
3019             }
3020             for (; i < nb_cargs; i++, k++) {
3021                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
3022                                   op->args[k]);
3023             }
3024         }
3025 
3026         if (have_prefs || op->life) {
3027             for (; col < 40; ++col) {
3028                 putc(' ', f);
3029             }
3030         }
3031 
3032         if (op->life) {
3033             unsigned life = op->life;
3034 
3035             if (life & (SYNC_ARG * 3)) {
3036                 ne_fprintf(f, "  sync:");
3037                 for (i = 0; i < 2; ++i) {
3038                     if (life & (SYNC_ARG << i)) {
3039                         ne_fprintf(f, " %d", i);
3040                     }
3041                 }
3042             }
3043             life /= DEAD_ARG;
3044             if (life) {
3045                 ne_fprintf(f, "  dead:");
3046                 for (i = 0; life; ++i, life >>= 1) {
3047                     if (life & 1) {
3048                         ne_fprintf(f, " %d", i);
3049                     }
3050                 }
3051             }
3052         }
3053 
3054         if (have_prefs) {
3055             for (i = 0; i < nb_oargs; ++i) {
3056                 TCGRegSet set = output_pref(op, i);
3057 
3058                 if (i == 0) {
3059                     ne_fprintf(f, "  pref=");
3060                 } else {
3061                     ne_fprintf(f, ",");
3062                 }
3063                 if (set == 0) {
3064                     ne_fprintf(f, "none");
3065                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
3066                     ne_fprintf(f, "all");
3067 #ifdef CONFIG_DEBUG_TCG
3068                 } else if (tcg_regset_single(set)) {
3069                     TCGReg reg = tcg_regset_first(set);
3070                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
3071 #endif
3072                 } else if (TCG_TARGET_NB_REGS <= 32) {
3073                     ne_fprintf(f, "0x%x", (uint32_t)set);
3074                 } else {
3075                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
3076                 }
3077             }
3078         }
3079 
3080         putc('\n', f);
3081     }
3082 }
3083 
3084 /* We give more priority to constraints with fewer registers. */
3085 static int get_constraint_priority(const TCGArgConstraint *arg_ct, int k)
3086 {
3087     int n;
3088 
3089     arg_ct += k;
3090     n = ctpop64(arg_ct->regs);
3091 
3092     /*
3093      * Sort constraints of a single register first, which includes output
3094      * aliases (which must exactly match the input already allocated).
3095      */
3096     if (n == 1 || arg_ct->oalias) {
3097         return INT_MAX;
3098     }
3099 
3100     /*
3101      * Sort register pairs next, first then second immediately after.
3102      * Arbitrarily sort multiple pairs by the index of the first reg;
3103      * there shouldn't be many pairs.
3104      */
3105     switch (arg_ct->pair) {
3106     case 1:
3107     case 3:
3108         return (k + 1) * 2;
3109     case 2:
3110         return (arg_ct->pair_index + 1) * 2 - 1;
3111     }
3112 
3113     /* Finally, sort by decreasing register count. */
3114     assert(n > 1);
3115     return -n;
3116 }
3117 
3118 /* sort from highest priority to lowest */
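/* N is at most TCG_MAX_OP_ARGS, so the quadratic loop below is fine. */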
3119 static void sort_constraints(TCGArgConstraint *a, int start, int n)
3120 {
3121     int i, j;
3122 
3123     for (i = 0; i < n; i++) {
3124         a[start + i].sort_index = start + i;
3125     }
3126     if (n <= 1) {
3127         return;
3128     }
3129     for (i = 0; i < n - 1; i++) {
3130         for (j = i + 1; j < n; j++) {
3131             int p1 = get_constraint_priority(a, a[start + i].sort_index);
3132             int p2 = get_constraint_priority(a, a[start + j].sort_index);
3133             if (p1 < p2) {
3134                 int tmp = a[start + i].sort_index;
3135                 a[start + i].sort_index = a[start + j].sort_index;
3136                 a[start + j].sort_index = tmp;
3137             }
3138         }
3139     }
3140 }
3141 
3142 static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS];
3143 static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS];
3144 
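/*
 * Expand each TCGConstraintSet from its string form into TCGArgConstraint
 * structures, once at startup.  A digit aliases an input to an output,
 * '&' flags an output as needing a new register, and 'p'/'m' tie an
 * argument to the register after/before the previous one; the remaining
 * letters come from tcg-target-con-str.h.
 */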
3145 static void process_constraint_sets(void)
3146 {
3147     for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) {
3148         const TCGConstraintSet *tdefs = &constraint_sets[c];
3149         TCGArgConstraint *args_ct = all_cts[c];
3150         int nb_oargs = tdefs->nb_oargs;
3151         int nb_iargs = tdefs->nb_iargs;
3152         int nb_args = nb_oargs + nb_iargs;
3153         bool saw_alias_pair = false;
3154 
3155         for (int i = 0; i < nb_args; i++) {
3156             const char *ct_str = tdefs->args_ct_str[i];
3157             bool input_p = i >= nb_oargs;
3158             int o;
3159 
3160             switch (*ct_str) {
3161             case '0' ... '9':
3162                 o = *ct_str - '0';
3163                 tcg_debug_assert(input_p);
3164                 tcg_debug_assert(o < nb_oargs);
3165                 tcg_debug_assert(args_ct[o].regs != 0);
3166                 tcg_debug_assert(!args_ct[o].oalias);
3167                 args_ct[i] = args_ct[o];
3168                 /* The output sets oalias.  */
3169                 args_ct[o].oalias = 1;
3170                 args_ct[o].alias_index = i;
3171                 /* The input sets ialias. */
3172                 args_ct[i].ialias = 1;
3173                 args_ct[i].alias_index = o;
3174                 if (args_ct[i].pair) {
3175                     saw_alias_pair = true;
3176                 }
3177                 tcg_debug_assert(ct_str[1] == '\0');
3178                 continue;
3179 
3180             case '&':
3181                 tcg_debug_assert(!input_p);
3182                 args_ct[i].newreg = true;
3183                 ct_str++;
3184                 break;
3185 
3186             case 'p': /* plus */
3187                 /* Allocate to the register after the previous. */
3188                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3189                 o = i - 1;
3190                 tcg_debug_assert(!args_ct[o].pair);
3191                 tcg_debug_assert(!args_ct[o].ct);
3192                 args_ct[i] = (TCGArgConstraint){
3193                     .pair = 2,
3194                     .pair_index = o,
3195                     .regs = args_ct[o].regs << 1,
3196                     .newreg = args_ct[o].newreg,
3197                 };
3198                 args_ct[o].pair = 1;
3199                 args_ct[o].pair_index = i;
3200                 tcg_debug_assert(ct_str[1] == '\0');
3201                 continue;
3202 
3203             case 'm': /* minus */
3204                 /* Allocate to the register before the previous. */
3205                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3206                 o = i - 1;
3207                 tcg_debug_assert(!args_ct[o].pair);
3208                 tcg_debug_assert(!args_ct[o].ct);
3209                 args_ct[i] = (TCGArgConstraint){
3210                     .pair = 1,
3211                     .pair_index = o,
3212                     .regs = args_ct[o].regs >> 1,
3213                     .newreg = args_ct[o].newreg,
3214                 };
3215                 args_ct[o].pair = 2;
3216                 args_ct[o].pair_index = i;
3217                 tcg_debug_assert(ct_str[1] == '\0');
3218                 continue;
3219             }
3220 
3221             do {
3222                 switch (*ct_str) {
3223                 case 'i':
3224                     args_ct[i].ct |= TCG_CT_CONST;
3225                     break;
3226 #ifdef TCG_REG_ZERO
3227                 case 'z':
3228                     args_ct[i].ct |= TCG_CT_REG_ZERO;
3229                     break;
3230 #endif
3231 
3232                 /* Include all of the target-specific constraints. */
3233 
3234 #undef CONST
3235 #define CONST(CASE, MASK) \
3236     case CASE: args_ct[i].ct |= MASK; break;
3237 #define REGS(CASE, MASK) \
3238     case CASE: args_ct[i].regs |= MASK; break;
3239 
3240 #include "tcg-target-con-str.h"
3241 
3242 #undef REGS
3243 #undef CONST
3244                 default:
3245                 case '0' ... '9':
3246                 case '&':
3247                 case 'p':
3248                 case 'm':
3249                     /* Typo in TCGConstraintSet constraint. */
3250                     g_assert_not_reached();
3251                 }
3252             } while (*++ct_str != '\0');
3253         }
3254 
3255         /*
3256          * Fix up output pairs that are aliased with inputs.
3257          * When we created the alias, we copied pair from the output.
3258          * There are three cases:
3259          *    (1a) Pairs of inputs alias pairs of outputs.
3260          *    (1b) One input aliases the first of a pair of outputs.
3261          *    (2)  One input aliases the second of a pair of outputs.
3262          *
3263          * Case 1a is handled by making sure that the pair_index'es are
3264          * properly updated so that they appear the same as a pair of inputs.
3265          *
3266          * Case 1b is handled by setting the pair_index of the input to
3267          * itself, simply so it doesn't point to an unrelated argument.
3268          * Since we don't encounter the "second" during the input allocation
3269          * phase, nothing happens with the second half of the input pair.
3270          *
3271          * Case 2 is handled by setting the second input to pair=3, the
3272          * first output to pair=3, and the pair_index'es to match.
3273          */
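        /*
         * Hypothetical illustration: with two outputs "r", "p" forming a
         * pair, inputs "0", "1" alias the whole pair (case 1a); a single
         * input "0" aliases only the first half (case 1b); a single
         * input "1" aliases only the second half (case 2).
         */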
3274         if (saw_alias_pair) {
3275             for (int i = nb_oargs; i < nb_args; i++) {
3276                 int o, o2, i2;
3277 
3278                 /*
3279                  * Since [0-9pm] must be alone in the constraint string,
3280                  * the only way they can both be set is if the pair comes
3281                  * from the output alias.
3282                  */
3283                 if (!args_ct[i].ialias) {
3284                     continue;
3285                 }
3286                 switch (args_ct[i].pair) {
3287                 case 0:
3288                     break;
3289                 case 1:
3290                     o = args_ct[i].alias_index;
3291                     o2 = args_ct[o].pair_index;
3292                     tcg_debug_assert(args_ct[o].pair == 1);
3293                     tcg_debug_assert(args_ct[o2].pair == 2);
3294                     if (args_ct[o2].oalias) {
3295                         /* Case 1a */
3296                         i2 = args_ct[o2].alias_index;
3297                         tcg_debug_assert(args_ct[i2].pair == 2);
3298                         args_ct[i2].pair_index = i;
3299                         args_ct[i].pair_index = i2;
3300                     } else {
3301                         /* Case 1b */
3302                         args_ct[i].pair_index = i;
3303                     }
3304                     break;
3305                 case 2:
3306                     o = args_ct[i].alias_index;
3307                     o2 = args_ct[o].pair_index;
3308                     tcg_debug_assert(args_ct[o].pair == 2);
3309                     tcg_debug_assert(args_ct[o2].pair == 1);
3310                     if (args_ct[o2].oalias) {
3311                         /* Case 1a */
3312                         i2 = args_ct[o2].alias_index;
3313                         tcg_debug_assert(args_ct[i2].pair == 1);
3314                         args_ct[i2].pair_index = i;
3315                         args_ct[i].pair_index = i2;
3316                     } else {
3317                         /* Case 2 */
3318                         args_ct[i].pair = 3;
3319                         args_ct[o2].pair = 3;
3320                         args_ct[i].pair_index = o2;
3321                         args_ct[o2].pair_index = i;
3322                     }
3323                     break;
3324                 default:
3325                     g_assert_not_reached();
3326                 }
3327             }
3328         }
3329 
3330         /* sort the constraints (XXX: this is just a heuristic) */
3331         sort_constraints(args_ct, 0, nb_oargs);
3332         sort_constraints(args_ct, nb_oargs, nb_iargs);
3333     }
3334 }
3335 
3336 static const TCGArgConstraint *opcode_args_ct(const TCGOp *op)
3337 {
3338     const TCGOpDef *def = &tcg_op_defs[op->opc];
3339     TCGConstraintSetIndex con_set;
3340 
3341 #ifdef CONFIG_DEBUG_TCG
3342     assert(tcg_op_supported(op->opc, TCGOP_TYPE(op), TCGOP_FLAGS(op)));
3343 #endif
3344 
3345     if (def->flags & TCG_OPF_NOT_PRESENT) {
3346         return empty_cts;
3347     }
3348 
3349     con_set = tcg_target_op_def(op->opc, TCGOP_TYPE(op), TCGOP_FLAGS(op));
3350     tcg_debug_assert(con_set >= 0 && con_set < ARRAY_SIZE(constraint_sets));
3351 
3352     /* The constraint arguments must match TCGOpcode arguments. */
3353     tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs);
3354     tcg_debug_assert(constraint_sets[con_set].nb_iargs == def->nb_iargs);
3355 
3356     return all_cts[con_set];
3357 }
3358 
3359 static void remove_label_use(TCGOp *op, int idx)
3360 {
3361     TCGLabel *label = arg_label(op->args[idx]);
3362     TCGLabelUse *use;
3363 
3364     QSIMPLEQ_FOREACH(use, &label->branches, next) {
3365         if (use->op == op) {
3366             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3367             return;
3368         }
3369     }
3370     g_assert_not_reached();
3371 }
3372 
3373 void tcg_op_remove(TCGContext *s, TCGOp *op)
3374 {
3375     switch (op->opc) {
3376     case INDEX_op_br:
3377         remove_label_use(op, 0);
3378         break;
3379     case INDEX_op_brcond_i32:
3380     case INDEX_op_brcond_i64:
3381         remove_label_use(op, 3);
3382         break;
3383     case INDEX_op_brcond2_i32:
3384         remove_label_use(op, 5);
3385         break;
3386     default:
3387         break;
3388     }
3389 
3390     QTAILQ_REMOVE(&s->ops, op, link);
3391     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3392     s->nb_ops--;
3393 }
3394 
3395 void tcg_remove_ops_after(TCGOp *op)
3396 {
3397     TCGContext *s = tcg_ctx;
3398 
3399     while (true) {
3400         TCGOp *last = tcg_last_op();
3401         if (last == op) {
3402             return;
3403         }
3404         tcg_op_remove(s, last);
3405     }
3406 }
3407 
3408 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3409 {
3410     TCGContext *s = tcg_ctx;
3411     TCGOp *op = NULL;
3412 
3413     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3414         QTAILQ_FOREACH(op, &s->free_ops, link) {
3415             if (nargs <= op->nargs) {
3416                 QTAILQ_REMOVE(&s->free_ops, op, link);
3417                 nargs = op->nargs;
3418                 goto found;
3419             }
3420         }
3421     }
3422 
3423     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3424     nargs = MAX(4, nargs);
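    /*
     * Illustration: a request for 2 args is still allocated with 4, so
     * once freed the op can be reused by any later request of up to 4 args.
     */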
3425     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3426 
3427  found:
3428     memset(op, 0, offsetof(TCGOp, link));
3429     op->opc = opc;
3430     op->nargs = nargs;
3431 
3432     /* Check for bitfield overflow. */
3433     tcg_debug_assert(op->nargs == nargs);
3434 
3435     s->nb_ops++;
3436     return op;
3437 }
3438 
3439 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3440 {
3441     TCGOp *op = tcg_op_alloc(opc, nargs);
3442 
3443     if (tcg_ctx->emit_before_op) {
3444         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
3445     } else {
3446         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3447     }
3448     return op;
3449 }
3450 
3451 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3452                             TCGOpcode opc, unsigned nargs)
3453 {
3454     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3455 
3456     TCGOP_TYPE(new_op) = TCGOP_TYPE(old_op);
3457     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3458     return new_op;
3459 }
3460 
3461 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3462                            TCGOpcode opc, unsigned nargs)
3463 {
3464     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3465 
3466     TCGOP_TYPE(new_op) = TCGOP_TYPE(old_op);
3467     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3468     return new_op;
3469 }
3470 
3471 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3472 {
3473     TCGLabelUse *u;
3474 
3475     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3476         TCGOp *op = u->op;
3477         switch (op->opc) {
3478         case INDEX_op_br:
3479             op->args[0] = label_arg(to);
3480             break;
3481         case INDEX_op_brcond_i32:
3482         case INDEX_op_brcond_i64:
3483             op->args[3] = label_arg(to);
3484             break;
3485         case INDEX_op_brcond2_i32:
3486             op->args[5] = label_arg(to);
3487             break;
3488         default:
3489             g_assert_not_reached();
3490         }
3491     }
3492 
3493     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3494 }
3495 
3496 /* Reachability analysis: remove unreachable code.  */
3497 static void __attribute__((noinline))
3498 reachable_code_pass(TCGContext *s)
3499 {
3500     TCGOp *op, *op_next, *op_prev;
3501     bool dead = false;
3502 
3503     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3504         bool remove = dead;
3505         TCGLabel *label;
3506 
3507         switch (op->opc) {
3508         case INDEX_op_set_label:
3509             label = arg_label(op->args[0]);
3510 
3511             /*
3512              * Note that the first op in the TB is always a load,
3513              * so there is always something before a label.
3514              */
3515             op_prev = QTAILQ_PREV(op, link);
3516 
3517             /*
3518              * If we find two sequential labels, move all branches to
3519              * reference the second label and remove the first label.
3520              * Do this before the branch-to-next optimization, so that the
3521              * middle label is out of the way.
3522              */
3523             if (op_prev->opc == INDEX_op_set_label) {
3524                 move_label_uses(label, arg_label(op_prev->args[0]));
3525                 tcg_op_remove(s, op_prev);
3526                 op_prev = QTAILQ_PREV(op, link);
3527             }
3528 
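            /*
             * Illustration: for "set_label L1; set_label L2", branches to
             * L1 are redirected to L2 and the L1 op is removed, so the
             * branch-to-next check below only has to look at one label.
             */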
3529             /*
3530              * Optimization can fold conditional branches to unconditional.
3531              * If we find a label which is preceded by an unconditional
3532              * branch to next, remove the branch.  We couldn't do this when
3533              * processing the branch because any dead code between the branch
3534              * and label had not yet been removed.
3535              */
3536             if (op_prev->opc == INDEX_op_br &&
3537                 label == arg_label(op_prev->args[0])) {
3538                 tcg_op_remove(s, op_prev);
3539                 /* Fall through means insns become live again.  */
3540                 dead = false;
3541             }
3542 
3543             if (QSIMPLEQ_EMPTY(&label->branches)) {
3544                 /*
3545                  * While there is an occasional backward branch, virtually
3546                  * all branches generated by the translators are forward.
3547                  * Which means that by the time we reach a label, we will
3548                  * generally have removed every reference it will ever have,
3549                  * and there is little to be gained by iterating.
3550                  */
3551                 remove = true;
3552             } else {
3553                 /* Once we see a label, insns become live again.  */
3554                 dead = false;
3555                 remove = false;
3556             }
3557             break;
3558 
3559         case INDEX_op_br:
3560         case INDEX_op_exit_tb:
3561         case INDEX_op_goto_ptr:
3562             /* Unconditional branches; everything following is dead.  */
3563             dead = true;
3564             break;
3565 
3566         case INDEX_op_call:
3567             /* Notice noreturn helper calls, raising exceptions.  */
3568             if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
3569                 dead = true;
3570             }
3571             break;
3572 
3573         case INDEX_op_insn_start:
3574             /* Never remove -- we need to keep these for unwind.  */
3575             remove = false;
3576             break;
3577 
3578         default:
3579             break;
3580         }
3581 
3582         if (remove) {
3583             tcg_op_remove(s, op);
3584         }
3585     }
3586 }
3587 
3588 #define TS_DEAD  1
3589 #define TS_MEM   2
3590 
3591 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
3592 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
3593 
3594 /* For liveness_pass_1, the register preferences for a given temp.  */
3595 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3596 {
3597     return ts->state_ptr;
3598 }
3599 
3600 /* For liveness_pass_1, reset the preferences for a given temp to the
3601  * maximal regset for its type.
3602  */
3603 static inline void la_reset_pref(TCGTemp *ts)
3604 {
3605     *la_temp_pref(ts)
3606         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3607 }
3608 
3609 /* liveness analysis: end of function: all temps are dead, and globals
3610    should be in memory. */
3611 static void la_func_end(TCGContext *s, int ng, int nt)
3612 {
3613     int i;
3614 
3615     for (i = 0; i < ng; ++i) {
3616         s->temps[i].state = TS_DEAD | TS_MEM;
3617         la_reset_pref(&s->temps[i]);
3618     }
3619     for (i = ng; i < nt; ++i) {
3620         s->temps[i].state = TS_DEAD;
3621         la_reset_pref(&s->temps[i]);
3622     }
3623 }
3624 
3625 /* liveness analysis: end of basic block: all temps are dead, globals
3626    and local temps should be in memory. */
3627 static void la_bb_end(TCGContext *s, int ng, int nt)
3628 {
3629     int i;
3630 
3631     for (i = 0; i < nt; ++i) {
3632         TCGTemp *ts = &s->temps[i];
3633         int state;
3634 
3635         switch (ts->kind) {
3636         case TEMP_FIXED:
3637         case TEMP_GLOBAL:
3638         case TEMP_TB:
3639             state = TS_DEAD | TS_MEM;
3640             break;
3641         case TEMP_EBB:
3642         case TEMP_CONST:
3643             state = TS_DEAD;
3644             break;
3645         default:
3646             g_assert_not_reached();
3647         }
3648         ts->state = state;
3649         la_reset_pref(ts);
3650     }
3651 }
3652 
3653 /* liveness analysis: sync globals back to memory.  */
3654 static void la_global_sync(TCGContext *s, int ng)
3655 {
3656     int i;
3657 
3658     for (i = 0; i < ng; ++i) {
3659         int state = s->temps[i].state;
3660         s->temps[i].state = state | TS_MEM;
3661         if (state == TS_DEAD) {
3662             /* If the global was previously dead, reset prefs.  */
3663             la_reset_pref(&s->temps[i]);
3664         }
3665     }
3666 }
3667 
3668 /*
3669  * liveness analysis: conditional branch: all temps are dead unless
3670  * explicitly live-across-conditional-branch; globals and local temps
3671  * should be synced.
3672  */
3673 static void la_bb_sync(TCGContext *s, int ng, int nt)
3674 {
3675     la_global_sync(s, ng);
3676 
3677     for (int i = ng; i < nt; ++i) {
3678         TCGTemp *ts = &s->temps[i];
3679         int state;
3680 
3681         switch (ts->kind) {
3682         case TEMP_TB:
3683             state = ts->state;
3684             ts->state = state | TS_MEM;
3685             if (state != TS_DEAD) {
3686                 continue;
3687             }
3688             break;
3689         case TEMP_EBB:
3690         case TEMP_CONST:
3691             continue;
3692         default:
3693             g_assert_not_reached();
3694         }
3695         la_reset_pref(&s->temps[i]);
3696     }
3697 }
3698 
3699 /* liveness analysis: sync globals back to memory and kill.  */
3700 static void la_global_kill(TCGContext *s, int ng)
3701 {
3702     int i;
3703 
3704     for (i = 0; i < ng; i++) {
3705         s->temps[i].state = TS_DEAD | TS_MEM;
3706         la_reset_pref(&s->temps[i]);
3707     }
3708 }
3709 
3710 /* liveness analysis: note live globals crossing calls.  */
3711 static void la_cross_call(TCGContext *s, int nt)
3712 {
3713     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3714     int i;
3715 
3716     for (i = 0; i < nt; i++) {
3717         TCGTemp *ts = &s->temps[i];
3718         if (!(ts->state & TS_DEAD)) {
3719             TCGRegSet *pset = la_temp_pref(ts);
3720             TCGRegSet set = *pset;
3721 
3722             set &= mask;
3723             /* If the combination is not possible, restart.  */
3724             if (set == 0) {
3725                 set = tcg_target_available_regs[ts->type] & mask;
3726             }
3727             *pset = set;
3728         }
3729     }
3730 }
3731 
3732 /*
3733  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3734  * to TEMP_EBB, if possible.
3735  */
3736 static void __attribute__((noinline))
3737 liveness_pass_0(TCGContext *s)
3738 {
3739     void * const multiple_ebb = (void *)(uintptr_t)-1;
3740     int nb_temps = s->nb_temps;
3741     TCGOp *op, *ebb;
3742 
3743     for (int i = s->nb_globals; i < nb_temps; ++i) {
3744         s->temps[i].state_ptr = NULL;
3745     }
3746 
3747     /*
3748      * Represent each EBB by the op at which it begins.  In the case of
3749      * the first EBB, this is the first op, otherwise it is a label.
3750      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3751      * within a single EBB, else MULTIPLE_EBB.
3752      */
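    /*
     * Illustration: a TEMP_TB used only between one label and the next
     * records that single ebb pointer and is later reduced to TEMP_EBB;
     * one referenced both before and after a label sees two different
     * ebb values and becomes MULTIPLE_EBB, keeping its TB lifetime.
     */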
3753     ebb = QTAILQ_FIRST(&s->ops);
3754     QTAILQ_FOREACH(op, &s->ops, link) {
3755         const TCGOpDef *def;
3756         int nb_oargs, nb_iargs;
3757 
3758         switch (op->opc) {
3759         case INDEX_op_set_label:
3760             ebb = op;
3761             continue;
3762         case INDEX_op_discard:
3763             continue;
3764         case INDEX_op_call:
3765             nb_oargs = TCGOP_CALLO(op);
3766             nb_iargs = TCGOP_CALLI(op);
3767             break;
3768         default:
3769             def = &tcg_op_defs[op->opc];
3770             nb_oargs = def->nb_oargs;
3771             nb_iargs = def->nb_iargs;
3772             break;
3773         }
3774 
3775         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3776             TCGTemp *ts = arg_temp(op->args[i]);
3777 
3778             if (ts->kind != TEMP_TB) {
3779                 continue;
3780             }
3781             if (ts->state_ptr == NULL) {
3782                 ts->state_ptr = ebb;
3783             } else if (ts->state_ptr != ebb) {
3784                 ts->state_ptr = multiple_ebb;
3785             }
3786         }
3787     }
3788 
3789     /*
3790      * For TEMP_TB that turned out not to be used beyond one EBB,
3791      * reduce the liveness to TEMP_EBB.
3792      */
3793     for (int i = s->nb_globals; i < nb_temps; ++i) {
3794         TCGTemp *ts = &s->temps[i];
3795         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3796             ts->kind = TEMP_EBB;
3797         }
3798     }
3799 }
3800 
3801 /* Liveness analysis: update the opc_arg_life array to tell if a
3802    given input argument is dead. Instructions updating dead
3803    temporaries are removed. */
3804 static void __attribute__((noinline))
3805 liveness_pass_1(TCGContext *s)
3806 {
3807     int nb_globals = s->nb_globals;
3808     int nb_temps = s->nb_temps;
3809     TCGOp *op, *op_prev;
3810     TCGRegSet *prefs;
3811     int i;
3812 
3813     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
3814     for (i = 0; i < nb_temps; ++i) {
3815         s->temps[i].state_ptr = prefs + i;
3816     }
3817 
3818     /* ??? Should be redundant with the exit_tb that ends the TB.  */
3819     la_func_end(s, nb_globals, nb_temps);
3820 
3821     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
3822         int nb_iargs, nb_oargs;
3823         TCGOpcode opc_new, opc_new2;
3824         bool have_opc_new2;
3825         TCGLifeData arg_life = 0;
3826         TCGTemp *ts;
3827         TCGOpcode opc = op->opc;
3828         const TCGOpDef *def = &tcg_op_defs[opc];
3829         const TCGArgConstraint *args_ct;
3830 
3831         switch (opc) {
3832         case INDEX_op_call:
3833             {
3834                 const TCGHelperInfo *info = tcg_call_info(op);
3835                 int call_flags = tcg_call_flags(op);
3836 
3837                 nb_oargs = TCGOP_CALLO(op);
3838                 nb_iargs = TCGOP_CALLI(op);
3839 
3840                 /* pure functions can be removed if their result is unused */
3841                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
3842                     for (i = 0; i < nb_oargs; i++) {
3843                         ts = arg_temp(op->args[i]);
3844                         if (ts->state != TS_DEAD) {
3845                             goto do_not_remove_call;
3846                         }
3847                     }
3848                     goto do_remove;
3849                 }
3850             do_not_remove_call:
3851 
3852                 /* Output args are dead.  */
3853                 for (i = 0; i < nb_oargs; i++) {
3854                     ts = arg_temp(op->args[i]);
3855                     if (ts->state & TS_DEAD) {
3856                         arg_life |= DEAD_ARG << i;
3857                     }
3858                     if (ts->state & TS_MEM) {
3859                         arg_life |= SYNC_ARG << i;
3860                     }
3861                     ts->state = TS_DEAD;
3862                     la_reset_pref(ts);
3863                 }
3864 
3865                 /* Not used -- it will be tcg_target_call_oarg_reg().  */
3866                 memset(op->output_pref, 0, sizeof(op->output_pref));
3867 
3868                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
3869                                     TCG_CALL_NO_READ_GLOBALS))) {
3870                     la_global_kill(s, nb_globals);
3871                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
3872                     la_global_sync(s, nb_globals);
3873                 }
3874 
3875                 /* Record arguments that die in this helper.  */
3876                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3877                     ts = arg_temp(op->args[i]);
3878                     if (ts->state & TS_DEAD) {
3879                         arg_life |= DEAD_ARG << i;
3880                     }
3881                 }
3882 
3883                 /* For all live registers, remove call-clobbered prefs.  */
3884                 la_cross_call(s, nb_temps);
3885 
3886                 /*
3887                  * Input arguments are live for preceding opcodes.
3888                  *
3889                  * For those arguments that die, and will be allocated in
3890                  * registers, clear the register set for that arg, to be
3891                  * filled in below.  For args that will be on the stack,
3892                  * reset to any available reg.  Process arguments in reverse
3893                  * order so that if a temp is used more than once, the stack
3894                  * reset to max happens before the register reset to 0.
3895                  */
3896                 for (i = nb_iargs - 1; i >= 0; i--) {
3897                     const TCGCallArgumentLoc *loc = &info->in[i];
3898                     ts = arg_temp(op->args[nb_oargs + i]);
3899 
3900                     if (ts->state & TS_DEAD) {
3901                         switch (loc->kind) {
3902                         case TCG_CALL_ARG_NORMAL:
3903                         case TCG_CALL_ARG_EXTEND_U:
3904                         case TCG_CALL_ARG_EXTEND_S:
3905                             if (arg_slot_reg_p(loc->arg_slot)) {
3906                                 *la_temp_pref(ts) = 0;
3907                                 break;
3908                             }
3909                             /* fall through */
3910                         default:
3911                             *la_temp_pref(ts) =
3912                                 tcg_target_available_regs[ts->type];
3913                             break;
3914                         }
3915                         ts->state &= ~TS_DEAD;
3916                     }
3917                 }
3918 
3919                 /*
3920                  * For each input argument, add its input register to prefs.
3921                  * If a temp is used once, this produces a single set bit;
3922                  * if a temp is used multiple times, this produces a set.
3923                  */
3924                 for (i = 0; i < nb_iargs; i++) {
3925                     const TCGCallArgumentLoc *loc = &info->in[i];
3926                     ts = arg_temp(op->args[nb_oargs + i]);
3927 
3928                     switch (loc->kind) {
3929                     case TCG_CALL_ARG_NORMAL:
3930                     case TCG_CALL_ARG_EXTEND_U:
3931                     case TCG_CALL_ARG_EXTEND_S:
3932                         if (arg_slot_reg_p(loc->arg_slot)) {
3933                             tcg_regset_set_reg(*la_temp_pref(ts),
3934                                 tcg_target_call_iarg_regs[loc->arg_slot]);
3935                         }
3936                         break;
3937                     default:
3938                         break;
3939                     }
3940                 }
3941             }
3942             break;
3943         case INDEX_op_insn_start:
3944             break;
3945         case INDEX_op_discard:
3946             /* mark the temporary as dead */
3947             ts = arg_temp(op->args[0]);
3948             ts->state = TS_DEAD;
3949             la_reset_pref(ts);
3950             break;
3951 
3952         case INDEX_op_add2_i32:
3953             opc_new = INDEX_op_add_i32;
3954             goto do_addsub2;
3955         case INDEX_op_sub2_i32:
3956             opc_new = INDEX_op_sub_i32;
3957             goto do_addsub2;
3958         case INDEX_op_add2_i64:
3959             opc_new = INDEX_op_add_i64;
3960             goto do_addsub2;
3961         case INDEX_op_sub2_i64:
3962             opc_new = INDEX_op_sub_i64;
3963         do_addsub2:
3964             nb_iargs = 4;
3965             nb_oargs = 2;
3966             /* Test if the high part of the operation is dead, but not
3967                the low part.  The result can be optimized to a simple
3968                add or sub.  This happens often for an x86_64 guest when
3969                the cpu mode is set to 32 bit.  */
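            /*
             * Illustration: "add2_i32 lo, hi, al, ah, bl, bh" with hi dead
             * but lo live is rewritten in place below to
             * "add_i32 lo, al, bl"; the dead high-part computation simply
             * disappears.
             */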
3970             if (arg_temp(op->args[1])->state == TS_DEAD) {
3971                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3972                     goto do_remove;
3973                 }
3974                 /* Replace the opcode and adjust the args in place,
3975                    leaving 3 unused args at the end.  */
3976                 op->opc = opc = opc_new;
3977                 op->args[1] = op->args[2];
3978                 op->args[2] = op->args[4];
3979                 /* Fall through and mark the single-word operation live.  */
3980                 nb_iargs = 2;
3981                 nb_oargs = 1;
3982             }
3983             goto do_not_remove;
3984 
3985         case INDEX_op_mulu2_i32:
3986             opc_new = INDEX_op_mul_i32;
3987             opc_new2 = INDEX_op_muluh_i32;
3988             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
3989             goto do_mul2;
3990         case INDEX_op_muls2_i32:
3991             opc_new = INDEX_op_mul_i32;
3992             opc_new2 = INDEX_op_mulsh_i32;
3993             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
3994             goto do_mul2;
3995         case INDEX_op_mulu2_i64:
3996             opc_new = INDEX_op_mul_i64;
3997             opc_new2 = INDEX_op_muluh_i64;
3998             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
3999             goto do_mul2;
4000         case INDEX_op_muls2_i64:
4001             opc_new = INDEX_op_mul_i64;
4002             opc_new2 = INDEX_op_mulsh_i64;
4003             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
4004             goto do_mul2;
4005         do_mul2:
4006             nb_iargs = 2;
4007             nb_oargs = 2;
4008             if (arg_temp(op->args[1])->state == TS_DEAD) {
4009                 if (arg_temp(op->args[0])->state == TS_DEAD) {
4010                     /* Both parts of the operation are dead.  */
4011                     goto do_remove;
4012                 }
4013                 /* The high part of the operation is dead; generate the low. */
4014                 op->opc = opc = opc_new;
4015                 op->args[1] = op->args[2];
4016                 op->args[2] = op->args[3];
4017             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
4018                 /* The low part of the operation is dead; generate the high. */
4019                 op->opc = opc = opc_new2;
4020                 op->args[0] = op->args[1];
4021                 op->args[1] = op->args[2];
4022                 op->args[2] = op->args[3];
4023             } else {
4024                 goto do_not_remove;
4025             }
4026             /* Mark the single-word operation live.  */
4027             nb_oargs = 1;
4028             goto do_not_remove;
4029 
4030         default:
4031             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
4032             nb_iargs = def->nb_iargs;
4033             nb_oargs = def->nb_oargs;
4034 
4035             /* Test if the operation can be removed because all
4036                its outputs are dead. We assume that nb_oargs == 0
4037                implies side effects */
4038             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
4039                 for (i = 0; i < nb_oargs; i++) {
4040                     if (arg_temp(op->args[i])->state != TS_DEAD) {
4041                         goto do_not_remove;
4042                     }
4043                 }
4044                 goto do_remove;
4045             }
4046             goto do_not_remove;
4047 
4048         do_remove:
4049             tcg_op_remove(s, op);
4050             break;
4051 
4052         do_not_remove:
4053             for (i = 0; i < nb_oargs; i++) {
4054                 ts = arg_temp(op->args[i]);
4055 
4056                 /* Remember the preference of the uses that followed.  */
4057                 if (i < ARRAY_SIZE(op->output_pref)) {
4058                     op->output_pref[i] = *la_temp_pref(ts);
4059                 }
4060 
4061                 /* Output args are dead.  */
4062                 if (ts->state & TS_DEAD) {
4063                     arg_life |= DEAD_ARG << i;
4064                 }
4065                 if (ts->state & TS_MEM) {
4066                     arg_life |= SYNC_ARG << i;
4067                 }
4068                 ts->state = TS_DEAD;
4069                 la_reset_pref(ts);
4070             }
4071 
4072             /* If end of basic block, update.  */
4073             if (def->flags & TCG_OPF_BB_EXIT) {
4074                 la_func_end(s, nb_globals, nb_temps);
4075             } else if (def->flags & TCG_OPF_COND_BRANCH) {
4076                 la_bb_sync(s, nb_globals, nb_temps);
4077             } else if (def->flags & TCG_OPF_BB_END) {
4078                 la_bb_end(s, nb_globals, nb_temps);
4079             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4080                 la_global_sync(s, nb_globals);
4081                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
4082                     la_cross_call(s, nb_temps);
4083                 }
4084             }
4085 
4086             /* Record arguments that die in this opcode.  */
4087             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4088                 ts = arg_temp(op->args[i]);
4089                 if (ts->state & TS_DEAD) {
4090                     arg_life |= DEAD_ARG << i;
4091                 }
4092             }
4093 
4094             /* Input arguments are live for preceding opcodes.  */
4095             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4096                 ts = arg_temp(op->args[i]);
4097                 if (ts->state & TS_DEAD) {
4098                     /* For operands that were dead, initially allow
4099                        all regs for the type.  */
4100                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
4101                     ts->state &= ~TS_DEAD;
4102                 }
4103             }
4104 
4105             /* Incorporate constraints for this operand.  */
4106             switch (opc) {
4107             case INDEX_op_mov_i32:
4108             case INDEX_op_mov_i64:
4109                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
4110                    have proper constraints.  That said, special case
4111                    moves to propagate preferences backward.  */
4112                 if (IS_DEAD_ARG(1)) {
4113                     *la_temp_pref(arg_temp(op->args[0]))
4114                         = *la_temp_pref(arg_temp(op->args[1]));
4115                 }
4116                 break;
4117 
4118             default:
4119                 args_ct = opcode_args_ct(op);
4120                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4121                     const TCGArgConstraint *ct = &args_ct[i];
4122                     TCGRegSet set, *pset;
4123 
4124                     ts = arg_temp(op->args[i]);
4125                     pset = la_temp_pref(ts);
4126                     set = *pset;
4127 
4128                     set &= ct->regs;
4129                     if (ct->ialias) {
4130                         set &= output_pref(op, ct->alias_index);
4131                     }
4132                     /* If the combination is not possible, restart.  */
4133                     if (set == 0) {
4134                         set = ct->regs;
4135                     }
4136                     *pset = set;
4137                 }
4138                 break;
4139             }
4140             break;
4141         }
4142         op->life = arg_life;
4143     }
4144 }
4145 
4146 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
4147 static bool __attribute__((noinline))
4148 liveness_pass_2(TCGContext *s)
4149 {
4150     int nb_globals = s->nb_globals;
4151     int nb_temps, i;
4152     bool changes = false;
4153     TCGOp *op, *op_next;
4154 
4155     /* Create a temporary for each indirect global.  */
4156     for (i = 0; i < nb_globals; ++i) {
4157         TCGTemp *its = &s->temps[i];
4158         if (its->indirect_reg) {
4159             TCGTemp *dts = tcg_temp_alloc(s);
4160             dts->type = its->type;
4161             dts->base_type = its->base_type;
4162             dts->temp_subindex = its->temp_subindex;
4163             dts->kind = TEMP_EBB;
4164             its->state_ptr = dts;
4165         } else {
4166             its->state_ptr = NULL;
4167         }
4168         /* All globals begin dead.  */
4169         its->state = TS_DEAD;
4170     }
4171     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
4172         TCGTemp *its = &s->temps[i];
4173         its->state_ptr = NULL;
4174         its->state = TS_DEAD;
4175     }
4176 
4177     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
4178         TCGOpcode opc = op->opc;
4179         const TCGOpDef *def = &tcg_op_defs[opc];
4180         TCGLifeData arg_life = op->life;
4181         int nb_iargs, nb_oargs, call_flags;
4182         TCGTemp *arg_ts, *dir_ts;
4183 
4184         if (opc == INDEX_op_call) {
4185             nb_oargs = TCGOP_CALLO(op);
4186             nb_iargs = TCGOP_CALLI(op);
4187             call_flags = tcg_call_flags(op);
4188         } else {
4189             nb_iargs = def->nb_iargs;
4190             nb_oargs = def->nb_oargs;
4191 
4192             /* Set flags analogous to those required for calls.  */
4193             if (def->flags & TCG_OPF_COND_BRANCH) {
4194                 /* Like reading globals: sync_globals */
4195                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4196             } else if (def->flags & TCG_OPF_BB_END) {
4197                 /* Like writing globals: save_globals */
4198                 call_flags = 0;
4199             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4200                 /* Like reading globals: sync_globals */
4201                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4202             } else {
4203                 /* No effect on globals.  */
4204                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
4205                               TCG_CALL_NO_WRITE_GLOBALS);
4206             }
4207         }
4208 
4209         /* Make sure that input arguments are available.  */
4210         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4211             arg_ts = arg_temp(op->args[i]);
4212             dir_ts = arg_ts->state_ptr;
4213             if (dir_ts && arg_ts->state == TS_DEAD) {
4214                 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
4215                                   ? INDEX_op_ld_i32
4216                                   : INDEX_op_ld_i64);
4217                 TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);
4218 
4219                 lop->args[0] = temp_arg(dir_ts);
4220                 lop->args[1] = temp_arg(arg_ts->mem_base);
4221                 lop->args[2] = arg_ts->mem_offset;
4222 
4223                 /* Loaded, but synced with memory.  */
4224                 arg_ts->state = TS_MEM;
4225             }
4226         }
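        /*
         * Illustration: for an indirect global g shadowed by direct temp d,
         * the first use of g while TS_DEAD gets a preceding
         * "ld_{i32,i64} d, mem_base(g), mem_offset(g)" inserted above, and
         * the use itself is rewritten to d in the loop below.
         */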
4227 
4228         /* Perform input replacement, and mark inputs that became dead.
4229            No action is required except keeping temp_state up to date
4230            so that we reload when needed.  */
4231         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4232             arg_ts = arg_temp(op->args[i]);
4233             dir_ts = arg_ts->state_ptr;
4234             if (dir_ts) {
4235                 op->args[i] = temp_arg(dir_ts);
4236                 changes = true;
4237                 if (IS_DEAD_ARG(i)) {
4238                     arg_ts->state = TS_DEAD;
4239                 }
4240             }
4241         }
4242 
4243         /* Liveness analysis should ensure that the following are
4244            all correct, for call sites and basic block end points.  */
4245         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
4246             /* Nothing to do */
4247         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
4248             for (i = 0; i < nb_globals; ++i) {
4249                 /* Liveness should see that globals are synced back,
4250                    that is, either TS_DEAD or TS_MEM.  */
4251                 arg_ts = &s->temps[i];
4252                 tcg_debug_assert(arg_ts->state_ptr == 0
4253                                  || arg_ts->state != 0);
4254             }
4255         } else {
4256             for (i = 0; i < nb_globals; ++i) {
4257                 /* Liveness should see that globals are saved back,
4258                    that is, TS_DEAD, waiting to be reloaded.  */
4259                 arg_ts = &s->temps[i];
4260                 tcg_debug_assert(arg_ts->state_ptr == 0
4261                                  || arg_ts->state == TS_DEAD);
4262             }
4263         }
4264 
4265         /* Outputs become available.  */
4266         if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
4267             arg_ts = arg_temp(op->args[0]);
4268             dir_ts = arg_ts->state_ptr;
4269             if (dir_ts) {
4270                 op->args[0] = temp_arg(dir_ts);
4271                 changes = true;
4272 
4273                 /* The output is now live and modified.  */
4274                 arg_ts->state = 0;
4275 
4276                 if (NEED_SYNC_ARG(0)) {
4277                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4278                                       ? INDEX_op_st_i32
4279                                       : INDEX_op_st_i64);
4280                     TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
4281                     TCGTemp *out_ts = dir_ts;
4282 
4283                     if (IS_DEAD_ARG(0)) {
4284                         out_ts = arg_temp(op->args[1]);
4285                         arg_ts->state = TS_DEAD;
4286                         tcg_op_remove(s, op);
4287                     } else {
4288                         arg_ts->state = TS_MEM;
4289                     }
4290 
4291                     sop->args[0] = temp_arg(out_ts);
4292                     sop->args[1] = temp_arg(arg_ts->mem_base);
4293                     sop->args[2] = arg_ts->mem_offset;
4294                 } else {
4295                     tcg_debug_assert(!IS_DEAD_ARG(0));
4296                 }
4297             }
4298         } else {
4299             for (i = 0; i < nb_oargs; i++) {
4300                 arg_ts = arg_temp(op->args[i]);
4301                 dir_ts = arg_ts->state_ptr;
4302                 if (!dir_ts) {
4303                     continue;
4304                 }
4305                 op->args[i] = temp_arg(dir_ts);
4306                 changes = true;
4307 
4308                 /* The output is now live and modified.  */
4309                 arg_ts->state = 0;
4310 
4311                 /* Sync outputs upon their last write.  */
4312                 if (NEED_SYNC_ARG(i)) {
4313                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4314                                       ? INDEX_op_st_i32
4315                                       : INDEX_op_st_i64);
4316                     TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
4317 
4318                     sop->args[0] = temp_arg(dir_ts);
4319                     sop->args[1] = temp_arg(arg_ts->mem_base);
4320                     sop->args[2] = arg_ts->mem_offset;
4321 
4322                     arg_ts->state = TS_MEM;
4323                 }
4324                 /* Drop outputs that are dead.  */
4325                 if (IS_DEAD_ARG(i)) {
4326                     arg_ts->state = TS_DEAD;
4327                 }
4328             }
4329         }
4330     }
4331 
4332     return changes;
4333 }
4334 
4335 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
4336 {
4337     intptr_t off;
4338     int size, align;
4339 
4340     /* When allocating an object, look at the full type. */
4341     size = tcg_type_size(ts->base_type);
4342     switch (ts->base_type) {
4343     case TCG_TYPE_I32:
4344         align = 4;
4345         break;
4346     case TCG_TYPE_I64:
4347     case TCG_TYPE_V64:
4348         align = 8;
4349         break;
4350     case TCG_TYPE_I128:
4351     case TCG_TYPE_V128:
4352     case TCG_TYPE_V256:
4353         /*
4354          * Note that we do not require aligned storage for V256,
4355          * and that we provide alignment for I128 to match V128,
4356          * even if that's above what the host ABI requires.
4357          */
4358         align = 16;
4359         break;
4360     default:
4361         g_assert_not_reached();
4362     }
4363 
4364     /*
4365      * Assume the stack is sufficiently aligned.
4366      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
4367      * and do not require 16 byte vector alignment.  This seems slightly
4368      * easier than fully parameterizing the above switch statement.
4369      */
4370     align = MIN(TCG_TARGET_STACK_ALIGN, align);
4371     off = ROUND_UP(s->current_frame_offset, align);
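    /*
     * Worked example: on a host with an 8-byte aligned stack
     * (TCG_TARGET_STACK_ALIGN == 8), a V128 temp has align clamped from
     * 16 to 8, so a current_frame_offset of 20 yields off == 24.
     */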
4372 
4373     /* If we've exhausted the stack frame, restart with a smaller TB. */
4374     if (off + size > s->frame_end) {
4375         tcg_raise_tb_overflow(s);
4376     }
4377     s->current_frame_offset = off + size;
4378 #if defined(__sparc__)
4379     off += TCG_TARGET_STACK_BIAS;
4380 #endif
4381 
4382     /* If the object was subdivided, assign memory to all the parts. */
4383     if (ts->base_type != ts->type) {
4384         int part_size = tcg_type_size(ts->type);
4385         int part_count = size / part_size;
4386 
4387         /*
4388          * Each part is allocated sequentially in tcg_temp_new_internal.
4389          * Jump back to the first part by subtracting the current index.
4390          */
4391         ts -= ts->temp_subindex;
4392         for (int i = 0; i < part_count; ++i) {
4393             ts[i].mem_offset = off + i * part_size;
4394             ts[i].mem_base = s->frame_temp;
4395             ts[i].mem_allocated = 1;
4396         }
4397     } else {
4398         ts->mem_offset = off;
4399         ts->mem_base = s->frame_temp;
4400         ts->mem_allocated = 1;
4401     }
4402 }
4403 
4404 /* Assign @reg to @ts, and update reg_to_temp[]. */
4405 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4406 {
4407     if (ts->val_type == TEMP_VAL_REG) {
4408         TCGReg old = ts->reg;
4409         tcg_debug_assert(s->reg_to_temp[old] == ts);
4410         if (old == reg) {
4411             return;
4412         }
4413         s->reg_to_temp[old] = NULL;
4414     }
4415     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4416     s->reg_to_temp[reg] = ts;
4417     ts->val_type = TEMP_VAL_REG;
4418     ts->reg = reg;
4419 }
4420 
4421 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4422 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4423 {
4424     tcg_debug_assert(type != TEMP_VAL_REG);
4425     if (ts->val_type == TEMP_VAL_REG) {
4426         TCGReg reg = ts->reg;
4427         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4428         s->reg_to_temp[reg] = NULL;
4429     }
4430     ts->val_type = type;
4431 }
4432 
4433 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4434 
4435 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4436    mark it free; otherwise mark it dead.  */
4437 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4438 {
4439     TCGTempVal new_type;
4440 
4441     switch (ts->kind) {
4442     case TEMP_FIXED:
4443         return;
4444     case TEMP_GLOBAL:
4445     case TEMP_TB:
4446         new_type = TEMP_VAL_MEM;
4447         break;
4448     case TEMP_EBB:
4449         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4450         break;
4451     case TEMP_CONST:
4452         new_type = TEMP_VAL_CONST;
4453         break;
4454     default:
4455         g_assert_not_reached();
4456     }
4457     set_temp_val_nonreg(s, ts, new_type);
4458 }
4459 
4460 /* Mark a temporary as dead.  */
4461 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4462 {
4463     temp_free_or_dead(s, ts, 1);
4464 }
4465 
4466 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
4467    register needs to be allocated to store a constant.  If 'free_or_dead'
4468    is non-zero, subsequently release the temporary; if it is positive, the
4469    temp is dead; if it is negative, the temp is free.  */
4470 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
4471                       TCGRegSet preferred_regs, int free_or_dead)
4472 {
4473     if (!temp_readonly(ts) && !ts->mem_coherent) {
4474         if (!ts->mem_allocated) {
4475             temp_allocate_frame(s, ts);
4476         }
4477         switch (ts->val_type) {
4478         case TEMP_VAL_CONST:
4479             /* If we're going to free the temp immediately, then we won't
4480                require it later in a register, so attempt to store the
4481                constant to memory directly.  */
4482             if (free_or_dead
4483                 && tcg_out_sti(s, ts->type, ts->val,
4484                                ts->mem_base->reg, ts->mem_offset)) {
4485                 break;
4486             }
4487             temp_load(s, ts, tcg_target_available_regs[ts->type],
4488                       allocated_regs, preferred_regs);
4489             /* fallthrough */
4490 
4491         case TEMP_VAL_REG:
4492             tcg_out_st(s, ts->type, ts->reg,
4493                        ts->mem_base->reg, ts->mem_offset);
4494             break;
4495 
4496         case TEMP_VAL_MEM:
4497             break;
4498 
4499         case TEMP_VAL_DEAD:
4500         default:
4501             g_assert_not_reached();
4502         }
4503         ts->mem_coherent = 1;
4504     }
4505     if (free_or_dead) {
4506         temp_free_or_dead(s, ts, free_or_dead);
4507     }
4508 }
4509 
4510 /* free register 'reg' by spilling the corresponding temporary if necessary */
4511 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4512 {
4513     TCGTemp *ts = s->reg_to_temp[reg];
4514     if (ts != NULL) {
4515         temp_sync(s, ts, allocated_regs, 0, -1);
4516     }
4517 }
4518 
4519 /**
4520  * tcg_reg_alloc:
4521  * @required_regs: Set of registers in which we must allocate.
4522  * @allocated_regs: Set of registers which must be avoided.
4523  * @preferred_regs: Set of registers we should prefer.
4524  * @rev: True if we search the registers in "indirect" order.
4525  *
4526  * The allocated register must be in @required_regs & ~@allocated_regs,
4527  * but if we can put it in @preferred_regs we may save a move later.
4528  */
4529 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4530                             TCGRegSet allocated_regs,
4531                             TCGRegSet preferred_regs, bool rev)
4532 {
4533     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4534     TCGRegSet reg_ct[2];
4535     const int *order;
4536 
4537     reg_ct[1] = required_regs & ~allocated_regs;
4538     tcg_debug_assert(reg_ct[1] != 0);
4539     reg_ct[0] = reg_ct[1] & preferred_regs;
4540 
4541     /* Skip the preferred_regs option if it cannot be satisfied,
4542        or if the preference made no difference.  */
4543     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
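    /*
     * Worked example with hypothetical masks: required 0x0f and allocated
     * 0x03 give reg_ct[1] = 0x0c; preferred 0x04 gives reg_ct[0] = 0x04
     * and f = 0, so the preferred subset is tried first; preferred 0x30
     * would give reg_ct[0] = 0 and f = 1, skipping that pass.
     */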
4544 
4545     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4546 
4547     /* Try free registers, preferences first.  */
4548     for (j = f; j < 2; j++) {
4549         TCGRegSet set = reg_ct[j];
4550 
4551         if (tcg_regset_single(set)) {
4552             /* One register in the set.  */
4553             TCGReg reg = tcg_regset_first(set);
4554             if (s->reg_to_temp[reg] == NULL) {
4555                 return reg;
4556             }
4557         } else {
4558             for (i = 0; i < n; i++) {
4559                 TCGReg reg = order[i];
4560                 if (s->reg_to_temp[reg] == NULL &&
4561                     tcg_regset_test_reg(set, reg)) {
4562                     return reg;
4563                 }
4564             }
4565         }
4566     }
4567 
4568     /* We must spill something.  */
4569     for (j = f; j < 2; j++) {
4570         TCGRegSet set = reg_ct[j];
4571 
4572         if (tcg_regset_single(set)) {
4573             /* One register in the set.  */
4574             TCGReg reg = tcg_regset_first(set);
4575             tcg_reg_free(s, reg, allocated_regs);
4576             return reg;
4577         } else {
4578             for (i = 0; i < n; i++) {
4579                 TCGReg reg = order[i];
4580                 if (tcg_regset_test_reg(set, reg)) {
4581                     tcg_reg_free(s, reg, allocated_regs);
4582                     return reg;
4583                 }
4584             }
4585         }
4586     }
4587 
4588     g_assert_not_reached();
4589 }
4590 
4591 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
4592                                  TCGRegSet allocated_regs,
4593                                  TCGRegSet preferred_regs, bool rev)
4594 {
4595     int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4596     TCGRegSet reg_ct[2];
4597     const int *order;
4598 
4599     /* Keep candidate I only if neither I nor I+1 is in allocated_regs. */
4600     reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
4601     tcg_debug_assert(reg_ct[1] != 0);
4602     reg_ct[0] = reg_ct[1] & preferred_regs;
4603 
4604     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4605 
4606     /*
4607      * Skip the preferred_regs option if it cannot be satisfied,
4608      * or if the preference made no difference.
4609      */
4610     k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4611 
4612     /*
4613      * Minimize the number of flushes by looking for 2 free registers first,
4614      * then a single flush, then two flushes.
4615      */
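    /*
     * Below, f counts how many of reg and reg+1 are currently free (0..2):
     * fmin == 2 accepts only fully free pairs, fmin == 1 pairs needing at
     * most one spill, and fmin == 0 any candidate pair.
     */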
4616     for (fmin = 2; fmin >= 0; fmin--) {
4617         for (j = k; j < 2; j++) {
4618             TCGRegSet set = reg_ct[j];
4619 
4620             for (i = 0; i < n; i++) {
4621                 TCGReg reg = order[i];
4622 
4623                 if (tcg_regset_test_reg(set, reg)) {
4624                     int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
4625                     if (f >= fmin) {
4626                         tcg_reg_free(s, reg, allocated_regs);
4627                         tcg_reg_free(s, reg + 1, allocated_regs);
4628                         return reg;
4629                     }
4630                 }
4631             }
4632         }
4633     }
4634     g_assert_not_reached();
4635 }
4636 
4637 /* Make sure the temporary is in a register.  If needed, allocate the register
4638    from DESIRED while avoiding ALLOCATED.  */
4639 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
4640                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
4641 {
4642     TCGReg reg;
4643 
4644     switch (ts->val_type) {
4645     case TEMP_VAL_REG:
4646         return;
4647     case TEMP_VAL_CONST:
4648         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4649                             preferred_regs, ts->indirect_base);
4650         if (ts->type <= TCG_TYPE_I64) {
4651             tcg_out_movi(s, ts->type, reg, ts->val);
4652         } else {
4653             uint64_t val = ts->val;
4654             MemOp vece = MO_64;
4655 
4656             /*
4657              * Find the minimal vector element that matches the constant.
4658              * The targets will, in general, have to do this search
4659              * anyway, so do it generically here.
4660              */
4661             if (val == dup_const(MO_8, val)) {
4662                 vece = MO_8;
4663             } else if (val == dup_const(MO_16, val)) {
4664                 vece = MO_16;
4665             } else if (val == dup_const(MO_32, val)) {
4666                 vece = MO_32;
4667             }
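            /*
             * Worked example: 0x2727272727272727 equals
             * dup_const(MO_8, 0x27) and is emitted as a byte splat, while
             * 0x1234567812345678 first matches at MO_32.
             */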
4668 
4669             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
4670         }
4671         ts->mem_coherent = 0;
4672         break;
4673     case TEMP_VAL_MEM:
4674         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4675                             preferred_regs, ts->indirect_base);
4676         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
4677         ts->mem_coherent = 1;
4678         break;
4679     case TEMP_VAL_DEAD:
4680     default:
4681         g_assert_not_reached();
4682     }
4683     set_temp_val_reg(s, ts, reg);
4684 }
4685 
4686 /* Save a temporary to memory. 'allocated_regs' is used in case a
4687    temporary register needs to be allocated to store a constant.  */
4688 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4689 {
4690     /* The liveness analysis already ensures that globals are back
4691        in memory. Keep a tcg_debug_assert for safety. */
4692     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4693 }
4694 
4695 /* Save globals to their canonical location and assume they can be
4696    modified by the following code. 'allocated_regs' is used in case a
4697    temporary register needs to be allocated to store a constant. */
4698 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4699 {
4700     int i, n;
4701 
4702     for (i = 0, n = s->nb_globals; i < n; i++) {
4703         temp_save(s, &s->temps[i], allocated_regs);
4704     }
4705 }
4706 
4707 /* Sync globals to their canonical location and assume they can be
4708    read by the following code. 'allocated_regs' is used in case a
4709    temporary register needs to be allocated to store a constant. */
4710 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4711 {
4712     int i, n;
4713 
4714     for (i = 0, n = s->nb_globals; i < n; i++) {
4715         TCGTemp *ts = &s->temps[i];
4716         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4717                          || ts->kind == TEMP_FIXED
4718                          || ts->mem_coherent);
4719     }
4720 }
4721 
4722 /* at the end of a basic block, we assume all temporaries are dead and
4723    all globals are stored at their canonical location. */
4724 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4725 {
4726     int i;
4727 
4728     for (i = s->nb_globals; i < s->nb_temps; i++) {
4729         TCGTemp *ts = &s->temps[i];
4730 
4731         switch (ts->kind) {
4732         case TEMP_TB:
4733             temp_save(s, ts, allocated_regs);
4734             break;
4735         case TEMP_EBB:
4736             /* The liveness analysis already ensures that temps are dead.
4737                Keep a tcg_debug_assert for safety. */
4738             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4739             break;
4740         case TEMP_CONST:
4741             /* Similarly, we should have freed any allocated register. */
4742             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4743             break;
4744         default:
4745             g_assert_not_reached();
4746         }
4747     }
4748 
4749     save_globals(s, allocated_regs);
4750 }
4751 
4752 /*
4753  * At a conditional branch, we assume all temporaries are dead unless
4754  * explicitly live-across-conditional-branch; all globals and local
4755  * temps are synced to their location.
4756  */
4757 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4758 {
4759     sync_globals(s, allocated_regs);
4760 
4761     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4762         TCGTemp *ts = &s->temps[i];
4763         /*
4764          * The liveness analysis already ensures that temps are dead.
4765          * Keep tcg_debug_asserts for safety.
4766          */
4767         switch (ts->kind) {
4768         case TEMP_TB:
4769             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4770             break;
4771         case TEMP_EBB:
4772         case TEMP_CONST:
4773             break;
4774         default:
4775             g_assert_not_reached();
4776         }
4777     }
4778 }
4779 
4780 /*
4781  * Specialized code generation for INDEX_op_mov_* with a constant.
4782  */
4783 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4784                                   tcg_target_ulong val, TCGLifeData arg_life,
4785                                   TCGRegSet preferred_regs)
4786 {
4787     /* ENV should not be modified.  */
4788     tcg_debug_assert(!temp_readonly(ots));
4789 
4790     /* The movi is not explicitly generated here.  */
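    /*
     * The constant is only recorded in the temp here; it is materialized
     * later, by temp_sync below or by a subsequent temp_load.
     */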
4791     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4792     ots->val = val;
4793     ots->mem_coherent = 0;
4794     if (NEED_SYNC_ARG(0)) {
4795         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4796     } else if (IS_DEAD_ARG(0)) {
4797         temp_dead(s, ots);
4798     }
4799 }
4800 
4801 /*
4802  * Specialized code generation for INDEX_op_mov_*.
4803  */
4804 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4805 {
4806     const TCGLifeData arg_life = op->life;
4807     TCGRegSet allocated_regs, preferred_regs;
4808     TCGTemp *ts, *ots;
4809     TCGType otype, itype;
4810     TCGReg oreg, ireg;
4811 
4812     allocated_regs = s->reserved_regs;
4813     preferred_regs = output_pref(op, 0);
4814     ots = arg_temp(op->args[0]);
4815     ts = arg_temp(op->args[1]);
4816 
4817     /* ENV should not be modified.  */
4818     tcg_debug_assert(!temp_readonly(ots));
4819 
4820     /* Note that otype != itype for no-op truncation.  */
4821     otype = ots->type;
4822     itype = ts->type;
4823 
4824     if (ts->val_type == TEMP_VAL_CONST) {
4825         /* propagate constant or generate sti */
4826         tcg_target_ulong val = ts->val;
4827         if (IS_DEAD_ARG(1)) {
4828             temp_dead(s, ts);
4829         }
4830         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4831         return;
4832     }
4833 
4834     /* If the source value is in memory we're going to be forced
4835        to have it in a register in order to perform the copy.  Copy
4836        the SOURCE value into its own register first, that way we
4837        don't have to reload SOURCE the next time it is used. */
4838     if (ts->val_type == TEMP_VAL_MEM) {
4839         temp_load(s, ts, tcg_target_available_regs[itype],
4840                   allocated_regs, preferred_regs);
4841     }
4842     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4843     ireg = ts->reg;
4844 
4845     if (IS_DEAD_ARG(0)) {
4846         /* mov to a non-saved dead register makes no sense (even with
4847            liveness analysis disabled). */
4848         tcg_debug_assert(NEED_SYNC_ARG(0));
4849         if (!ots->mem_allocated) {
4850             temp_allocate_frame(s, ots);
4851         }
4852         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4853         if (IS_DEAD_ARG(1)) {
4854             temp_dead(s, ts);
4855         }
4856         temp_dead(s, ots);
4857         return;
4858     }
4859 
4860     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4861         /*
4862          * The mov can be suppressed.  Kill input first, so that it
4863          * is unlinked from reg_to_temp, then set the output to the
4864          * reg that we saved from the input.
4865          */
4866         temp_dead(s, ts);
4867         oreg = ireg;
4868     } else {
4869         if (ots->val_type == TEMP_VAL_REG) {
4870             oreg = ots->reg;
4871         } else {
4872             /* Make sure to not spill the input register during allocation. */
4873             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4874                                  allocated_regs | ((TCGRegSet)1 << ireg),
4875                                  preferred_regs, ots->indirect_base);
4876         }
4877         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4878             /*
4879              * Cross register class move not supported.
4880              * Store the source register into the destination slot
4881              * and leave the destination temp as TEMP_VAL_MEM.
4882              */
4883             assert(!temp_readonly(ots));
4884             if (!ots->mem_allocated) {
4885                 temp_allocate_frame(s, ots);
4886             }
4887             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4888             set_temp_val_nonreg(s, ots, TEMP_VAL_MEM);
4889             ots->mem_coherent = 1;
4890             return;
4891         }
4892     }
4893     set_temp_val_reg(s, ots, oreg);
4894     ots->mem_coherent = 0;
4895 
4896     if (NEED_SYNC_ARG(0)) {
4897         temp_sync(s, ots, allocated_regs, 0, 0);
4898     }
4899 }
4900 
4901 /*
4902  * Specialized code generation for INDEX_op_dup_vec.
4903  */
4904 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4905 {
4906     const TCGLifeData arg_life = op->life;
4907     TCGRegSet dup_out_regs, dup_in_regs;
4908     const TCGArgConstraint *dup_args_ct;
4909     TCGTemp *its, *ots;
4910     TCGType itype, vtype;
4911     unsigned vece;
4912     int lowpart_ofs;
4913     bool ok;
4914 
4915     ots = arg_temp(op->args[0]);
4916     its = arg_temp(op->args[1]);
4917 
4918     /* ENV should not be modified.  */
4919     tcg_debug_assert(!temp_readonly(ots));
4920 
4921     itype = its->type;
4922     vece = TCGOP_VECE(op);
4923     vtype = TCGOP_TYPE(op);
4924 
4925     if (its->val_type == TEMP_VAL_CONST) {
4926         /* Propagate constant via movi -> dupi.  */
4927         tcg_target_ulong val = its->val;
4928         if (IS_DEAD_ARG(1)) {
4929             temp_dead(s, its);
4930         }
4931         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4932         return;
4933     }
4934 
4935     dup_args_ct = opcode_args_ct(op);
4936     dup_out_regs = dup_args_ct[0].regs;
4937     dup_in_regs = dup_args_ct[1].regs;
4938 
4939     /* Allocate the output register now.  */
4940     if (ots->val_type != TEMP_VAL_REG) {
4941         TCGRegSet allocated_regs = s->reserved_regs;
4942         TCGReg oreg;
4943 
4944         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4945             /* Make sure to not spill the input register. */
4946             tcg_regset_set_reg(allocated_regs, its->reg);
4947         }
4948         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4949                              output_pref(op, 0), ots->indirect_base);
4950         set_temp_val_reg(s, ots, oreg);
4951     }
4952 
4953     switch (its->val_type) {
4954     case TEMP_VAL_REG:
4955         /*
4956          * The dup constraints must be broad, covering all possible VECE.
4957          * However, tcg_out_dup_vec() gets to see the VECE and we allow it
4958          * to fail, indicating that extra moves are required for that case.
4959          */
4960         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
4961             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
4962                 goto done;
4963             }
4964             /* Try again from memory or a vector input register.  */
4965         }
4966         if (!its->mem_coherent) {
4967             /*
4968              * The input register is not synced, and so an extra store
4969              * would be required to use memory.  Attempt an integer-vector
4970              * register move first.  We do not have a TCGRegSet for this.
4971              */
4972             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
4973                 break;
4974             }
4975             /* Sync the temp back to its slot and load from there.  */
4976             temp_sync(s, its, s->reserved_regs, 0, 0);
4977         }
4978         /* fall through */
4979 
4980     case TEMP_VAL_MEM:
4981         lowpart_ofs = 0;
4982         if (HOST_BIG_ENDIAN) {
4983             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
4984         }
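        /*
         * E.g. an MO_8 dup from a TCG_TYPE_I64 slot on a big-endian host
         * reads the least significant byte at offset 8 - 1 = 7.
         */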
4985         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
4986                              its->mem_offset + lowpart_ofs)) {
4987             goto done;
4988         }
4989         /* Load the input into the destination vector register. */
4990         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
4991         break;
4992 
4993     default:
4994         g_assert_not_reached();
4995     }
4996 
4997     /* We now have a vector input register, so dup must succeed. */
4998     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
4999     tcg_debug_assert(ok);
5000 
5001  done:
5002     ots->mem_coherent = 0;
5003     if (IS_DEAD_ARG(1)) {
5004         temp_dead(s, its);
5005     }
5006     if (NEED_SYNC_ARG(0)) {
5007         temp_sync(s, ots, s->reserved_regs, 0, 0);
5008     }
5009     if (IS_DEAD_ARG(0)) {
5010         temp_dead(s, ots);
5011     }
5012 }
5013 
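/*
 * Allocate registers for a generic operation: satisfy the input
 * constraints, free dead inputs, honor clobbers and side effects,
 * allocate and constrain the outputs, emit the instruction, then
 * sync or discard the outputs as liveness requires.
 */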
5014 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
5015 {
5016     const TCGLifeData arg_life = op->life;
5017     const TCGOpDef * const def = &tcg_op_defs[op->opc];
5018     TCGRegSet i_allocated_regs;
5019     TCGRegSet o_allocated_regs;
5020     int i, k, nb_iargs, nb_oargs;
5021     TCGReg reg;
5022     TCGArg arg;
5023     const TCGArgConstraint *args_ct;
5024     const TCGArgConstraint *arg_ct;
5025     TCGTemp *ts;
5026     TCGArg new_args[TCG_MAX_OP_ARGS];
5027     int const_args[TCG_MAX_OP_ARGS];
5028     TCGCond op_cond;
5029 
5030     nb_oargs = def->nb_oargs;
5031     nb_iargs = def->nb_iargs;
5032 
5033     /* copy constants */
5034     memcpy(new_args + nb_oargs + nb_iargs,
5035            op->args + nb_oargs + nb_iargs,
5036            sizeof(TCGArg) * def->nb_cargs);
5037 
5038     i_allocated_regs = s->reserved_regs;
5039     o_allocated_regs = s->reserved_regs;
5040 
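    /*
     * Extract the condition operand, if any: tcg_target_const_match()
     * below judges constants in light of the comparison, as a backend
     * may accept an immediate only for certain conditions.
     */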
5041     switch (op->opc) {
5042     case INDEX_op_brcond_i32:
5043     case INDEX_op_brcond_i64:
5044         op_cond = op->args[2];
5045         break;
5046     case INDEX_op_setcond_i32:
5047     case INDEX_op_setcond_i64:
5048     case INDEX_op_negsetcond_i32:
5049     case INDEX_op_negsetcond_i64:
5050     case INDEX_op_cmp_vec:
5051         op_cond = op->args[3];
5052         break;
5053     case INDEX_op_brcond2_i32:
5054         op_cond = op->args[4];
5055         break;
5056     case INDEX_op_movcond_i32:
5057     case INDEX_op_movcond_i64:
5058     case INDEX_op_setcond2_i32:
5059     case INDEX_op_cmpsel_vec:
5060         op_cond = op->args[5];
5061         break;
5062     default:
5063         /* No condition within opcode. */
5064         op_cond = TCG_COND_ALWAYS;
5065         break;
5066     }
5067 
5068     args_ct = opcode_args_ct(op);
5069 
5070     /* satisfy input constraints */
5071     for (k = 0; k < nb_iargs; k++) {
5072         TCGRegSet i_preferred_regs, i_required_regs;
5073         bool allocate_new_reg, copyto_new_reg;
5074         TCGTemp *ts2;
5075         int i1, i2;
5076 
5077         i = args_ct[nb_oargs + k].sort_index;
5078         arg = op->args[i];
5079         arg_ct = &args_ct[i];
5080         ts = arg_temp(arg);
5081 
5082         if (ts->val_type == TEMP_VAL_CONST) {
5083 #ifdef TCG_REG_ZERO
5084             if (ts->val == 0 && (arg_ct->ct & TCG_CT_REG_ZERO)) {
5085                 /* Hardware zero register: indicate register via non-const. */
5086                 const_args[i] = 0;
5087                 new_args[i] = TCG_REG_ZERO;
5088                 continue;
5089             }
5090 #endif
5091 
5092             if (tcg_target_const_match(ts->val, arg_ct->ct, ts->type,
5093                                        op_cond, TCGOP_VECE(op))) {
5094                 /* constant is OK for instruction */
5095                 const_args[i] = 1;
5096                 new_args[i] = ts->val;
5097                 continue;
5098             }
5099         }
5100 
5101         reg = ts->reg;
5102         i_preferred_regs = 0;
5103         i_required_regs = arg_ct->regs;
5104         allocate_new_reg = false;
5105         copyto_new_reg = false;
5106 
5107         switch (arg_ct->pair) {
5108         case 0: /* not paired */
5109             if (arg_ct->ialias) {
5110                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5111 
5112                 /*
5113                  * If the input is readonly, then it cannot also be an
5114                  * output and aliased to itself.  If the input is not
5115                  * dead after the instruction, we must allocate a new
5116                  * register and move it.
5117                  */
5118                 if (temp_readonly(ts) || !IS_DEAD_ARG(i)
5119                     || args_ct[arg_ct->alias_index].newreg) {
5120                     allocate_new_reg = true;
5121                 } else if (ts->val_type == TEMP_VAL_REG) {
5122                     /*
5123                      * Check if the current register has already been
5124                      * allocated for another input.
5125                      */
5126                     allocate_new_reg =
5127                         tcg_regset_test_reg(i_allocated_regs, reg);
5128                 }
5129             }
5130             if (!allocate_new_reg) {
5131                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5132                           i_preferred_regs);
5133                 reg = ts->reg;
5134                 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
5135             }
5136             if (allocate_new_reg) {
5137                 /*
5138                  * Allocate a new register matching the constraint
5139                  * and move the temporary register into it.
5140                  */
5141                 temp_load(s, ts, tcg_target_available_regs[ts->type],
5142                           i_allocated_regs, 0);
5143                 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
5144                                     i_preferred_regs, ts->indirect_base);
5145                 copyto_new_reg = true;
5146             }
5147             break;
5148 
5149         case 1:
5150             /* First of an input pair; if i1 == i2, the second is an output. */
5151             i1 = i;
5152             i2 = arg_ct->pair_index;
5153             ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
5154 
5155             /*
5156              * It is easier to default to allocating a new pair
5157              * and to identify a few cases where it's not required.
5158              */
5159             if (arg_ct->ialias) {
5160                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5161                 if (IS_DEAD_ARG(i1) &&
5162                     IS_DEAD_ARG(i2) &&
5163                     !temp_readonly(ts) &&
5164                     ts->val_type == TEMP_VAL_REG &&
5165                     ts->reg < TCG_TARGET_NB_REGS - 1 &&
5166                     tcg_regset_test_reg(i_required_regs, reg) &&
5167                     !tcg_regset_test_reg(i_allocated_regs, reg) &&
5168                     !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
5169                     (ts2
5170                      ? ts2->val_type == TEMP_VAL_REG &&
5171                        ts2->reg == reg + 1 &&
5172                        !temp_readonly(ts2)
5173                      : s->reg_to_temp[reg + 1] == NULL)) {
5174                     break;
5175                 }
5176             } else {
5177                 /* Without aliasing, the pair must also be an input. */
5178                 tcg_debug_assert(ts2);
5179                 if (ts->val_type == TEMP_VAL_REG &&
5180                     ts2->val_type == TEMP_VAL_REG &&
5181                     ts2->reg == reg + 1 &&
5182                     tcg_regset_test_reg(i_required_regs, reg)) {
5183                     break;
5184                 }
5185             }
5186             reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
5187                                      0, ts->indirect_base);
5188             goto do_pair;
5189 
5190         case 2: /* pair second */
5191             reg = new_args[arg_ct->pair_index] + 1;
5192             goto do_pair;
5193 
5194         case 3: /* ialias with second output, no first input */
5195             tcg_debug_assert(arg_ct->ialias);
5196             i_preferred_regs = output_pref(op, arg_ct->alias_index);
5197 
5198             if (IS_DEAD_ARG(i) &&
5199                 !temp_readonly(ts) &&
5200                 ts->val_type == TEMP_VAL_REG &&
5201                 reg > 0 &&
5202                 s->reg_to_temp[reg - 1] == NULL &&
5203                 tcg_regset_test_reg(i_required_regs, reg) &&
5204                 !tcg_regset_test_reg(i_allocated_regs, reg) &&
5205                 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
5206                 tcg_regset_set_reg(i_allocated_regs, reg - 1);
5207                 break;
5208             }
5209             reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
5210                                      i_allocated_regs, 0,
5211                                      ts->indirect_base);
5212             tcg_regset_set_reg(i_allocated_regs, reg);
5213             reg += 1;
5214             goto do_pair;
5215 
5216         do_pair:
5217             /*
5218              * If an aliased input is not dead after the instruction,
5219              * we must allocate a new register and move it.
5220              */
5221             if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
5222                 TCGRegSet t_allocated_regs = i_allocated_regs;
5223 
5224                 /*
5225                  * Because of the alias, and the continued life, make sure
5226                  * that the temp is somewhere *other* than the reg pair,
5227                  * and we get a copy in reg.
5228                  */
5229                 tcg_regset_set_reg(t_allocated_regs, reg);
5230                 tcg_regset_set_reg(t_allocated_regs, reg + 1);
5231                 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
5232                     /* If ts was already in reg, copy it somewhere else. */
5233                     TCGReg nr;
5234                     bool ok;
5235 
5236                     tcg_debug_assert(ts->kind != TEMP_FIXED);
5237                     nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
5238                                        t_allocated_regs, 0, ts->indirect_base);
5239                     ok = tcg_out_mov(s, ts->type, nr, reg);
5240                     tcg_debug_assert(ok);
5241 
5242                     set_temp_val_reg(s, ts, nr);
5243                 } else {
5244                     temp_load(s, ts, tcg_target_available_regs[ts->type],
5245                               t_allocated_regs, 0);
5246                     copyto_new_reg = true;
5247                 }
5248             } else {
5249                 /* Preferably allocate to reg, otherwise copy. */
5250                 i_required_regs = (TCGRegSet)1 << reg;
5251                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5252                           i_preferred_regs);
5253                 copyto_new_reg = ts->reg != reg;
5254             }
5255             break;
5256 
5257         default:
5258             g_assert_not_reached();
5259         }
5260 
5261         if (copyto_new_reg) {
5262             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5263                 /*
5264                  * Cross register class move not supported.  Sync the
5265                  * temp back to its slot and load from there.
5266                  */
5267                 temp_sync(s, ts, i_allocated_regs, 0, 0);
5268                 tcg_out_ld(s, ts->type, reg,
5269                            ts->mem_base->reg, ts->mem_offset);
5270             }
5271         }
5272         new_args[i] = reg;
5273         const_args[i] = 0;
5274         tcg_regset_set_reg(i_allocated_regs, reg);
5275     }
5276 
5277     /* mark dead temporaries and free the associated registers */
5278     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
5279         if (IS_DEAD_ARG(i)) {
5280             temp_dead(s, arg_temp(op->args[i]));
5281         }
5282     }
5283 
5284     if (def->flags & TCG_OPF_COND_BRANCH) {
5285         tcg_reg_alloc_cbranch(s, i_allocated_regs);
5286     } else if (def->flags & TCG_OPF_BB_END) {
5287         tcg_reg_alloc_bb_end(s, i_allocated_regs);
5288     } else {
5289         if (def->flags & TCG_OPF_CALL_CLOBBER) {
5290             /* XXX: permit generic clobber register list? */
5291             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5292                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5293                     tcg_reg_free(s, i, i_allocated_regs);
5294                 }
5295             }
5296         }
5297         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
5298             /* sync globals if the op has side effects and might trigger
5299                an exception. */
5300             sync_globals(s, i_allocated_regs);
5301         }
5302 
5303         /* satisfy the output constraints */
5304         for (k = 0; k < nb_oargs; k++) {
5305             i = args_ct[k].sort_index;
5306             arg = op->args[i];
5307             arg_ct = &args_ct[i];
5308             ts = arg_temp(arg);
5309 
5310             /* ENV should not be modified.  */
5311             tcg_debug_assert(!temp_readonly(ts));
5312 
5313             switch (arg_ct->pair) {
5314             case 0: /* not paired */
5315                 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
5316                     reg = new_args[arg_ct->alias_index];
5317                 } else if (arg_ct->newreg) {
5318                     reg = tcg_reg_alloc(s, arg_ct->regs,
5319                                         i_allocated_regs | o_allocated_regs,
5320                                         output_pref(op, k), ts->indirect_base);
5321                 } else {
5322                     reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
5323                                         output_pref(op, k), ts->indirect_base);
5324                 }
5325                 break;
5326 
5327             case 1: /* first of pair */
5328                 if (arg_ct->oalias) {
5329                     reg = new_args[arg_ct->alias_index];
5330                 } else if (arg_ct->newreg) {
5331                     reg = tcg_reg_alloc_pair(s, arg_ct->regs,
5332                                              i_allocated_regs | o_allocated_regs,
5333                                              output_pref(op, k),
5334                                              ts->indirect_base);
5335                 } else {
5336                     reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
5337                                              output_pref(op, k),
5338                                              ts->indirect_base);
5339                 }
5340                 break;
5341 
5342             case 2: /* second of pair */
5343                 if (arg_ct->oalias) {
5344                     reg = new_args[arg_ct->alias_index];
5345                 } else {
5346                     reg = new_args[arg_ct->pair_index] + 1;
5347                 }
5348                 break;
5349 
5350             case 3: /* first of pair, aliasing with a second input */
5351                 tcg_debug_assert(!arg_ct->newreg);
5352                 reg = new_args[arg_ct->pair_index] - 1;
5353                 break;
5354 
5355             default:
5356                 g_assert_not_reached();
5357             }
5358             tcg_regset_set_reg(o_allocated_regs, reg);
5359             set_temp_val_reg(s, ts, reg);
5360             ts->mem_coherent = 0;
5361             new_args[i] = reg;
5362         }
5363     }
5364 
5365     /* emit instruction */
5366     switch (op->opc) {
5367     case INDEX_op_ext8s_i32:
5368         tcg_out_ext8s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
5369         break;
5370     case INDEX_op_ext8s_i64:
5371         tcg_out_ext8s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
5372         break;
5373     case INDEX_op_ext8u_i32:
5374     case INDEX_op_ext8u_i64:
5375         tcg_out_ext8u(s, new_args[0], new_args[1]);
5376         break;
5377     case INDEX_op_ext16s_i32:
5378         tcg_out_ext16s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
5379         break;
5380     case INDEX_op_ext16s_i64:
5381         tcg_out_ext16s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
5382         break;
5383     case INDEX_op_ext16u_i32:
5384     case INDEX_op_ext16u_i64:
5385         tcg_out_ext16u(s, new_args[0], new_args[1]);
5386         break;
5387     case INDEX_op_ext32s_i64:
5388         tcg_out_ext32s(s, new_args[0], new_args[1]);
5389         break;
5390     case INDEX_op_ext32u_i64:
5391         tcg_out_ext32u(s, new_args[0], new_args[1]);
5392         break;
5393     case INDEX_op_ext_i32_i64:
5394         tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
5395         break;
5396     case INDEX_op_extu_i32_i64:
5397         tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
5398         break;
5399     case INDEX_op_extrl_i64_i32:
5400         tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
5401         break;
5402     default:
5403         if (def->flags & TCG_OPF_VECTOR) {
5404             tcg_out_vec_op(s, op->opc, TCGOP_TYPE(op) - TCG_TYPE_V64,
5405                            TCGOP_VECE(op), new_args, const_args);
5406         } else {
5407             tcg_out_op(s, op->opc, TCGOP_TYPE(op), new_args, const_args);
5408         }
5409         break;
5410     }
5411 
5412     /* move the outputs in the correct register if needed */
5413     for (i = 0; i < nb_oargs; i++) {
5414         ts = arg_temp(op->args[i]);
5415 
5416         /* ENV should not be modified.  */
5417         tcg_debug_assert(!temp_readonly(ts));
5418 
5419         if (NEED_SYNC_ARG(i)) {
5420             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
5421         } else if (IS_DEAD_ARG(i)) {
5422             temp_dead(s, ts);
5423         }
5424     }
5425 }
5426 
5427 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
5428 {
5429     const TCGLifeData arg_life = op->life;
5430     TCGTemp *ots, *itsl, *itsh;
5431     TCGType vtype = TCGOP_TYPE(op);
5432 
5433     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
5434     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
5435     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
5436 
5437     ots = arg_temp(op->args[0]);
5438     itsl = arg_temp(op->args[1]);
5439     itsh = arg_temp(op->args[2]);
5440 
5441     /* ENV should not be modified.  */
5442     tcg_debug_assert(!temp_readonly(ots));
5443 
5444     /* Allocate the output register now.  */
5445     if (ots->val_type != TEMP_VAL_REG) {
5446         TCGRegSet allocated_regs = s->reserved_regs;
5447         TCGRegSet dup_out_regs = opcode_args_ct(op)[0].regs;
5448         TCGReg oreg;
5449 
5450         /* Make sure to not spill the input registers. */
5451         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
5452             tcg_regset_set_reg(allocated_regs, itsl->reg);
5453         }
5454         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
5455             tcg_regset_set_reg(allocated_regs, itsh->reg);
5456         }
5457 
5458         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5459                              output_pref(op, 0), ots->indirect_base);
5460         set_temp_val_reg(s, ots, oreg);
5461     }
5462 
5463     /* Promote dup2 of immediates to dupi_vec. */
5464     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
5465         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
5466         MemOp vece = MO_64;
5467 
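        /*
         * deposit64 places itsl->val in bits [31:0] and itsh->val in
         * bits [63:32]; e.g. itsl = 0x89abcdef and itsh = 0x01234567
         * yield val = 0x0123456789abcdef.  The minimal-VECE search
         * below mirrors the one in temp_load().
         */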
5468         if (val == dup_const(MO_8, val)) {
5469             vece = MO_8;
5470         } else if (val == dup_const(MO_16, val)) {
5471             vece = MO_16;
5472         } else if (val == dup_const(MO_32, val)) {
5473             vece = MO_32;
5474         }
5475 
5476         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
5477         goto done;
5478     }
5479 
5480     /* If the two inputs form one 64-bit value, try dupm_vec. */
5481     if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
5482         itsh->temp_subindex == !HOST_BIG_ENDIAN &&
5483         itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
5484         TCGTemp *its = itsl - HOST_BIG_ENDIAN;
5485 
5486         temp_sync(s, its + 0, s->reserved_regs, 0, 0);
5487         temp_sync(s, its + 1, s->reserved_regs, 0, 0);
5488 
5489         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
5490                              its->mem_base->reg, its->mem_offset)) {
5491             goto done;
5492         }
5493     }
5494 
5495     /* Fall back to generic expansion. */
5496     return false;
5497 
5498  done:
5499     ots->mem_coherent = 0;
5500     if (IS_DEAD_ARG(1)) {
5501         temp_dead(s, itsl);
5502     }
5503     if (IS_DEAD_ARG(2)) {
5504         temp_dead(s, itsh);
5505     }
5506     if (NEED_SYNC_ARG(0)) {
5507         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
5508     } else if (IS_DEAD_ARG(0)) {
5509         temp_dead(s, ots);
5510     }
5511     return true;
5512 }
5513 
5514 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5515                          TCGRegSet allocated_regs)
5516 {
5517     if (ts->val_type == TEMP_VAL_REG) {
5518         if (ts->reg != reg) {
5519             tcg_reg_free(s, reg, allocated_regs);
5520             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5521                 /*
5522                  * Cross register class move not supported.  Sync the
5523                  * temp back to its slot and load from there.
5524                  */
5525                 temp_sync(s, ts, allocated_regs, 0, 0);
5526                 tcg_out_ld(s, ts->type, reg,
5527                            ts->mem_base->reg, ts->mem_offset);
5528             }
5529         }
5530     } else {
5531         TCGRegSet arg_set = 0;
5532 
5533         tcg_reg_free(s, reg, allocated_regs);
5534         tcg_regset_set_reg(arg_set, reg);
5535         temp_load(s, ts, arg_set, allocated_regs, 0);
5536     }
5537 }
5538 
5539 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5540                          TCGRegSet allocated_regs)
5541 {
5542     /*
5543      * When the destination is on the stack, load up the temp and store.
5544      * If there are many call-saved registers, the temp might live to
5545      * see another use; otherwise it'll be discarded.
5546      */
5547     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5548     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5549                arg_slot_stk_ofs(arg_slot));
5550 }
5551 
5552 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5553                             TCGTemp *ts, TCGRegSet *allocated_regs)
5554 {
5555     if (arg_slot_reg_p(l->arg_slot)) {
5556         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5557         load_arg_reg(s, reg, ts, *allocated_regs);
5558         tcg_regset_set_reg(*allocated_regs, reg);
5559     } else {
5560         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5561     }
5562 }
5563 
5564 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5565                          intptr_t ref_off, TCGRegSet *allocated_regs)
5566 {
5567     TCGReg reg;
5568 
5569     if (arg_slot_reg_p(arg_slot)) {
5570         reg = tcg_target_call_iarg_regs[arg_slot];
5571         tcg_reg_free(s, reg, *allocated_regs);
5572         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5573         tcg_regset_set_reg(*allocated_regs, reg);
5574     } else {
5575         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5576                             *allocated_regs, 0, false);
5577         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5578         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5579                    arg_slot_stk_ofs(arg_slot));
5580     }
5581 }
5582 
5583 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
5584 {
5585     const int nb_oargs = TCGOP_CALLO(op);
5586     const int nb_iargs = TCGOP_CALLI(op);
5587     const TCGLifeData arg_life = op->life;
5588     const TCGHelperInfo *info = tcg_call_info(op);
5589     TCGRegSet allocated_regs = s->reserved_regs;
5590     int i;
5591 
5592     /*
5593      * Move inputs into place in reverse order,
5594      * so that we place stacked arguments first.
5595      */
5596     for (i = nb_iargs - 1; i >= 0; --i) {
5597         const TCGCallArgumentLoc *loc = &info->in[i];
5598         TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);
5599 
5600         switch (loc->kind) {
5601         case TCG_CALL_ARG_NORMAL:
5602         case TCG_CALL_ARG_EXTEND_U:
5603         case TCG_CALL_ARG_EXTEND_S:
5604             load_arg_normal(s, loc, ts, &allocated_regs);
5605             break;
5606         case TCG_CALL_ARG_BY_REF:
5607             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5608             load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
5609                          arg_slot_stk_ofs(loc->ref_slot),
5610                          &allocated_regs);
5611             break;
5612         case TCG_CALL_ARG_BY_REF_N:
5613             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5614             break;
5615         default:
5616             g_assert_not_reached();
5617         }
5618     }
5619 
5620     /* Mark dead temporaries and free the associated registers.  */
5621     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
5622         if (IS_DEAD_ARG(i)) {
5623             temp_dead(s, arg_temp(op->args[i]));
5624         }
5625     }
5626 
5627     /* Clobber call registers.  */
5628     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5629         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5630             tcg_reg_free(s, i, allocated_regs);
5631         }
5632     }
5633 
5634     /*
5635      * Save globals if they might be written by the helper,
5636      * sync them if they might be read.
5637      */
5638     if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
5639         /* Nothing to do */
5640     } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
5641         sync_globals(s, allocated_regs);
5642     } else {
5643         save_globals(s, allocated_regs);
5644     }
5645 
5646     /*
5647      * If the ABI passes a pointer to the returned struct as the first
5648      * argument, load that now.  Pass a pointer to the output home slot.
5649      */
5650     if (info->out_kind == TCG_CALL_RET_BY_REF) {
5651         TCGTemp *ts = arg_temp(op->args[0]);
5652 
5653         if (!ts->mem_allocated) {
5654             temp_allocate_frame(s, ts);
5655         }
5656         load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
5657     }
5658 
5659     tcg_out_call(s, tcg_call_func(op), info);
5660 
5661     /* Assign output registers and emit moves if needed.  */
5662     switch (info->out_kind) {
5663     case TCG_CALL_RET_NORMAL:
5664         for (i = 0; i < nb_oargs; i++) {
5665             TCGTemp *ts = arg_temp(op->args[i]);
5666             TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);
5667 
5668             /* ENV should not be modified.  */
5669             tcg_debug_assert(!temp_readonly(ts));
5670 
5671             set_temp_val_reg(s, ts, reg);
5672             ts->mem_coherent = 0;
5673         }
5674         break;
5675 
5676     case TCG_CALL_RET_BY_VEC:
5677         {
5678             TCGTemp *ts = arg_temp(op->args[0]);
5679 
5680             tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
5681             tcg_debug_assert(ts->temp_subindex == 0);
5682             if (!ts->mem_allocated) {
5683                 temp_allocate_frame(s, ts);
5684             }
5685             tcg_out_st(s, TCG_TYPE_V128,
5686                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5687                        ts->mem_base->reg, ts->mem_offset);
5688         }
5689         /* fall through to mark all parts in memory */
5690 
5691     case TCG_CALL_RET_BY_REF:
5692         /* The callee has performed a write through the reference. */
5693         for (i = 0; i < nb_oargs; i++) {
5694             TCGTemp *ts = arg_temp(op->args[i]);
5695             ts->val_type = TEMP_VAL_MEM;
5696         }
5697         break;
5698 
5699     default:
5700         g_assert_not_reached();
5701     }
5702 
5703     /* Flush or discard output registers as needed. */
5704     for (i = 0; i < nb_oargs; i++) {
5705         TCGTemp *ts = arg_temp(op->args[i]);
5706         if (NEED_SYNC_ARG(i)) {
5707             temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
5708         } else if (IS_DEAD_ARG(i)) {
5709             temp_dead(s, ts);
5710         }
5711     }
5712 }
5713 
5714 /**
5715  * atom_and_align_for_opc:
5716  * @s: tcg context
5717  * @opc: memory operation code
5718  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5719  * @allow_two_ops: true if we are prepared to issue two operations
5720  *
5721  * Return the alignment and atomicity to use for the inline fast path
5722  * for the given memory operation.  The alignment may be larger than
5723  * that specified in @opc, and the correct alignment will be diagnosed
5724  * by the slow path helper.
5725  *
5726  * If @allow_two_ops, the host is prepared to test for 2x alignment,
5727  * and issue two loads or stores for subalignment.
5728  */
5729 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5730                                            MemOp host_atom, bool allow_two_ops)
5731 {
5732     MemOp align = memop_alignment_bits(opc);
5733     MemOp size = opc & MO_SIZE;
5734     MemOp half = size ? size - 1 : 0;
5735     MemOp atom = opc & MO_ATOM_MASK;
5736     MemOp atmax;
5737 
5738     switch (atom) {
5739     case MO_ATOM_NONE:
5740         /* The operation requires no specific atomicity. */
5741         atmax = MO_8;
5742         break;
5743 
5744     case MO_ATOM_IFALIGN:
5745         atmax = size;
5746         break;
5747 
5748     case MO_ATOM_IFALIGN_PAIR:
5749         atmax = half;
5750         break;
5751 
5752     case MO_ATOM_WITHIN16:
5753         atmax = size;
5754         if (size == MO_128) {
5755             /* Misalignment implies !within16, and therefore no atomicity. */
5756         } else if (host_atom != MO_ATOM_WITHIN16) {
5757             /* The host does not implement within16, so require alignment. */
5758             align = MAX(align, size);
5759         }
5760         break;
5761 
5762     case MO_ATOM_WITHIN16_PAIR:
5763         atmax = size;
5764         /*
5765          * Misalignment implies !within16, and therefore half atomicity.
5766          * Any host prepared for two operations can implement this with
5767          * half alignment.
5768          */
5769         if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5770             align = MAX(align, half);
5771         }
5772         break;
5773 
5774     case MO_ATOM_SUBALIGN:
5775         atmax = size;
5776         if (host_atom != MO_ATOM_SUBALIGN) {
5777             /* If unaligned but not odd, there are subobjects up to half. */
5778             if (allow_two_ops) {
5779                 align = MAX(align, half);
5780             } else {
5781                 align = MAX(align, size);
5782             }
5783         }
5784         break;
5785 
5786     default:
5787         g_assert_not_reached();
5788     }
5789 
5790     return (TCGAtomAlign){ .atom = atmax, .align = align };
5791 }
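/*
 * Worked example (illustrative): an 8-byte access with
 * MO_ATOM_WITHIN16_PAIR, on a host lacking MO_ATOM_WITHIN16 but with
 * @allow_two_ops, keeps atom == MO_64 while raising align to MO_32, so
 * that an access aligned only to 4 bytes may be split into two aligned
 * 4-byte operations.
 */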
5792 
5793 /*
5794  * Similarly for qemu_ld/st slow path helpers.
5795  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5796  * using only the provided backend tcg_out_* functions.
5797  */
5798 
5799 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5800 {
5801     int ofs = arg_slot_stk_ofs(slot);
5802 
5803     /*
5804      * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
5805      * require extension to uint64_t, adjust the address for uint32_t.
5806      */
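    /*
     * E.g. on a 64-bit big-endian host, a TCG_TYPE_I32 argument occupies
     * the high-addressed half of its 8-byte slot, hence the +4 below.
     */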
5807     if (HOST_BIG_ENDIAN &&
5808         TCG_TARGET_REG_BITS == 64 &&
5809         type == TCG_TYPE_I32) {
5810         ofs += 4;
5811     }
5812     return ofs;
5813 }
5814 
5815 static void tcg_out_helper_load_slots(TCGContext *s,
5816                                       unsigned nmov, TCGMovExtend *mov,
5817                                       const TCGLdstHelperParam *parm)
5818 {
5819     unsigned i;
5820     TCGReg dst3;
5821 
5822     /*
5823      * Start from the end, storing to the stack first.
5824      * This frees those registers, so we need not consider overlap.
5825      */
5826     for (i = nmov; i-- > 0; ) {
5827         unsigned slot = mov[i].dst;
5828 
5829         if (arg_slot_reg_p(slot)) {
5830             goto found_reg;
5831         }
5832 
5833         TCGReg src = mov[i].src;
5834         TCGType dst_type = mov[i].dst_type;
5835         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5836 
5837         /* The argument is going onto the stack; extend into scratch. */
5838         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
5839             tcg_debug_assert(parm->ntmp != 0);
5840             mov[i].dst = src = parm->tmp[0];
5841             tcg_out_movext1(s, &mov[i]);
5842         }
5843 
5844         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
5845                    tcg_out_helper_stk_ofs(dst_type, slot));
5846     }
5847     return;
5848 
5849  found_reg:
5850     /*
5851      * The remaining arguments are in registers.
5852      * Convert slot numbers to argument registers.
5853      */
5854     nmov = i + 1;
5855     for (i = 0; i < nmov; ++i) {
5856         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
5857     }
5858 
5859     switch (nmov) {
5860     case 4:
5861         /* The backend must have provided enough temps for the worst case. */
5862         tcg_debug_assert(parm->ntmp >= 2);
5863 
5864         dst3 = mov[3].dst;
5865         for (unsigned j = 0; j < 3; ++j) {
5866             if (dst3 == mov[j].src) {
5867                 /*
5868                  * Conflict. Copy the source to a temporary, perform the
5869                  * remaining moves, then the extension from our scratch
5870                  * on the way out.
5871                  */
5872                 TCGReg scratch = parm->tmp[1];
5873 
5874                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
5875                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
5876                 tcg_out_movext1_new_src(s, &mov[3], scratch);
5877                 return;
5878             }
5879         }
5880 
5881         /* No conflicts: perform this move and continue. */
5882         tcg_out_movext1(s, &mov[3]);
5883         /* fall through */
5884 
5885     case 3:
5886         tcg_out_movext3(s, mov, mov + 1, mov + 2,
5887                         parm->ntmp ? parm->tmp[0] : -1);
5888         break;
5889     case 2:
5890         tcg_out_movext2(s, mov, mov + 1,
5891                         parm->ntmp ? parm->tmp[0] : -1);
5892         break;
5893     case 1:
5894         tcg_out_movext1(s, mov);
5895         break;
5896     default:
5897         g_assert_not_reached();
5898     }
5899 }
5900 
5901 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
5902                                     TCGType type, tcg_target_long imm,
5903                                     const TCGLdstHelperParam *parm)
5904 {
5905     if (arg_slot_reg_p(slot)) {
5906         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
5907     } else {
5908         int ofs = tcg_out_helper_stk_ofs(type, slot);
5909         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
5910             tcg_debug_assert(parm->ntmp != 0);
5911             tcg_out_movi(s, type, parm->tmp[0], imm);
5912             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
5913         }
5914     }
5915 }
5916 
5917 static void tcg_out_helper_load_common_args(TCGContext *s,
5918                                             const TCGLabelQemuLdst *ldst,
5919                                             const TCGLdstHelperParam *parm,
5920                                             const TCGHelperInfo *info,
5921                                             unsigned next_arg)
5922 {
5923     TCGMovExtend ptr_mov = {
5924         .dst_type = TCG_TYPE_PTR,
5925         .src_type = TCG_TYPE_PTR,
5926         .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
5927     };
5928     const TCGCallArgumentLoc *loc = &info->in[0];
5929     TCGType type;
5930     unsigned slot;
5931     tcg_target_ulong imm;
5932 
5933     /*
5934      * Handle env, which is always first.
5935      */
5936     ptr_mov.dst = loc->arg_slot;
5937     ptr_mov.src = TCG_AREG0;
5938     tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
5939 
5940     /*
5941      * Handle oi.
5942      */
5943     imm = ldst->oi;
5944     loc = &info->in[next_arg];
5945     type = TCG_TYPE_I32;
5946     switch (loc->kind) {
5947     case TCG_CALL_ARG_NORMAL:
5948         break;
5949     case TCG_CALL_ARG_EXTEND_U:
5950     case TCG_CALL_ARG_EXTEND_S:
5951         /* No extension required for MemOpIdx. */
5952         tcg_debug_assert(imm <= INT32_MAX);
5953         type = TCG_TYPE_REG;
5954         break;
5955     default:
5956         g_assert_not_reached();
5957     }
5958     tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
5959     next_arg++;
5960 
5961     /*
5962      * Handle ra.
5963      */
5964     loc = &info->in[next_arg];
5965     slot = loc->arg_slot;
5966     if (parm->ra_gen) {
5967         int arg_reg = -1;
5968         TCGReg ra_reg;
5969 
5970         if (arg_slot_reg_p(slot)) {
5971             arg_reg = tcg_target_call_iarg_regs[slot];
5972         }
5973         ra_reg = parm->ra_gen(s, ldst, arg_reg);
5974 
5975         ptr_mov.dst = slot;
5976         ptr_mov.src = ra_reg;
5977         tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
5978     } else {
5979         imm = (uintptr_t)ldst->raddr;
5980         tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
5981     }
5982 }
5983 
5984 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
5985                                        const TCGCallArgumentLoc *loc,
5986                                        TCGType dst_type, TCGType src_type,
5987                                        TCGReg lo, TCGReg hi)
5988 {
5989     MemOp reg_mo;
5990 
5991     if (dst_type <= TCG_TYPE_REG) {
5992         MemOp src_ext;
5993 
5994         switch (loc->kind) {
5995         case TCG_CALL_ARG_NORMAL:
5996             src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5997             break;
5998         case TCG_CALL_ARG_EXTEND_U:
5999             dst_type = TCG_TYPE_REG;
6000             src_ext = MO_UL;
6001             break;
6002         case TCG_CALL_ARG_EXTEND_S:
6003             dst_type = TCG_TYPE_REG;
6004             src_ext = MO_SL;
6005             break;
6006         default:
6007             g_assert_not_reached();
6008         }
6009 
6010         mov[0].dst = loc->arg_slot;
6011         mov[0].dst_type = dst_type;
6012         mov[0].src = lo;
6013         mov[0].src_type = src_type;
6014         mov[0].src_ext = src_ext;
6015         return 1;
6016     }
6017 
6018     if (TCG_TARGET_REG_BITS == 32) {
6019         assert(dst_type == TCG_TYPE_I64);
6020         reg_mo = MO_32;
6021     } else {
6022         assert(dst_type == TCG_TYPE_I128);
6023         reg_mo = MO_64;
6024     }
6025 
6026     mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
6027     mov[0].src = lo;
6028     mov[0].dst_type = TCG_TYPE_REG;
6029     mov[0].src_type = TCG_TYPE_REG;
6030     mov[0].src_ext = reg_mo;
6031 
6032     mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
6033     mov[1].src = hi;
6034     mov[1].dst_type = TCG_TYPE_REG;
6035     mov[1].src_type = TCG_TYPE_REG;
6036     mov[1].src_ext = reg_mo;
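    /*
     * Thus on a little-endian host the low part lands in loc[0] and the
     * high part in loc[1], while a big-endian host swaps the two slots.
     */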
6037 
6038     return 2;
6039 }
6040 
6041 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6042                                    const TCGLdstHelperParam *parm)
6043 {
6044     const TCGHelperInfo *info;
6045     const TCGCallArgumentLoc *loc;
6046     TCGMovExtend mov[2];
6047     unsigned next_arg, nmov;
6048     MemOp mop = get_memop(ldst->oi);
6049 
6050     switch (mop & MO_SIZE) {
6051     case MO_8:
6052     case MO_16:
6053     case MO_32:
6054         info = &info_helper_ld32_mmu;
6055         break;
6056     case MO_64:
6057         info = &info_helper_ld64_mmu;
6058         break;
6059     case MO_128:
6060         info = &info_helper_ld128_mmu;
6061         break;
6062     default:
6063         g_assert_not_reached();
6064     }
6065 
6066     /* Defer env argument. */
6067     next_arg = 1;
6068 
6069     loc = &info->in[next_arg];
6070     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
6071         /*
6072          * 32-bit host with 32-bit guest: zero-extend the guest address
6073          * to 64 bits for the helper by storing the low part, then
6074          * load a zero for the high part.
6075          */
6076         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6077                                TCG_TYPE_I32, TCG_TYPE_I32,
6078                                ldst->addr_reg, -1);
6079         tcg_out_helper_load_slots(s, 1, mov, parm);
6080 
6081         tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
6082                                 TCG_TYPE_I32, 0, parm);
6083         next_arg += 2;
6084     } else {
6085         nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6086                                       ldst->addr_reg, -1);
6087         tcg_out_helper_load_slots(s, nmov, mov, parm);
6088         next_arg += nmov;
6089     }
6090 
6091     switch (info->out_kind) {
6092     case TCG_CALL_RET_NORMAL:
6093     case TCG_CALL_RET_BY_VEC:
6094         break;
6095     case TCG_CALL_RET_BY_REF:
6096         /*
6097          * The return reference is in the first argument slot.
6098          * We need memory in which to return: re-use the top of stack.
6099          */
6100         {
6101             int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6102 
6103             if (arg_slot_reg_p(0)) {
6104                 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
6105                                  TCG_REG_CALL_STACK, ofs_slot0);
6106             } else {
6107                 tcg_debug_assert(parm->ntmp != 0);
6108                 tcg_out_addi_ptr(s, parm->tmp[0],
6109                                  TCG_REG_CALL_STACK, ofs_slot0);
6110                 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6111                            TCG_REG_CALL_STACK, ofs_slot0);
6112             }
6113         }
6114         break;
6115     default:
6116         g_assert_not_reached();
6117     }
6118 
6119     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6120 }
6121 
6122 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
6123                                   bool load_sign,
6124                                   const TCGLdstHelperParam *parm)
6125 {
6126     MemOp mop = get_memop(ldst->oi);
6127     TCGMovExtend mov[2];
6128     int ofs_slot0;
6129 
6130     switch (ldst->type) {
6131     case TCG_TYPE_I64:
6132         if (TCG_TARGET_REG_BITS == 32) {
6133             break;
6134         }
6135         /* fall through */
6136 
6137     case TCG_TYPE_I32:
6138         mov[0].dst = ldst->datalo_reg;
6139         mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
6140         mov[0].dst_type = ldst->type;
6141         mov[0].src_type = TCG_TYPE_REG;
6142 
6143         /*
6144          * If load_sign, then we allowed the helper to perform the
6145          * appropriate sign extension to tcg_target_ulong, and all
6146          * we need now is a plain move.
6147          *
6148          * If not, then we expect the relevant extension
6149          * instruction to be no more expensive than a move, and
6150          * we thus save the icache etc by only using one of two
6151          * helper functions.
6152          */
6153         if (load_sign || !(mop & MO_SIGN)) {
6154             if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
6155                 mov[0].src_ext = MO_32;
6156             } else {
6157                 mov[0].src_ext = MO_64;
6158             }
6159         } else {
6160             mov[0].src_ext = mop & MO_SSIZE;
6161         }
6162         tcg_out_movext1(s, mov);
6163         return;
6164 
6165     case TCG_TYPE_I128:
6166         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6167         ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6168         switch (TCG_TARGET_CALL_RET_I128) {
6169         case TCG_CALL_RET_NORMAL:
6170             break;
6171         case TCG_CALL_RET_BY_VEC:
6172             tcg_out_st(s, TCG_TYPE_V128,
6173                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
6174                        TCG_REG_CALL_STACK, ofs_slot0);
6175             /* fall through */
6176         case TCG_CALL_RET_BY_REF:
6177             tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
6178                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
6179             tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
6180                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
6181             return;
6182         default:
6183             g_assert_not_reached();
6184         }
6185         break;
6186 
6187     default:
6188         g_assert_not_reached();
6189     }
6190 
6191     mov[0].dst = ldst->datalo_reg;
6192     mov[0].src =
6193         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
6194     mov[0].dst_type = TCG_TYPE_REG;
6195     mov[0].src_type = TCG_TYPE_REG;
6196     mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6197 
6198     mov[1].dst = ldst->datahi_reg;
6199     mov[1].src =
6200         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
6201     mov[1].dst_type = TCG_TYPE_REG;
6202     mov[1].src_type = TCG_TYPE_REG;
6203     mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6204 
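    /*
     * The two outgoing value registers may be exactly the two
     * destination registers, but swapped; tcg_out_movext2 orders the
     * moves to cope with overlap, using tmp[0] as scratch (when the
     * backend provides one) to break a true swap.
     */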
6205     tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
6206 }
6207 
6208 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6209                                    const TCGLdstHelperParam *parm)
6210 {
6211     const TCGHelperInfo *info;
6212     const TCGCallArgumentLoc *loc;
6213     TCGMovExtend mov[4];
6214     TCGType data_type;
6215     unsigned next_arg, nmov, n;
6216     MemOp mop = get_memop(ldst->oi);
6217 
6218     switch (mop & MO_SIZE) {
6219     case MO_8:
6220     case MO_16:
6221     case MO_32:
6222         info = &info_helper_st32_mmu;
6223         data_type = TCG_TYPE_I32;
6224         break;
6225     case MO_64:
6226         info = &info_helper_st64_mmu;
6227         data_type = TCG_TYPE_I64;
6228         break;
6229     case MO_128:
6230         info = &info_helper_st128_mmu;
6231         data_type = TCG_TYPE_I128;
6232         break;
6233     default:
6234         g_assert_not_reached();
6235     }
6236 
6237     /* Defer env argument. */
6238     next_arg = 1;
6239     nmov = 0;
6240 
6241     /* Handle addr argument. */
6242     loc = &info->in[next_arg];
6243     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
6244     if (TCG_TARGET_REG_BITS == 32) {
6245         /*
6246          * 32-bit host (and thus 32-bit guest): zero-extend the guest address
         * to 64 bits for the helper by storing the low part.  Later,
6248          * after we have processed the register inputs, we will load a
6249          * zero for the high part.
6250          */
6251         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6252                                TCG_TYPE_I32, TCG_TYPE_I32,
6253                                ldst->addr_reg, -1);
6254         next_arg += 2;
6255         nmov += 1;
6256     } else {
6257         n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6258                                    ldst->addr_reg, -1);
6259         next_arg += n;
6260         nmov += n;
6261     }
6262 
6263     /* Handle data argument. */
6264     loc = &info->in[next_arg];
6265     switch (loc->kind) {
6266     case TCG_CALL_ARG_NORMAL:
6267     case TCG_CALL_ARG_EXTEND_U:
6268     case TCG_CALL_ARG_EXTEND_S:
6269         n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
6270                                    ldst->datalo_reg, ldst->datahi_reg);
6271         next_arg += n;
6272         nmov += n;
6273         tcg_out_helper_load_slots(s, nmov, mov, parm);
6274         break;
6275 
6276     case TCG_CALL_ARG_BY_REF:
6277         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6278         tcg_debug_assert(data_type == TCG_TYPE_I128);
6279         tcg_out_st(s, TCG_TYPE_I64,
6280                    HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
6281                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
6282         tcg_out_st(s, TCG_TYPE_I64,
6283                    HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
6284                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));
6285 
6286         tcg_out_helper_load_slots(s, nmov, mov, parm);
6287 
6288         if (arg_slot_reg_p(loc->arg_slot)) {
6289             tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
6290                              TCG_REG_CALL_STACK,
6291                              arg_slot_stk_ofs(loc->ref_slot));
6292         } else {
6293             tcg_debug_assert(parm->ntmp != 0);
6294             tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
6295                              arg_slot_stk_ofs(loc->ref_slot));
6296             tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6297                        TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
6298         }
6299         next_arg += 2;
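        /*
         * Note the ordering above: the I128 data is spilled to its
         * reserved stack slots first, the ordinary arguments are
         * loaded into their slots, and only then is the pointer to
         * the spill area placed in its own argument slot, so the
         * address computation does not interfere with the other
         * argument moves.
         */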
6300         break;
6301 
6302     default:
6303         g_assert_not_reached();
6304     }
6305 
6306     if (TCG_TARGET_REG_BITS == 32) {
6307         /* Zero extend the address by loading a zero for the high part. */
6308         loc = &info->in[1 + !HOST_BIG_ENDIAN];
6309         tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
6310     }
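    /*
     * For example, with a 32-bit little-endian host and the st64
     * helper signature (env, uint64_t addr, uint64_t data, oi, ra),
     * in[1] received the address low half from addr_reg above and
     * in[2], its high half, is the zero just stored.
     */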
6311 
6312     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6313 }
6314 
6315 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
6316 {
6317     int i, start_words, num_insns;
6318     TCGOp *op;
6319 
6320     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
6321                  && qemu_log_in_addr_range(pc_start))) {
6322         FILE *logfile = qemu_log_trylock();
6323         if (logfile) {
6324             fprintf(logfile, "OP:\n");
6325             tcg_dump_ops(s, logfile, false);
6326             fprintf(logfile, "\n");
6327             qemu_log_unlock(logfile);
6328         }
6329     }
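    /*
     * This dump is requested with "-d op" on the QEMU command line;
     * adding e.g. "-dfilter 0x1000..0x1fff" limits it, via
     * qemu_log_in_addr_range(), to TBs starting in that range.
     */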
6330 
6331 #ifdef CONFIG_DEBUG_TCG
6332     /* Ensure all labels referenced have been emitted.  */
6333     {
6334         TCGLabel *l;
6335         bool error = false;
6336 
6337         QSIMPLEQ_FOREACH(l, &s->labels, next) {
6338             if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
6339                 qemu_log_mask(CPU_LOG_TB_OP,
6340                               "$L%d referenced but not present.\n", l->id);
6341                 error = true;
6342             }
6343         }
6344         assert(!error);
6345     }
6346 #endif
6347 
6348     /* Do not reuse any EBB that may be allocated within the TB. */
6349     tcg_temp_ebb_reset_freed(s);
6350 
6351     tcg_optimize(s);
6352 
6353     reachable_code_pass(s);
6354     liveness_pass_0(s);
6355     liveness_pass_1(s);
6356 
6357     if (s->nb_indirects > 0) {
6358         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
6359                      && qemu_log_in_addr_range(pc_start))) {
6360             FILE *logfile = qemu_log_trylock();
6361             if (logfile) {
6362                 fprintf(logfile, "OP before indirect lowering:\n");
6363                 tcg_dump_ops(s, logfile, false);
6364                 fprintf(logfile, "\n");
6365                 qemu_log_unlock(logfile);
6366             }
6367         }
6368 
6369         /* Replace indirect temps with direct temps.  */
6370         if (liveness_pass_2(s)) {
6371             /* If changes were made, re-run liveness.  */
6372             liveness_pass_1(s);
6373         }
6374     }
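    /*
     * An "indirect" temp is one whose backing storage is reached
     * through another temp rather than directly from env, so the
     * register allocator cannot spill and fill it by itself.
     * liveness_pass_2 rewrites accesses into explicit loads and
     * stores of direct temps, hence liveness must be recomputed.
     */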
6375 
6376     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
6377                  && qemu_log_in_addr_range(pc_start))) {
6378         FILE *logfile = qemu_log_trylock();
6379         if (logfile) {
6380             fprintf(logfile, "OP after optimization and liveness analysis:\n");
6381             tcg_dump_ops(s, logfile, true);
6382             fprintf(logfile, "\n");
6383             qemu_log_unlock(logfile);
6384         }
6385     }
6386 
6387     /* Initialize goto_tb jump offsets. */
6388     tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
6389     tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
6390     tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
6391     tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
6392 
6393     tcg_reg_alloc_start(s);
6394 
6395     /*
6396      * Reset the buffer pointers when restarting after overflow.
6397      * TODO: Move this into translate-all.c with the rest of the
     * buffer management; handling only this piece here is confusing.
6399      */
6400     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
6401     s->code_ptr = s->code_buf;
6402     s->data_gen_ptr = NULL;
6403 
6404     QSIMPLEQ_INIT(&s->ldst_labels);
6405     s->pool_labels = NULL;
6406 
6407     start_words = s->insn_start_words;
6408     s->gen_insn_data =
6409         tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);
6410 
6411     tcg_out_tb_start(s);
6412 
6413     num_insns = -1;
6414     QTAILQ_FOREACH(op, &s->ops, link) {
6415         TCGOpcode opc = op->opc;
6416 
6417         switch (opc) {
6418         case INDEX_op_mov_i32:
6419         case INDEX_op_mov_i64:
6420         case INDEX_op_mov_vec:
6421             tcg_reg_alloc_mov(s, op);
6422             break;
6423         case INDEX_op_dup_vec:
6424             tcg_reg_alloc_dup(s, op);
6425             break;
6426         case INDEX_op_insn_start:
6427             if (num_insns >= 0) {
6428                 size_t off = tcg_current_code_size(s);
6429                 s->gen_insn_end_off[num_insns] = off;
6430                 /* Assert that we do not overflow our stored offset.  */
6431                 assert(s->gen_insn_end_off[num_insns] == off);
6432             }
6433             num_insns++;
6434             for (i = 0; i < start_words; ++i) {
6435                 s->gen_insn_data[num_insns * start_words + i] =
6436                     tcg_get_insn_start_param(op, i);
6437             }
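            /*
             * gen_insn_data[] and gen_insn_end_off[] are consumed
             * after the fact by restore_state_to_opc(), via
             * cpu_restore_state(), to map a host PC within the TB
             * back to guest insn state.
             */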
6438             break;
6439         case INDEX_op_discard:
6440             temp_dead(s, arg_temp(op->args[0]));
6441             break;
6442         case INDEX_op_set_label:
6443             tcg_reg_alloc_bb_end(s, s->reserved_regs);
6444             tcg_out_label(s, arg_label(op->args[0]));
6445             break;
6446         case INDEX_op_call:
6447             tcg_reg_alloc_call(s, op);
6448             break;
6449         case INDEX_op_exit_tb:
6450             tcg_out_exit_tb(s, op->args[0]);
6451             break;
6452         case INDEX_op_goto_tb:
6453             tcg_out_goto_tb(s, op->args[0]);
6454             break;
6455         case INDEX_op_dup2_vec:
6456             if (tcg_reg_alloc_dup2(s, op)) {
6457                 break;
6458             }
6459             /* fall through */
6460         default:
6461             /* Sanity check that we've not introduced any unhandled opcodes. */
6462             tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op),
6463                                               TCGOP_FLAGS(op)));
            /* Note: it would be faster to have specialized register
               allocator functions for some common argument patterns. */
6467             tcg_reg_alloc_op(s, op);
6468             break;
6469         }
6470         /* Test for (pending) buffer overflow.  The assumption is that any
6471            one operation beginning below the high water mark cannot overrun
6472            the buffer completely.  Thus we can test for overflow after
6473            generating code without having to check during generation.  */
6474         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
6475             return -1;
6476         }
6477         /* Test for TB overflow, as seen by gen_insn_end_off.  */
6478         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
6479             return -2;
6480         }
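        /*
         * Both error returns are handled by the caller, tb_gen_code():
         * -1 restarts code generation in a fresh part of the code
         * buffer, while -2 retries the translation with fewer guest
         * insns so that offsets still fit in the 16-bit
         * gen_insn_end_off[] entries.
         */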
6481     }
6482     tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
6483     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
6484 
6485     /* Generate TB finalization at the end of block */
6486     i = tcg_out_ldst_finalize(s);
6487     if (i < 0) {
6488         return i;
6489     }
6490     i = tcg_out_pool_finalize(s);
6491     if (i < 0) {
6492         return i;
6493     }
6494     if (!tcg_resolve_relocs(s)) {
6495         return -2;
6496     }
6497 
6498 #ifndef CONFIG_TCG_INTERPRETER
6499     /* flush instruction cache */
6500     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
6501                         (uintptr_t)s->code_buf,
6502                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
6503 #endif
6504 
6505     return tcg_current_code_size(s);
6506 }
6507 
6508 #ifdef ELF_HOST_MACHINE
6509 /* In order to use this feature, the backend needs to do three things:
6510 
6511    (1) Define ELF_HOST_MACHINE to indicate both what value to
6512        put into the ELF image and to indicate support for the feature.
6513 
6514    (2) Define tcg_register_jit.  This should create a buffer containing
6515        the contents of a .debug_frame section that describes the post-
6516        prologue unwind info for the tcg machine.
6517 
6518    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
6519 */
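/* As a hedged sketch (the DebugFrame layout and contents are
   per-backend; see any tcg-target.c.inc for a real example), step (2)
   typically looks like:

       typedef struct {
           DebugFrameHeader h;
           uint8_t fde_def_cfa[4];     // DW_CFA ops defining the CFA
           uint8_t fde_reg_ofs[14];    // callee-saved slots (size varies)
       } DebugFrame;

       static const DebugFrame debug_frame = { ... };

       void tcg_register_jit(const void *buf, size_t buf_size)
       {
           tcg_register_jit_int(buf, buf_size,
                                &debug_frame, sizeof(debug_frame));
       }

   func_start and func_len in the FDE are filled in by
   tcg_register_jit_int below.  */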
6520 
6521 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
6522 typedef enum {
6523     JIT_NOACTION = 0,
6524     JIT_REGISTER_FN,
6525     JIT_UNREGISTER_FN
6526 } jit_actions_t;
6527 
6528 struct jit_code_entry {
6529     struct jit_code_entry *next_entry;
6530     struct jit_code_entry *prev_entry;
6531     const void *symfile_addr;
6532     uint64_t symfile_size;
6533 };
6534 
6535 struct jit_descriptor {
6536     uint32_t version;
6537     uint32_t action_flag;
6538     struct jit_code_entry *relevant_entry;
6539     struct jit_code_entry *first_entry;
6540 };
6541 
6542 void __jit_debug_register_code(void) __attribute__((noinline));
6543 void __jit_debug_register_code(void)
6544 {
6545     asm("");
6546 }
6547 
6548 /* Must statically initialize the version, because GDB may check
6549    the version before we can set it.  */
6550 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
6551 
6552 /* End GDB interface.  */
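/* The registration protocol, per the GDB documentation: GDB places a
   breakpoint inside __jit_debug_register_code; a JIT publishes a new
   symbol file by linking a jit_code_entry into the descriptor, setting
   action_flag, and calling that function.  tcg_register_jit_int below
   performs exactly this sequence for the single entry we create.  */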
6553 
6554 static int find_string(const char *strtab, const char *str)
6555 {
6556     const char *p = strtab + 1;
6557 
6558     while (1) {
6559         if (strcmp(p, str) == 0) {
6560             return p - strtab;
6561         }
6562         p += strlen(p) + 1;
6563     }
6564 }
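/* The loop above relies on the target string being present; every
   caller in tcg_register_jit_int passes a literal contained in
   img_template.str below.  For instance, find_string(img->str, ".text")
   returns 1, the offset just past the leading NUL.  */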
6565 
6566 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
6567                                  const void *debug_frame,
6568                                  size_t debug_frame_size)
6569 {
6570     struct __attribute__((packed)) DebugInfo {
6571         uint32_t  len;
6572         uint16_t  version;
6573         uint32_t  abbrev;
6574         uint8_t   ptr_size;
6575         uint8_t   cu_die;
6576         uint16_t  cu_lang;
6577         uintptr_t cu_low_pc;
6578         uintptr_t cu_high_pc;
6579         uint8_t   fn_die;
6580         char      fn_name[16];
6581         uintptr_t fn_low_pc;
6582         uintptr_t fn_high_pc;
6583         uint8_t   cu_eoc;
6584     };
6585 
6586     struct ElfImage {
6587         ElfW(Ehdr) ehdr;
6588         ElfW(Phdr) phdr;
6589         ElfW(Shdr) shdr[7];
6590         ElfW(Sym)  sym[2];
6591         struct DebugInfo di;
6592         uint8_t    da[24];
6593         char       str[80];
6594     };
6595 
6596     struct ElfImage *img;
6597 
6598     static const struct ElfImage img_template = {
6599         .ehdr = {
6600             .e_ident[EI_MAG0] = ELFMAG0,
6601             .e_ident[EI_MAG1] = ELFMAG1,
6602             .e_ident[EI_MAG2] = ELFMAG2,
6603             .e_ident[EI_MAG3] = ELFMAG3,
6604             .e_ident[EI_CLASS] = ELF_CLASS,
6605             .e_ident[EI_DATA] = ELF_DATA,
6606             .e_ident[EI_VERSION] = EV_CURRENT,
6607             .e_type = ET_EXEC,
6608             .e_machine = ELF_HOST_MACHINE,
6609             .e_version = EV_CURRENT,
6610             .e_phoff = offsetof(struct ElfImage, phdr),
6611             .e_shoff = offsetof(struct ElfImage, shdr),
            .e_ehsize = sizeof(ElfW(Ehdr)),
6613             .e_phentsize = sizeof(ElfW(Phdr)),
6614             .e_phnum = 1,
6615             .e_shentsize = sizeof(ElfW(Shdr)),
6616             .e_shnum = ARRAY_SIZE(img->shdr),
6617             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
6618 #ifdef ELF_HOST_FLAGS
6619             .e_flags = ELF_HOST_FLAGS,
6620 #endif
6621 #ifdef ELF_OSABI
6622             .e_ident[EI_OSABI] = ELF_OSABI,
6623 #endif
6624         },
6625         .phdr = {
6626             .p_type = PT_LOAD,
6627             .p_flags = PF_X,
6628         },
6629         .shdr = {
6630             [0] = { .sh_type = SHT_NULL },
6631             /* Trick: The contents of code_gen_buffer are not present in
6632                this fake ELF file; that got allocated elsewhere.  Therefore
6633                we mark .text as SHT_NOBITS (similar to .bss) so that readers
6634                will not look for contents.  We can record any address.  */
6635             [1] = { /* .text */
6636                 .sh_type = SHT_NOBITS,
6637                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
6638             },
6639             [2] = { /* .debug_info */
6640                 .sh_type = SHT_PROGBITS,
6641                 .sh_offset = offsetof(struct ElfImage, di),
6642                 .sh_size = sizeof(struct DebugInfo),
6643             },
6644             [3] = { /* .debug_abbrev */
6645                 .sh_type = SHT_PROGBITS,
6646                 .sh_offset = offsetof(struct ElfImage, da),
6647                 .sh_size = sizeof(img->da),
6648             },
6649             [4] = { /* .debug_frame */
6650                 .sh_type = SHT_PROGBITS,
6651                 .sh_offset = sizeof(struct ElfImage),
6652             },
6653             [5] = { /* .symtab */
6654                 .sh_type = SHT_SYMTAB,
6655                 .sh_offset = offsetof(struct ElfImage, sym),
6656                 .sh_size = sizeof(img->sym),
6657                 .sh_info = 1,
6658                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
6659                 .sh_entsize = sizeof(ElfW(Sym)),
6660             },
6661             [6] = { /* .strtab */
6662                 .sh_type = SHT_STRTAB,
6663                 .sh_offset = offsetof(struct ElfImage, str),
6664                 .sh_size = sizeof(img->str),
6665             }
6666         },
6667         .sym = {
6668             [1] = { /* code_gen_buffer */
6669                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
6670                 .st_shndx = 1,
6671             }
6672         },
6673         .di = {
6674             .len = sizeof(struct DebugInfo) - 4,
6675             .version = 2,
6676             .ptr_size = sizeof(void *),
6677             .cu_die = 1,
6678             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
6679             .fn_die = 2,
6680             .fn_name = "code_gen_buffer"
6681         },
6682         .da = {
6683             1,          /* abbrev number (the cu) */
6684             0x11, 1,    /* DW_TAG_compile_unit, has children */
6685             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
6686             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6687             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6688             0, 0,       /* end of abbrev */
6689             2,          /* abbrev number (the fn) */
6690             0x2e, 0,    /* DW_TAG_subprogram, no children */
6691             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
6692             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6693             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6694             0, 0,       /* end of abbrev */
6695             0           /* no more abbrev */
6696         },
6697         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
6698                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
6699     };
6700 
6701     /* We only need a single jit entry; statically allocate it.  */
6702     static struct jit_code_entry one_entry;
6703 
6704     uintptr_t buf = (uintptr_t)buf_ptr;
6705     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
6706     DebugFrameHeader *dfh;
6707 
6708     img = g_malloc(img_size);
6709     *img = img_template;
6710 
6711     img->phdr.p_vaddr = buf;
6712     img->phdr.p_paddr = buf;
6713     img->phdr.p_memsz = buf_size;
6714 
6715     img->shdr[1].sh_name = find_string(img->str, ".text");
6716     img->shdr[1].sh_addr = buf;
6717     img->shdr[1].sh_size = buf_size;
6718 
6719     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
6720     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
6721 
6722     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
6723     img->shdr[4].sh_size = debug_frame_size;
6724 
6725     img->shdr[5].sh_name = find_string(img->str, ".symtab");
6726     img->shdr[6].sh_name = find_string(img->str, ".strtab");
6727 
6728     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
6729     img->sym[1].st_value = buf;
6730     img->sym[1].st_size = buf_size;
6731 
6732     img->di.cu_low_pc = buf;
6733     img->di.cu_high_pc = buf + buf_size;
6734     img->di.fn_low_pc = buf;
6735     img->di.fn_high_pc = buf + buf_size;
6736 
6737     dfh = (DebugFrameHeader *)(img + 1);
6738     memcpy(dfh, debug_frame, debug_frame_size);
6739     dfh->fde.func_start = buf;
6740     dfh->fde.func_len = buf_size;
6741 
6742 #ifdef DEBUG_JIT
    /* Enable this block (define DEBUG_JIT above) to debug creation of
       the ELF image with readelf, objdump, or other inspection tools.  */
6745     {
6746         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
6747         FILE *f = fopen(jit, "w+b");
6748         if (f) {
            if (fwrite(img, img_size, 1, f) != 1) {
6750                 /* Avoid stupid unused return value warning for fwrite.  */
6751             }
6752             fclose(f);
6753         }
6754     }
6755 #endif
6756 
6757     one_entry.symfile_addr = img;
6758     one_entry.symfile_size = img_size;
6759 
6760     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
6761     __jit_debug_descriptor.relevant_entry = &one_entry;
6762     __jit_debug_descriptor.first_entry = &one_entry;
6763     __jit_debug_register_code();
6764 }
6765 #else
6766 /* No support for the feature.  Provide the entry point expected by exec.c,
6767    and implement the internal function we declared earlier.  */
6768 
6769 static void tcg_register_jit_int(const void *buf, size_t size,
6770                                  const void *debug_frame,
6771                                  size_t debug_frame_size)
6772 {
6773 }
6774 
6775 void tcg_register_jit(const void *buf, size_t buf_size)
6776 {
6777 }
6778 #endif /* ELF_HOST_MACHINE */
6779 
6780 #if !TCG_TARGET_MAYBE_vec
6781 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
6782 {
6783     g_assert_not_reached();
6784 }
6785 #endif
6786