xref: /qemu/tcg/tcg.c (revision 09ac62682b8d2a8bac36d068f63a31331cc6259a)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 
27 /* Define to dump the ELF file used to communicate with GDB.  */
28 #undef DEBUG_JIT
29 
30 #include "qemu/error-report.h"
31 #include "qemu/cutils.h"
32 #include "qemu/host-utils.h"
33 #include "qemu/qemu-print.h"
34 #include "qemu/cacheflush.h"
35 #include "qemu/cacheinfo.h"
36 #include "qemu/timer.h"
37 #include "exec/translation-block.h"
38 #include "exec/tlb-common.h"
39 #include "tcg/startup.h"
40 #include "tcg/tcg-op-common.h"
41 
42 #if UINTPTR_MAX == UINT32_MAX
43 # define ELF_CLASS  ELFCLASS32
44 #else
45 # define ELF_CLASS  ELFCLASS64
46 #endif
47 #if HOST_BIG_ENDIAN
48 # define ELF_DATA   ELFDATA2MSB
49 #else
50 # define ELF_DATA   ELFDATA2LSB
51 #endif
52 
53 #include "elf.h"
54 #include "exec/log.h"
55 #include "tcg/tcg-ldst.h"
56 #include "tcg/tcg-temp-internal.h"
57 #include "tcg-internal.h"
58 #include "tcg/perf.h"
59 #include "tcg-has.h"
60 #ifdef CONFIG_USER_ONLY
61 #include "user/guest-base.h"
62 #endif
63 
64 /* Forward declarations for functions defined in tcg-target.c.inc and
65    used here. */
66 static void tcg_target_init(TCGContext *s);
67 static void tcg_target_qemu_prologue(TCGContext *s);
68 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
69                         intptr_t value, intptr_t addend);
70 static void tcg_out_nop_fill(tcg_insn_unit *p, int count);
71 
72 typedef struct TCGLabelQemuLdst TCGLabelQemuLdst;
73 static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
74 static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
75 
76 /* The CIE and FDE header definitions will be common to all hosts.  */
77 typedef struct {
78     uint32_t len __attribute__((aligned(sizeof(void *))));
79     uint32_t id;
80     uint8_t version;
81     char augmentation[1];
82     uint8_t code_align;
83     uint8_t data_align;
84     uint8_t return_column;
85 } DebugFrameCIE;
86 
87 typedef struct QEMU_PACKED {
88     uint32_t len __attribute__((aligned(sizeof(void *))));
89     uint32_t cie_offset;
90     uintptr_t func_start;
91     uintptr_t func_len;
92 } DebugFrameFDEHeader;
93 
94 typedef struct QEMU_PACKED {
95     DebugFrameCIE cie;
96     DebugFrameFDEHeader fde;
97 } DebugFrameHeader;
98 
99 struct TCGLabelQemuLdst {
100     bool is_ld;             /* qemu_ld: true, qemu_st: false */
101     MemOpIdx oi;
102     TCGType type;           /* result type of a load */
103     TCGReg addr_reg;        /* reg index for guest virtual addr */
104     TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
105     TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
106     const tcg_insn_unit *raddr;   /* host code addr following the qemu_ld/st */
107     tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
108     QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
109 };
110 
111 static void tcg_register_jit_int(const void *buf, size_t size,
112                                  const void *debug_frame,
113                                  size_t debug_frame_size)
114     __attribute__((unused));
115 
116 /* Forward declarations for functions defined and used in tcg-target.c.inc. */
117 static void tcg_out_tb_start(TCGContext *s);
118 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
119                        intptr_t arg2);
120 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
121 static void tcg_out_movi(TCGContext *s, TCGType type,
122                          TCGReg ret, tcg_target_long arg);
123 static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
124 static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
125 static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
126 static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
127 static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
128 static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
129 static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
130 static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
131 static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
132 static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
133 static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
134 static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
135 static void tcg_out_goto_tb(TCGContext *s, int which);
136 static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
137                        const TCGArg args[TCG_MAX_OP_ARGS],
138                        const int const_args[TCG_MAX_OP_ARGS]);
139 #if TCG_TARGET_MAYBE_vec
140 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
141                             TCGReg dst, TCGReg src);
142 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
143                              TCGReg dst, TCGReg base, intptr_t offset);
144 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
145                              TCGReg dst, int64_t arg);
146 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
147                            unsigned vecl, unsigned vece,
148                            const TCGArg args[TCG_MAX_OP_ARGS],
149                            const int const_args[TCG_MAX_OP_ARGS]);
150 #else
151 static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
152                                    TCGReg dst, TCGReg src)
153 {
154     g_assert_not_reached();
155 }
156 static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
157                                     TCGReg dst, TCGReg base, intptr_t offset)
158 {
159     g_assert_not_reached();
160 }
161 static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
162                                     TCGReg dst, int64_t arg)
163 {
164     g_assert_not_reached();
165 }
166 static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
167                                   unsigned vecl, unsigned vece,
168                                   const TCGArg args[TCG_MAX_OP_ARGS],
169                                   const int const_args[TCG_MAX_OP_ARGS])
170 {
171     g_assert_not_reached();
172 }
173 int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
174 {
175     return 0;
176 }
177 #endif
178 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
179                        intptr_t arg2);
180 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
181                         TCGReg base, intptr_t ofs);
182 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
183                          const TCGHelperInfo *info);
184 static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
185 static bool tcg_target_const_match(int64_t val, int ct,
186                                    TCGType type, TCGCond cond, int vece);
187 
188 #ifndef CONFIG_USER_ONLY
189 #define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
190 #endif
191 
192 typedef struct TCGLdstHelperParam {
193     TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
194     unsigned ntmp;
195     int tmp[3];
196 } TCGLdstHelperParam;
197 
198 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
199                                    const TCGLdstHelperParam *p)
200     __attribute__((unused));
201 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
202                                   bool load_sign, const TCGLdstHelperParam *p)
203     __attribute__((unused));
204 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
205                                    const TCGLdstHelperParam *p)
206     __attribute__((unused));
207 
208 static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
209     [MO_UB] = helper_ldub_mmu,
210     [MO_SB] = helper_ldsb_mmu,
211     [MO_UW] = helper_lduw_mmu,
212     [MO_SW] = helper_ldsw_mmu,
213     [MO_UL] = helper_ldul_mmu,
214     [MO_UQ] = helper_ldq_mmu,
215 #if TCG_TARGET_REG_BITS == 64
216     [MO_SL] = helper_ldsl_mmu,
217     [MO_128] = helper_ld16_mmu,
218 #endif
219 };
220 
221 static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
222     [MO_8]  = helper_stb_mmu,
223     [MO_16] = helper_stw_mmu,
224     [MO_32] = helper_stl_mmu,
225     [MO_64] = helper_stq_mmu,
226 #if TCG_TARGET_REG_BITS == 64
227     [MO_128] = helper_st16_mmu,
228 #endif
229 };
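
/*
 * For illustration: a backend slow path selects the helper from the
 * size (and, for loads, sign) bits of the MemOp, along the lines of
 *
 *     MemOp opc = get_memop(lb->oi);
 *     void *func = lb->is_ld
 *                ? qemu_ld_helpers[opc & MO_SSIZE]
 *                : qemu_st_helpers[opc & MO_SIZE];
 *
 * This sketches the indexing convention only; the actual call is
 * emitted with tcg_out_call and the matching TCGHelperInfo below.
 */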
230 
231 typedef struct {
232     MemOp atom;   /* lg2 bits of atomicity required */
233     MemOp align;  /* lg2 bits of alignment to use */
234 } TCGAtomAlign;
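
/*
 * Example reading of the lg2 encoding: { .atom = 2, .align = 3 } asks
 * for 4-byte (1 << 2) atomicity within an 8-byte (1 << 3) aligned
 * access.
 */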
235 
236 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
237                                            MemOp host_atom, bool allow_two_ops)
238     __attribute__((unused));
239 
240 #ifdef CONFIG_USER_ONLY
241 bool tcg_use_softmmu;
242 #endif
243 
244 TCGContext tcg_init_ctx;
245 __thread TCGContext *tcg_ctx;
246 
247 TCGContext **tcg_ctxs;
248 unsigned int tcg_cur_ctxs;
249 unsigned int tcg_max_ctxs;
250 TCGv_env tcg_env;
251 const void *tcg_code_gen_epilogue;
252 uintptr_t tcg_splitwx_diff;
253 
254 #ifndef CONFIG_TCG_INTERPRETER
255 tcg_prologue_fn *tcg_qemu_tb_exec;
256 #endif
257 
258 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
259 static TCGRegSet tcg_target_call_clobber_regs;
260 
261 #if TCG_TARGET_INSN_UNIT_SIZE == 1
262 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
263 {
264     *s->code_ptr++ = v;
265 }
266 
267 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
268                                                       uint8_t v)
269 {
270     *p = v;
271 }
272 #endif
273 
274 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
275 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
276 {
277     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
278         *s->code_ptr++ = v;
279     } else {
280         tcg_insn_unit *p = s->code_ptr;
281         memcpy(p, &v, sizeof(v));
282         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
283     }
284 }
285 
286 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
287                                                        uint16_t v)
288 {
289     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
290         *p = v;
291     } else {
292         memcpy(p, &v, sizeof(v));
293     }
294 }
295 #endif
296 
297 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
298 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
299 {
300     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
301         *s->code_ptr++ = v;
302     } else {
303         tcg_insn_unit *p = s->code_ptr;
304         memcpy(p, &v, sizeof(v));
305         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
306     }
307 }
308 
309 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
310                                                        uint32_t v)
311 {
312     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
313         *p = v;
314     } else {
315         memcpy(p, &v, sizeof(v));
316     }
317 }
318 #endif
319 
320 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
321 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
322 {
323     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
324         *s->code_ptr++ = v;
325     } else {
326         tcg_insn_unit *p = s->code_ptr;
327         memcpy(p, &v, sizeof(v));
328         s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
329     }
330 }
331 
332 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
333                                                        uint64_t v)
334 {
335     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
336         *p = v;
337     } else {
338         memcpy(p, &v, sizeof(v));
339     }
340 }
341 #endif
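
/*
 * A sketch of how a backend composes these emitters: with 1-byte
 * insn units, a variable-length instruction is emitted piecewise
 * (the opcode and displacement values here are hypothetical),
 *
 *     tcg_out8(s, 0xe9);       opcode byte
 *     tcg_out32(s, disp);      32-bit displacement
 *
 * while a fixed-width backend with 4-byte units emits one tcg_out32
 * per instruction.  The tcg_patchN routines rewrite a previously
 * emitted unit in place, e.g. when resolving a forward branch.
 */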
342 
343 /* label relocation processing */
344 
345 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
346                           TCGLabel *l, intptr_t addend)
347 {
348     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
349 
350     r->type = type;
351     r->ptr = code_ptr;
352     r->addend = addend;
353     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
354 }
355 
356 static void tcg_out_label(TCGContext *s, TCGLabel *l)
357 {
358     tcg_debug_assert(!l->has_value);
359     l->has_value = 1;
360     l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
361 }
362 
363 TCGLabel *gen_new_label(void)
364 {
365     TCGContext *s = tcg_ctx;
366     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
367 
368     memset(l, 0, sizeof(TCGLabel));
369     l->id = s->nb_labels++;
370     QSIMPLEQ_INIT(&l->branches);
371     QSIMPLEQ_INIT(&l->relocs);
372 
373     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
374 
375     return l;
376 }
377 
378 static bool tcg_resolve_relocs(TCGContext *s)
379 {
380     TCGLabel *l;
381 
382     QSIMPLEQ_FOREACH(l, &s->labels, next) {
383         TCGRelocation *r;
384         uintptr_t value = l->u.value;
385 
386         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
387             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
388                 return false;
389             }
390         }
391     }
392     return true;
393 }
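
/*
 * Typical life cycle of a forward branch, as a sketch (the reloc
 * type and placeholder encoding are backend-specific and named
 * hypothetically here):
 *
 *     tcg_out_reloc(s, s->code_ptr, R_MY_BRANCH, l, 0);
 *     tcg_out32(s, BRANCH_PLACEHOLDER);
 *     ...
 *     tcg_out_label(s, l);
 *
 * tcg_resolve_relocs() then walks each label's reloc list once code
 * generation finishes, applying patch_reloc() to every recorded site.
 */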
394 
395 static void set_jmp_reset_offset(TCGContext *s, int which)
396 {
397     /*
398      * We will check for overflow at the end of the opcode loop in
399      * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
400      */
401     s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
402 }
403 
404 static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
405 {
406     /*
407      * We will check for overflow at the end of the opcode loop in
408      * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
409      */
410     s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
411 }
412 
413 static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
414 {
415     /*
416      * Return the read-execute version of the pointer, for the benefit
417      * of any pc-relative addressing mode.
418      */
419     return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
420 }
421 
422 static int __attribute__((unused))
423 tlb_mask_table_ofs(TCGContext *s, int which)
424 {
425     return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
426             sizeof(CPUNegativeOffsetState));
427 }
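
/*
 * CPUNegativeOffsetState sits immediately below CPUArchState, so the
 * value above is negative and applies directly to the env pointer.
 * A sketch of the usual consumer (field names per "exec/tlb-common.h";
 * the registers are hypothetical):
 *
 *     int ofs = tlb_mask_table_ofs(s, mem_index);
 *     tcg_out_ld(s, TCG_TYPE_PTR, r0, TCG_AREG0,
 *                ofs + offsetof(CPUTLBDescFast, mask));
 *     tcg_out_ld(s, TCG_TYPE_PTR, r1, TCG_AREG0,
 *                ofs + offsetof(CPUTLBDescFast, table));
 */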
428 
429 /* Signal overflow, starting over with fewer guest insns. */
430 static G_NORETURN
431 void tcg_raise_tb_overflow(TCGContext *s)
432 {
433     siglongjmp(s->jmp_trans, -2);
434 }
435 
436 /*
437  * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
438  * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
439  *
440  * However, tcg_out_helper_load_slots reuses this field to hold an
441  * argument slot number (which may designate an argument register or an
442  * argument stack slot), converting to TCGReg once all arguments that
443  * are destined for the stack are processed.
444  */
445 typedef struct TCGMovExtend {
446     unsigned dst;
447     TCGReg src;
448     TCGType dst_type;
449     TCGType src_type;
450     MemOp src_ext;
451 } TCGMovExtend;
452 
453 /**
454  * tcg_out_movext -- move and extend
455  * @s: tcg context
456  * @dst_type: integral type for destination
457  * @dst: destination register
458  * @src_type: integral type for source
459  * @src_ext: extension to apply to source
460  * @src: source register
461  *
462  * Move or extend @src into @dst, depending on @src_ext and the types.
463  */
464 static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
465                            TCGType src_type, MemOp src_ext, TCGReg src)
466 {
467     switch (src_ext) {
468     case MO_UB:
469         tcg_out_ext8u(s, dst, src);
470         break;
471     case MO_SB:
472         tcg_out_ext8s(s, dst_type, dst, src);
473         break;
474     case MO_UW:
475         tcg_out_ext16u(s, dst, src);
476         break;
477     case MO_SW:
478         tcg_out_ext16s(s, dst_type, dst, src);
479         break;
480     case MO_UL:
481     case MO_SL:
482         if (dst_type == TCG_TYPE_I32) {
483             if (src_type == TCG_TYPE_I32) {
484                 tcg_out_mov(s, TCG_TYPE_I32, dst, src);
485             } else {
486                 tcg_out_extrl_i64_i32(s, dst, src);
487             }
488         } else if (src_type == TCG_TYPE_I32) {
489             if (src_ext & MO_SIGN) {
490                 tcg_out_exts_i32_i64(s, dst, src);
491             } else {
492                 tcg_out_extu_i32_i64(s, dst, src);
493             }
494         } else {
495             if (src_ext & MO_SIGN) {
496                 tcg_out_ext32s(s, dst, src);
497             } else {
498                 tcg_out_ext32u(s, dst, src);
499             }
500         }
501         break;
502     case MO_UQ:
503         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
504         if (dst_type == TCG_TYPE_I32) {
505             tcg_out_extrl_i64_i32(s, dst, src);
506         } else {
507             tcg_out_mov(s, TCG_TYPE_I64, dst, src);
508         }
509         break;
510     default:
511         g_assert_not_reached();
512     }
513 }
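
/*
 * Example: widening a signed 16-bit value into a 64-bit register is
 *
 *     tcg_out_movext(s, TCG_TYPE_I64, dst, TCG_TYPE_I32, MO_SW, src);
 *
 * which dispatches to tcg_out_ext16s above, while MO_UL with matching
 * 32-bit types degenerates to a plain tcg_out_mov.
 */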
514 
515 /* Minor variations on a theme, using a structure. */
516 static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
517                                     TCGReg src)
518 {
519     tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
520 }
521 
522 static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
523 {
524     tcg_out_movext1_new_src(s, i, i->src);
525 }
526 
527 /**
528  * tcg_out_movext2 -- move and extend two pairs
529  * @s: tcg context
530  * @i1: first move description
531  * @i2: second move description
532  * @scratch: temporary register, or -1 for none
533  *
534  * As tcg_out_movext, for both @i1 and @i2, taking care of overlap
535  * between the sources and destinations.
536  */
537 
538 static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
539                             const TCGMovExtend *i2, int scratch)
540 {
541     TCGReg src1 = i1->src;
542     TCGReg src2 = i2->src;
543 
544     if (i1->dst != src2) {
545         tcg_out_movext1(s, i1);
546         tcg_out_movext1(s, i2);
547         return;
548     }
549     if (i2->dst == src1) {
550         TCGType src1_type = i1->src_type;
551         TCGType src2_type = i2->src_type;
552 
553         if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
554             /* The data is now in the correct registers, now extend. */
555             src1 = i2->src;
556             src2 = i1->src;
557         } else {
558             tcg_debug_assert(scratch >= 0);
559             tcg_out_mov(s, src1_type, scratch, src1);
560             src1 = scratch;
561         }
562     }
563     tcg_out_movext1_new_src(s, i2, src2);
564     tcg_out_movext1_new_src(s, i1, src1);
565 }
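
/*
 * For example, i1 = {dst=R0, src=R1} with i2 = {dst=R1, src=R0} is a
 * cycle: i1->dst equals src2, so the simple ordering is unusable, and
 * i2->dst equals src1, forcing either an xchg or a bounce through
 * @scratch before the extensions are applied.
 */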
566 
567 /**
568  * tcg_out_movext3 -- move and extend three pairs
569  * @s: tcg context
570  * @i1: first move description
571  * @i2: second move description
572  * @i3: third move description
573  * @scratch: temporary register, or -1 for none
574  *
575  * As tcg_out_movext, for all of @i1, @i2 and @i3, taking care of overlap
576  * between the sources and destinations.
577  */
578 
579 static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
580                             const TCGMovExtend *i2, const TCGMovExtend *i3,
581                             int scratch)
582 {
583     TCGReg src1 = i1->src;
584     TCGReg src2 = i2->src;
585     TCGReg src3 = i3->src;
586 
587     if (i1->dst != src2 && i1->dst != src3) {
588         tcg_out_movext1(s, i1);
589         tcg_out_movext2(s, i2, i3, scratch);
590         return;
591     }
592     if (i2->dst != src1 && i2->dst != src3) {
593         tcg_out_movext1(s, i2);
594         tcg_out_movext2(s, i1, i3, scratch);
595         return;
596     }
597     if (i3->dst != src1 && i3->dst != src2) {
598         tcg_out_movext1(s, i3);
599         tcg_out_movext2(s, i1, i2, scratch);
600         return;
601     }
602 
603     /*
604      * There is a cycle.  Since there are only 3 nodes, the cycle is
605      * either "clockwise" or "anti-clockwise", and can be solved with
606  * a single scratch register or two xchg operations.
607      */
608     if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
609         /* "Clockwise" */
610         if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
611             tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
612             /* The data is now in the correct registers, now extend. */
613             tcg_out_movext1_new_src(s, i1, i1->dst);
614             tcg_out_movext1_new_src(s, i2, i2->dst);
615             tcg_out_movext1_new_src(s, i3, i3->dst);
616         } else {
617             tcg_debug_assert(scratch >= 0);
618             tcg_out_mov(s, i1->src_type, scratch, src1);
619             tcg_out_movext1(s, i3);
620             tcg_out_movext1(s, i2);
621             tcg_out_movext1_new_src(s, i1, scratch);
622         }
623     } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
624         /* "Anti-clockwise" */
625         if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
626             tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
627             /* The data is now in the correct registers, now extend. */
628             tcg_out_movext1_new_src(s, i1, i1->dst);
629             tcg_out_movext1_new_src(s, i2, i2->dst);
630             tcg_out_movext1_new_src(s, i3, i3->dst);
631         } else {
632             tcg_debug_assert(scratch >= 0);
633             tcg_out_mov(s, i1->src_type, scratch, src1);
634             tcg_out_movext1(s, i2);
635             tcg_out_movext1(s, i3);
636             tcg_out_movext1_new_src(s, i1, scratch);
637         }
638     } else {
639         g_assert_not_reached();
640     }
641 }
642 
643 /*
644  * Allocate a new TCGLabelQemuLdst entry.
645  */
646 
647 __attribute__((unused))
648 static TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
649 {
650     TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));
651 
652     memset(l, 0, sizeof(*l));
653     QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next);
654 
655     return l;
656 }
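
/*
 * A sketch of backend usage when emitting qemu_ld (register choices
 * and the patched branch are hypothetical):
 *
 *     TCGLabelQemuLdst *ldst = new_ldst_label(s);
 *     ldst->is_ld = true;
 *     ldst->oi = oi;
 *     ldst->addr_reg = addr_reg;
 *     ldst->datalo_reg = data_reg;
 *     ldst->label_ptr[0] = s->code_ptr;     TLB-miss branch to patch
 *     ... fast path ...
 *     ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
 *
 * The slow paths themselves are generated later, by
 * tcg_out_ldst_finalize() below.
 */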
657 
658 /*
659  * Allocate new constant pool entries.
660  */
661 
662 typedef struct TCGLabelPoolData {
663     struct TCGLabelPoolData *next;
664     tcg_insn_unit *label;
665     intptr_t addend;
666     int rtype;
667     unsigned nlong;
668     tcg_target_ulong data[];
669 } TCGLabelPoolData;
670 
671 static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype,
672                                         tcg_insn_unit *label, intptr_t addend)
673 {
674     TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData)
675                                      + sizeof(tcg_target_ulong) * nlong);
676 
677     n->label = label;
678     n->addend = addend;
679     n->rtype = rtype;
680     n->nlong = nlong;
681     return n;
682 }
683 
684 static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n)
685 {
686     TCGLabelPoolData *i, **pp;
687     int nlong = n->nlong;
688 
689     /* Insertion sort on the pool.  */
690     for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) {
691         if (nlong > i->nlong) {
692             break;
693         }
694         if (nlong < i->nlong) {
695             continue;
696         }
697         if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) {
698             break;
699         }
700     }
701     n->next = *pp;
702     *pp = n;
703 }
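
/*
 * The list thus stays ordered by descending nlong, then descending
 * data, so equal entries become adjacent; tcg_out_pool_finalize()
 * below exploits this to deduplicate with a single memcmp against
 * the most recently emitted entry.
 */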
704 
705 /* The "usual" for generic integer code.  */
706 __attribute__((unused))
707 static void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype,
708                            tcg_insn_unit *label, intptr_t addend)
709 {
710     TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend);
711     n->data[0] = d;
712     new_pool_insert(s, n);
713 }
714 
715 /* For v64 or v128, depending on the host.  */
716 __attribute__((unused))
717 static void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label,
718                         intptr_t addend, tcg_target_ulong d0,
719                         tcg_target_ulong d1)
720 {
721     TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend);
722     n->data[0] = d0;
723     n->data[1] = d1;
724     new_pool_insert(s, n);
725 }
726 
727 /* For v128 or v256, depending on the host.  */
728 __attribute__((unused))
729 static void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label,
730                         intptr_t addend, tcg_target_ulong d0,
731                         tcg_target_ulong d1, tcg_target_ulong d2,
732                         tcg_target_ulong d3)
733 {
734     TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend);
735     n->data[0] = d0;
736     n->data[1] = d1;
737     n->data[2] = d2;
738     n->data[3] = d3;
739     new_pool_insert(s, n);
740 }
741 
742 /* For v256 on a 32-bit host.  */
743 __attribute__((unused))
744 static void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label,
745                         intptr_t addend, tcg_target_ulong d0,
746                         tcg_target_ulong d1, tcg_target_ulong d2,
747                         tcg_target_ulong d3, tcg_target_ulong d4,
748                         tcg_target_ulong d5, tcg_target_ulong d6,
749                         tcg_target_ulong d7)
750 {
751     TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend);
752     n->data[0] = d0;
753     n->data[1] = d1;
754     n->data[2] = d2;
755     n->data[3] = d3;
756     n->data[4] = d4;
757     n->data[5] = d5;
758     n->data[6] = d6;
759     n->data[7] = d7;
760     new_pool_insert(s, n);
761 }
762 
763 /*
764  * Generate TB finalization at the end of block
765  */
766 
767 static int tcg_out_ldst_finalize(TCGContext *s)
768 {
769     TCGLabelQemuLdst *lb;
770 
771     /* qemu_ld/st slow paths */
772     QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) {
773         if (lb->is_ld
774             ? !tcg_out_qemu_ld_slow_path(s, lb)
775             : !tcg_out_qemu_st_slow_path(s, lb)) {
776             return -2;
777         }
778 
779         /*
780          * Test for (pending) buffer overflow.  The assumption is that any
781          * one operation beginning below the high water mark cannot overrun
782          * the buffer completely.  Thus we can test for overflow after
783          * generating code without having to check during generation.
784          */
785         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
786             return -1;
787         }
788     }
789     return 0;
790 }
791 
792 static int tcg_out_pool_finalize(TCGContext *s)
793 {
794     TCGLabelPoolData *p = s->pool_labels;
795     TCGLabelPoolData *l = NULL;
796     void *a;
797 
798     if (p == NULL) {
799         return 0;
800     }
801 
802     /*
803      * ??? Round up to qemu_icache_linesize, but then do not round
804      * again when allocating the next TranslationBlock structure.
805      */
806     a = (void *)ROUND_UP((uintptr_t)s->code_ptr,
807                          sizeof(tcg_target_ulong) * p->nlong);
808     tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr);
809     s->data_gen_ptr = a;
810 
811     for (; p != NULL; p = p->next) {
812         size_t size = sizeof(tcg_target_ulong) * p->nlong;
813         uintptr_t value;
814 
815         if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) {
816             if (unlikely(a > s->code_gen_highwater)) {
817                 return -1;
818             }
819             memcpy(a, p->data, size);
820             a += size;
821             l = p;
822         }
823 
824         value = (uintptr_t)tcg_splitwx_to_rx(a) - size;
825         if (!patch_reloc(p->label, p->rtype, value, p->addend)) {
826             return -2;
827         }
828     }
829 
830     s->code_ptr = a;
831     return 0;
832 }
833 
834 #define C_PFX1(P, A)                    P##A
835 #define C_PFX2(P, A, B)                 P##A##_##B
836 #define C_PFX3(P, A, B, C)              P##A##_##B##_##C
837 #define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
838 #define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
839 #define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
840 
841 /* Define an enumeration for the various combinations. */
842 
843 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
844 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
845 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
846 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),
847 
848 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
849 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
850 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
851 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),
852 
853 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
854 #define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1),
855 #define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),
856 
857 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
858 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
859 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
860 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
861 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),
862 
863 typedef enum {
864     C_NotImplemented = -1,
865 #include "tcg-target-con-set.h"
866 } TCGConstraintSetIndex;
867 
868 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode, TCGType, unsigned);
869 
870 #undef C_O0_I1
871 #undef C_O0_I2
872 #undef C_O0_I3
873 #undef C_O0_I4
874 #undef C_O1_I1
875 #undef C_O1_I2
876 #undef C_O1_I3
877 #undef C_O1_I4
878 #undef C_N1_I2
879 #undef C_N1O1_I1
880 #undef C_N2_I1
881 #undef C_O2_I1
882 #undef C_O2_I2
883 #undef C_O2_I3
884 #undef C_O2_I4
885 #undef C_N1_O1_I4
886 
887 /* Put all of the constraint sets into an array, indexed by the enum. */
888 
889 typedef struct TCGConstraintSet {
890     uint8_t nb_oargs, nb_iargs;
891     const char *args_ct_str[TCG_MAX_OP_ARGS];
892 } TCGConstraintSet;
893 
894 #define C_O0_I1(I1)                     { 0, 1, { #I1 } },
895 #define C_O0_I2(I1, I2)                 { 0, 2, { #I1, #I2 } },
896 #define C_O0_I3(I1, I2, I3)             { 0, 3, { #I1, #I2, #I3 } },
897 #define C_O0_I4(I1, I2, I3, I4)         { 0, 4, { #I1, #I2, #I3, #I4 } },
898 
899 #define C_O1_I1(O1, I1)                 { 1, 1, { #O1, #I1 } },
900 #define C_O1_I2(O1, I1, I2)             { 1, 2, { #O1, #I1, #I2 } },
901 #define C_O1_I3(O1, I1, I2, I3)         { 1, 3, { #O1, #I1, #I2, #I3 } },
902 #define C_O1_I4(O1, I1, I2, I3, I4)     { 1, 4, { #O1, #I1, #I2, #I3, #I4 } },
903 
904 #define C_N1_I2(O1, I1, I2)             { 1, 2, { "&" #O1, #I1, #I2 } },
905 #define C_N1O1_I1(O1, O2, I1)           { 2, 1, { "&" #O1, #O2, #I1 } },
906 #define C_N2_I1(O1, O2, I1)             { 2, 1, { "&" #O1, "&" #O2, #I1 } },
907 
908 #define C_O2_I1(O1, O2, I1)             { 2, 1, { #O1, #O2, #I1 } },
909 #define C_O2_I2(O1, O2, I1, I2)         { 2, 2, { #O1, #O2, #I1, #I2 } },
910 #define C_O2_I3(O1, O2, I1, I2, I3)     { 2, 3, { #O1, #O2, #I1, #I2, #I3 } },
911 #define C_O2_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { #O1, #O2, #I1, #I2, #I3, #I4 } },
912 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },
913 
914 static const TCGConstraintSet constraint_sets[] = {
915 #include "tcg-target-con-set.h"
916 };
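
/*
 * As an illustration, a line such as C_O1_I2(r, r, ri) in
 * tcg-target-con-set.h expands three ways: to the enumerator
 * c_o1_i2_r_r_ri above, to the table entry
 * { 1, 2, { "r", "r", "ri" } } here, and, after the redefinitions
 * below, back to that enumerator as the value returned by a
 * backend's tcg_target_op_def().
 */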
917 
918 #undef C_O0_I1
919 #undef C_O0_I2
920 #undef C_O0_I3
921 #undef C_O0_I4
922 #undef C_O1_I1
923 #undef C_O1_I2
924 #undef C_O1_I3
925 #undef C_O1_I4
926 #undef C_N1_I2
927 #undef C_N1O1_I1
928 #undef C_N2_I1
929 #undef C_O2_I1
930 #undef C_O2_I2
931 #undef C_O2_I3
932 #undef C_O2_I4
933 #undef C_N1_O1_I4
934 
935 /* Expand the enumerator to be returned from tcg_target_op_def(). */
936 
937 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
938 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
939 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
940 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)
941 
942 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
943 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
944 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
945 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)
946 
947 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
948 #define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1)
949 #define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)
950 
951 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
952 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
953 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
954 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
955 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)
956 
957 #include "tcg-target.c.inc"
958 
959 #ifndef CONFIG_TCG_INTERPRETER
960 /* Validate CPUTLBDescFast placement. */
961 QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
962                         sizeof(CPUNegativeOffsetState))
963                   < MIN_TLB_MASK_TABLE_OFS);
964 #endif
965 
966 /*
967  * All TCG threads except the parent (i.e. the one that called tcg_context_init
968  * and registered the target's TCG globals) must register with this function
969  * before initiating translation.
970  *
971  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
972  * of tcg_region_init() for the reasoning behind this.
973  *
974  * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
975  * system-mode tcg_ctxs[] does not track tcg_init_ctx, since the initial context
976  * is not used anymore for translation once this function is called.
977  *
978  * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
979  * iterates over the array (e.g. tcg_code_size()) the same for both system/user
980  * modes.
981  */
982 #ifdef CONFIG_USER_ONLY
983 void tcg_register_thread(void)
984 {
985     tcg_ctx = &tcg_init_ctx;
986 }
987 #else
988 void tcg_register_thread(void)
989 {
990     TCGContext *s = g_malloc(sizeof(*s));
991     unsigned int i, n;
992 
993     *s = tcg_init_ctx;
994 
995     /* Relink mem_base.  */
996     for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
997         if (tcg_init_ctx.temps[i].mem_base) {
998             ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
999             tcg_debug_assert(b >= 0 && b < n);
1000             s->temps[i].mem_base = &s->temps[b];
1001         }
1002     }
1003 
1004     /* Claim an entry in tcg_ctxs */
1005     n = qatomic_fetch_inc(&tcg_cur_ctxs);
1006     g_assert(n < tcg_max_ctxs);
1007     qatomic_set(&tcg_ctxs[n], s);
1008 
1009     if (n > 0) {
1010         tcg_region_initial_alloc(s);
1011     }
1012 
1013     tcg_ctx = s;
1014 }
1015 #endif /* !CONFIG_USER_ONLY */
1016 
1017 /* pool based memory allocation */
1018 void *tcg_malloc_internal(TCGContext *s, int size)
1019 {
1020     TCGPool *p;
1021     int pool_size;
1022 
1023     if (size > TCG_POOL_CHUNK_SIZE) {
1024         /* big malloc: insert a new pool (XXX: could optimize) */
1025         p = g_malloc(sizeof(TCGPool) + size);
1026         p->size = size;
1027         p->next = s->pool_first_large;
1028         s->pool_first_large = p;
1029         return p->data;
1030     } else {
1031         p = s->pool_current;
1032         if (!p) {
1033             p = s->pool_first;
1034             if (!p)
1035                 goto new_pool;
1036         } else {
1037             if (!p->next) {
1038             new_pool:
1039                 pool_size = TCG_POOL_CHUNK_SIZE;
1040                 p = g_malloc(sizeof(TCGPool) + pool_size);
1041                 p->size = pool_size;
1042                 p->next = NULL;
1043                 if (s->pool_current) {
1044                     s->pool_current->next = p;
1045                 } else {
1046                     s->pool_first = p;
1047                 }
1048             } else {
1049                 p = p->next;
1050             }
1051         }
1052     }
1053     s->pool_current = p;
1054     s->pool_cur = p->data + size;
1055     s->pool_end = p->data + p->size;
1056     return p->data;
1057 }
1058 
1059 void tcg_pool_reset(TCGContext *s)
1060 {
1061     TCGPool *p, *t;
1062     for (p = s->pool_first_large; p; p = t) {
1063         t = p->next;
1064         g_free(p);
1065     }
1066     s->pool_first_large = NULL;
1067     s->pool_cur = s->pool_end = NULL;
1068     s->pool_current = NULL;
1069 }
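
/*
 * Example: transient objects such as TCGLabel and TCGRelocation are
 * carved from this pool via tcg_malloc() while one TB is translated,
 * and all are released at once by tcg_pool_reset() from
 * tcg_func_start(); there is no per-object free.
 */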
1070 
1071 /*
1072  * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
1073  * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
1074  * We only use these for layout in tcg_out_ld_helper_ret and
1075  * tcg_out_st_helper_args, and share them between several of
1076  * the helpers, with the end result that it's easier to build manually.
1077  */
1078 
1079 #if TCG_TARGET_REG_BITS == 32
1080 # define dh_typecode_ttl  dh_typecode_i32
1081 #else
1082 # define dh_typecode_ttl  dh_typecode_i64
1083 #endif
1084 
1085 static TCGHelperInfo info_helper_ld32_mmu = {
1086     .flags = TCG_CALL_NO_WG,
1087     .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
1088               | dh_typemask(env, 1)
1089               | dh_typemask(i64, 2)  /* uint64_t addr */
1090               | dh_typemask(i32, 3)  /* unsigned oi */
1091               | dh_typemask(ptr, 4)  /* uintptr_t ra */
1092 };
1093 
1094 static TCGHelperInfo info_helper_ld64_mmu = {
1095     .flags = TCG_CALL_NO_WG,
1096     .typemask = dh_typemask(i64, 0)  /* return uint64_t */
1097               | dh_typemask(env, 1)
1098               | dh_typemask(i64, 2)  /* uint64_t addr */
1099               | dh_typemask(i32, 3)  /* unsigned oi */
1100               | dh_typemask(ptr, 4)  /* uintptr_t ra */
1101 };
1102 
1103 static TCGHelperInfo info_helper_ld128_mmu = {
1104     .flags = TCG_CALL_NO_WG,
1105     .typemask = dh_typemask(i128, 0) /* return Int128 */
1106               | dh_typemask(env, 1)
1107               | dh_typemask(i64, 2)  /* uint64_t addr */
1108               | dh_typemask(i32, 3)  /* unsigned oi */
1109               | dh_typemask(ptr, 4)  /* uintptr_t ra */
1110 };
1111 
1112 static TCGHelperInfo info_helper_st32_mmu = {
1113     .flags = TCG_CALL_NO_WG,
1114     .typemask = dh_typemask(void, 0)
1115               | dh_typemask(env, 1)
1116               | dh_typemask(i64, 2)  /* uint64_t addr */
1117               | dh_typemask(i32, 3)  /* uint32_t data */
1118               | dh_typemask(i32, 4)  /* unsigned oi */
1119               | dh_typemask(ptr, 5)  /* uintptr_t ra */
1120 };
1121 
1122 static TCGHelperInfo info_helper_st64_mmu = {
1123     .flags = TCG_CALL_NO_WG,
1124     .typemask = dh_typemask(void, 0)
1125               | dh_typemask(env, 1)
1126               | dh_typemask(i64, 2)  /* uint64_t addr */
1127               | dh_typemask(i64, 3)  /* uint64_t data */
1128               | dh_typemask(i32, 4)  /* unsigned oi */
1129               | dh_typemask(ptr, 5)  /* uintptr_t ra */
1130 };
1131 
1132 static TCGHelperInfo info_helper_st128_mmu = {
1133     .flags = TCG_CALL_NO_WG,
1134     .typemask = dh_typemask(void, 0)
1135               | dh_typemask(env, 1)
1136               | dh_typemask(i64, 2)  /* uint64_t addr */
1137               | dh_typemask(i128, 3) /* Int128 data */
1138               | dh_typemask(i32, 4)  /* unsigned oi */
1139               | dh_typemask(ptr, 5)  /* uintptr_t ra */
1140 };
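
/*
 * The typemask packs one 3-bit typecode per slot, slot 0 being the
 * return value.  For info_helper_st64_mmu above, for example,
 *
 *     extract32(typemask, 3 * 3, 3) == dh_typecode_i64
 *
 * recovers the uint64_t data argument in slot 3.
 */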
1141 
1142 #ifdef CONFIG_TCG_INTERPRETER
1143 static ffi_type *typecode_to_ffi(int argmask)
1144 {
1145     /*
1146      * libffi does not support __int128_t, so we have forced Int128
1147      * to use the structure definition instead of the builtin type.
1148      */
1149     static ffi_type *ffi_type_i128_elements[3] = {
1150         &ffi_type_uint64,
1151         &ffi_type_uint64,
1152         NULL
1153     };
1154     static ffi_type ffi_type_i128 = {
1155         .size = 16,
1156         .alignment = __alignof__(Int128),
1157         .type = FFI_TYPE_STRUCT,
1158         .elements = ffi_type_i128_elements,
1159     };
1160 
1161     switch (argmask) {
1162     case dh_typecode_void:
1163         return &ffi_type_void;
1164     case dh_typecode_i32:
1165         return &ffi_type_uint32;
1166     case dh_typecode_s32:
1167         return &ffi_type_sint32;
1168     case dh_typecode_i64:
1169         return &ffi_type_uint64;
1170     case dh_typecode_s64:
1171         return &ffi_type_sint64;
1172     case dh_typecode_ptr:
1173         return &ffi_type_pointer;
1174     case dh_typecode_i128:
1175         return &ffi_type_i128;
1176     }
1177     g_assert_not_reached();
1178 }
1179 
1180 static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
1181 {
1182     unsigned typemask = info->typemask;
1183     struct {
1184         ffi_cif cif;
1185         ffi_type *args[];
1186     } *ca;
1187     ffi_status status;
1188     int nargs;
1189 
1190     /* Ignoring the return type, find the last non-zero field. */
1191     nargs = 32 - clz32(typemask >> 3);
1192     nargs = DIV_ROUND_UP(nargs, 3);
1193     assert(nargs <= MAX_CALL_IARGS);
1194 
1195     ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
1196     ca->cif.rtype = typecode_to_ffi(typemask & 7);
1197     ca->cif.nargs = nargs;
1198 
1199     if (nargs != 0) {
1200         ca->cif.arg_types = ca->args;
1201         for (int j = 0; j < nargs; ++j) {
1202             int typecode = extract32(typemask, (j + 1) * 3, 3);
1203             ca->args[j] = typecode_to_ffi(typecode);
1204         }
1205     }
1206 
1207     status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
1208                           ca->cif.rtype, ca->cif.arg_types);
1209     assert(status == FFI_OK);
1210 
1211     return &ca->cif;
1212 }
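
/*
 * Worked example of the nargs computation: for info_helper_st32_mmu,
 * the five argument slots occupy bits [3, 18) of the typemask, so
 * 32 - clz32(typemask >> 3) lies between 13 and 15, and DIV_ROUND_UP
 * of that by 3 yields the 5 arguments (env, addr, data, oi, ra).
 */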
1213 
1214 #define HELPER_INFO_INIT(I)      (&(I)->cif)
1215 #define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
1216 #else
1217 #define HELPER_INFO_INIT(I)      (&(I)->init)
1218 #define HELPER_INFO_INIT_VAL(I)  1
1219 #endif /* CONFIG_TCG_INTERPRETER */
1220 
1221 static inline bool arg_slot_reg_p(unsigned arg_slot)
1222 {
1223     /*
1224      * Split the sizeof away from the comparison to avoid a -Werror
1225      * "unsigned < 0 is always false" warning when iarg_regs is empty.
1226      */
1227     unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
1228     return arg_slot < nreg;
1229 }
1230 
1231 static inline int arg_slot_stk_ofs(unsigned arg_slot)
1232 {
1233     unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1234     unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);
1235 
1236     tcg_debug_assert(stk_slot < max);
1237     return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
1238 }
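
/*
 * For example, on a host with 6 integer argument registers and
 * TCG_TARGET_CALL_STACK_OFFSET == 0 (values picked for illustration),
 * arg_slot 7 is the second stack slot:
 *
 *     arg_slot_stk_ofs(7) == (7 - 6) * sizeof(tcg_target_long)
 */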
1239 
1240 typedef struct TCGCumulativeArgs {
1241     int arg_idx;                /* tcg_gen_callN args[] */
1242     int info_in_idx;            /* TCGHelperInfo in[] */
1243     int arg_slot;               /* regs+stack slot */
1244     int ref_slot;               /* stack slots for references */
1245 } TCGCumulativeArgs;
1246 
1247 static void layout_arg_even(TCGCumulativeArgs *cum)
1248 {
1249     cum->arg_slot += cum->arg_slot & 1;
1250 }
1251 
1252 static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
1253                          TCGCallArgumentKind kind)
1254 {
1255     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1256 
1257     *loc = (TCGCallArgumentLoc){
1258         .kind = kind,
1259         .arg_idx = cum->arg_idx,
1260         .arg_slot = cum->arg_slot,
1261     };
1262     cum->info_in_idx++;
1263     cum->arg_slot++;
1264 }
1265 
1266 static void layout_arg_normal_n(TCGCumulativeArgs *cum,
1267                                 TCGHelperInfo *info, int n)
1268 {
1269     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1270 
1271     for (int i = 0; i < n; ++i) {
1272         /* Layout all using the same arg_idx, adjusting the subindex. */
1273         loc[i] = (TCGCallArgumentLoc){
1274             .kind = TCG_CALL_ARG_NORMAL,
1275             .arg_idx = cum->arg_idx,
1276             .tmp_subindex = i,
1277             .arg_slot = cum->arg_slot + i,
1278         };
1279     }
1280     cum->info_in_idx += n;
1281     cum->arg_slot += n;
1282 }
1283 
1284 static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
1285 {
1286     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1287     int n = 128 / TCG_TARGET_REG_BITS;
1288 
1289     /* The first subindex carries the pointer. */
1290     layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);
1291 
1292     /*
1293      * The callee is allowed to clobber memory associated with
1294      * structures passed by reference.  Therefore we must make copies.
1295      * Allocate space from "ref_slot", which will be adjusted to
1296      * follow the parameters on the stack.
1297      */
1298     loc[0].ref_slot = cum->ref_slot;
1299 
1300     /*
1301      * Subsequent words also go into the reference slot, but
1302      * do not accumulate into the regular arguments.
1303      */
1304     for (int i = 1; i < n; ++i) {
1305         loc[i] = (TCGCallArgumentLoc){
1306             .kind = TCG_CALL_ARG_BY_REF_N,
1307             .arg_idx = cum->arg_idx,
1308             .tmp_subindex = i,
1309             .ref_slot = cum->ref_slot + i,
1310         };
1311     }
1312     cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
1313     cum->ref_slot += n;
1314 }
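
/*
 * Concretely, an Int128 passed TCG_CALL_ARG_BY_REF on a 64-bit host
 * consumes one regular argument slot (the pointer) plus two ref_slot
 * words for the copy, and info_in_idx advances by two so that both
 * data words remain described for the value store.
 */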
1315 
1316 static void init_call_layout(TCGHelperInfo *info)
1317 {
1318     int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
1319     int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1320     unsigned typemask = info->typemask;
1321     unsigned typecode;
1322     TCGCumulativeArgs cum = { };
1323 
1324     /*
1325      * Parse and place any function return value.
1326      */
1327     typecode = typemask & 7;
1328     switch (typecode) {
1329     case dh_typecode_void:
1330         info->nr_out = 0;
1331         break;
1332     case dh_typecode_i32:
1333     case dh_typecode_s32:
1334     case dh_typecode_ptr:
1335         info->nr_out = 1;
1336         info->out_kind = TCG_CALL_RET_NORMAL;
1337         break;
1338     case dh_typecode_i64:
1339     case dh_typecode_s64:
1340         info->nr_out = 64 / TCG_TARGET_REG_BITS;
1341         info->out_kind = TCG_CALL_RET_NORMAL;
1342         /* Query the last register now to trigger any assert early. */
1343         tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1344         break;
1345     case dh_typecode_i128:
1346         info->nr_out = 128 / TCG_TARGET_REG_BITS;
1347         info->out_kind = TCG_TARGET_CALL_RET_I128;
1348         switch (TCG_TARGET_CALL_RET_I128) {
1349         case TCG_CALL_RET_NORMAL:
1350             /* Query the last register now to trigger any assert early. */
1351             tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1352             break;
1353         case TCG_CALL_RET_BY_VEC:
1354             /* Query the single register now to trigger any assert early. */
1355             tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
1356             break;
1357         case TCG_CALL_RET_BY_REF:
1358             /*
1359              * Allocate the first argument to the output.
1360              * We don't need to store this anywhere, just make it
1361              * unavailable for use in the input loop below.
1362              */
1363             cum.arg_slot = 1;
1364             break;
1365         default:
1366             qemu_build_not_reached();
1367         }
1368         break;
1369     default:
1370         g_assert_not_reached();
1371     }
1372 
1373     /*
1374      * Parse and place function arguments.
1375      */
1376     for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
1377         TCGCallArgumentKind kind;
1378         TCGType type;
1379 
1380         typecode = typemask & 7;
1381         switch (typecode) {
1382         case dh_typecode_i32:
1383         case dh_typecode_s32:
1384             type = TCG_TYPE_I32;
1385             break;
1386         case dh_typecode_i64:
1387         case dh_typecode_s64:
1388             type = TCG_TYPE_I64;
1389             break;
1390         case dh_typecode_ptr:
1391             type = TCG_TYPE_PTR;
1392             break;
1393         case dh_typecode_i128:
1394             type = TCG_TYPE_I128;
1395             break;
1396         default:
1397             g_assert_not_reached();
1398         }
1399 
1400         switch (type) {
1401         case TCG_TYPE_I32:
1402             switch (TCG_TARGET_CALL_ARG_I32) {
1403             case TCG_CALL_ARG_EVEN:
1404                 layout_arg_even(&cum);
1405                 /* fall through */
1406             case TCG_CALL_ARG_NORMAL:
1407                 layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1408                 break;
1409             case TCG_CALL_ARG_EXTEND:
1410                 kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
1411                 layout_arg_1(&cum, info, kind);
1412                 break;
1413             default:
1414                 qemu_build_not_reached();
1415             }
1416             break;
1417 
1418         case TCG_TYPE_I64:
1419             switch (TCG_TARGET_CALL_ARG_I64) {
1420             case TCG_CALL_ARG_EVEN:
1421                 layout_arg_even(&cum);
1422                 /* fall through */
1423             case TCG_CALL_ARG_NORMAL:
1424                 if (TCG_TARGET_REG_BITS == 32) {
1425                     layout_arg_normal_n(&cum, info, 2);
1426                 } else {
1427                     layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1428                 }
1429                 break;
1430             default:
1431                 qemu_build_not_reached();
1432             }
1433             break;
1434 
1435         case TCG_TYPE_I128:
1436             switch (TCG_TARGET_CALL_ARG_I128) {
1437             case TCG_CALL_ARG_EVEN:
1438                 layout_arg_even(&cum);
1439                 /* fall through */
1440             case TCG_CALL_ARG_NORMAL:
1441                 layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
1442                 break;
1443             case TCG_CALL_ARG_BY_REF:
1444                 layout_arg_by_ref(&cum, info);
1445                 break;
1446             default:
1447                 qemu_build_not_reached();
1448             }
1449             break;
1450 
1451         default:
1452             g_assert_not_reached();
1453         }
1454     }
1455     info->nr_in = cum.info_in_idx;
1456 
1457     /* Validate that we didn't overrun the input array. */
1458     assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
1459     /* Validate the backend has enough argument space. */
1460     assert(cum.arg_slot <= max_reg_slots + max_stk_slots);
1461 
1462     /*
1463      * Relocate the "ref_slot" area to the end of the parameters.
1464      * Minimizing this stack offset helps code size for x86,
1465      * which has a signed 8-bit offset encoding.
1466      */
1467     if (cum.ref_slot != 0) {
1468         int ref_base = 0;
1469 
1470         if (cum.arg_slot > max_reg_slots) {
1471             int align = __alignof(Int128) / sizeof(tcg_target_long);
1472 
1473             ref_base = cum.arg_slot - max_reg_slots;
1474             if (align > 1) {
1475                 ref_base = ROUND_UP(ref_base, align);
1476             }
1477         }
1478         assert(ref_base + cum.ref_slot <= max_stk_slots);
1479         ref_base += max_reg_slots;
1480 
1481         if (ref_base != 0) {
1482             for (int i = cum.info_in_idx - 1; i >= 0; --i) {
1483                 TCGCallArgumentLoc *loc = &info->in[i];
1484                 switch (loc->kind) {
1485                 case TCG_CALL_ARG_BY_REF:
1486                 case TCG_CALL_ARG_BY_REF_N:
1487                     loc->ref_slot += ref_base;
1488                     break;
1489                 default:
1490                     break;
1491                 }
1492             }
1493         }
1494     }
1495 }
1496 
1497 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
1498 static void process_constraint_sets(void);
1499 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1500                                             TCGReg reg, const char *name);
1501 
1502 static void tcg_context_init(unsigned max_cpus)
1503 {
1504     TCGContext *s = &tcg_init_ctx;
1505     int n, i;
1506     TCGTemp *ts;
1507 
1508     memset(s, 0, sizeof(*s));
1509     s->nb_globals = 0;
1510 
1511     init_call_layout(&info_helper_ld32_mmu);
1512     init_call_layout(&info_helper_ld64_mmu);
1513     init_call_layout(&info_helper_ld128_mmu);
1514     init_call_layout(&info_helper_st32_mmu);
1515     init_call_layout(&info_helper_st64_mmu);
1516     init_call_layout(&info_helper_st128_mmu);
1517 
1518     tcg_target_init(s);
1519     process_constraint_sets();
1520 
1521     /* Reverse the order of the saved registers, assuming they're all at
1522        the start of tcg_target_reg_alloc_order.  */
1523     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
1524         int r = tcg_target_reg_alloc_order[n];
1525         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
1526             break;
1527         }
1528     }
1529     for (i = 0; i < n; ++i) {
1530         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
1531     }
1532     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
1533         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
1534     }
1535 
1536     tcg_ctx = s;
1537     /*
1538      * In user-mode we simply share the init context among threads, since we
1539      * use a single region. See the documentation of tcg_region_init() for the
1540      * reasoning behind this.
1541      * In system-mode we will have at most max_cpus TCG threads.
1542      */
1543 #ifdef CONFIG_USER_ONLY
1544     tcg_ctxs = &tcg_ctx;
1545     tcg_cur_ctxs = 1;
1546     tcg_max_ctxs = 1;
1547 #else
1548     tcg_max_ctxs = max_cpus;
1549     tcg_ctxs = g_new0(TCGContext *, max_cpus);
1550 #endif
1551 
1552     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1553     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1554     tcg_env = temp_tcgv_ptr(ts);
1555 }
1556 
1557 void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
1558 {
1559     tcg_context_init(max_cpus);
1560     tcg_region_init(tb_size, splitwx, max_cpus);
1561 }
1562 
1563 /*
1564  * Allocate TBs right before their corresponding translated code, making
1565  * sure that TBs and code are on different cache lines.
1566  */
1567 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1568 {
1569     uintptr_t align = qemu_icache_linesize;
1570     TranslationBlock *tb;
1571     void *next;
1572 
1573  retry:
1574     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1575     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1576 
1577     if (unlikely(next > s->code_gen_highwater)) {
1578         if (tcg_region_alloc(s)) {
1579             return NULL;
1580         }
1581         goto retry;
1582     }
1583     qatomic_set(&s->code_gen_ptr, next);
1584     return tb;
1585 }
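/*
 * Illustration with hypothetical numbers: for a 64-byte icache line and
 * code_gen_ptr == 0x1010, the TranslationBlock is placed at 0x1040 and
 * code_gen_ptr advances to the first line boundary past the TB header,
 * so the header and the translated code that follows never share a
 * cache line.  Crossing code_gen_highwater triggers a region refill and
 * a retry.
 */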
1586 
1587 void tcg_prologue_init(void)
1588 {
1589     TCGContext *s = tcg_ctx;
1590     size_t prologue_size;
1591 
1592     s->code_ptr = s->code_gen_ptr;
1593     s->code_buf = s->code_gen_ptr;
1594     s->data_gen_ptr = NULL;
1595 
1596 #ifndef CONFIG_TCG_INTERPRETER
1597     tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
1598 #endif
1599 
1600     s->pool_labels = NULL;
1601 
1602     qemu_thread_jit_write();
1603     /* Generate the prologue.  */
1604     tcg_target_qemu_prologue(s);
1605 
1606     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1607     {
1608         int result = tcg_out_pool_finalize(s);
1609         tcg_debug_assert(result == 0);
1610     }
1611 
1612     prologue_size = tcg_current_code_size(s);
1613     perf_report_prologue(s->code_gen_ptr, prologue_size);
1614 
1615 #ifndef CONFIG_TCG_INTERPRETER
1616     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
1617                         (uintptr_t)s->code_buf, prologue_size);
1618 #endif
1619 
1620     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1621         FILE *logfile = qemu_log_trylock();
1622         if (logfile) {
1623             fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
1624             if (s->data_gen_ptr) {
1625                 size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
1626                 size_t data_size = prologue_size - code_size;
1627                 size_t i;
1628 
1629                 disas(logfile, s->code_gen_ptr, code_size);
1630 
1631                 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1632                     if (sizeof(tcg_target_ulong) == 8) {
1633                         fprintf(logfile,
1634                                 "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1635                                 (uintptr_t)s->data_gen_ptr + i,
1636                                 *(uint64_t *)(s->data_gen_ptr + i));
1637                     } else {
1638                         fprintf(logfile,
1639                                 "0x%08" PRIxPTR ":  .long  0x%08x\n",
1640                                 (uintptr_t)s->data_gen_ptr + i,
1641                                 *(uint32_t *)(s->data_gen_ptr + i));
1642                     }
1643                 }
1644             } else {
1645                 disas(logfile, s->code_gen_ptr, prologue_size);
1646             }
1647             fprintf(logfile, "\n");
1648             qemu_log_unlock(logfile);
1649         }
1650     }
1651 
1652 #ifndef CONFIG_TCG_INTERPRETER
1653     /*
1654      * Assert that goto_ptr is implemented completely, setting an epilogue.
1655      * For tci, we use NULL as the signal to return from the interpreter,
1656      * so skip this check.
1657      */
1658     tcg_debug_assert(tcg_code_gen_epilogue != NULL);
1659 #endif
1660 
1661     tcg_region_prologue_set(s);
1662 }
1663 
1664 void tcg_func_start(TCGContext *s)
1665 {
1666     tcg_pool_reset(s);
1667     s->nb_temps = s->nb_globals;
1668 
1669     /* No temps of any size or locality have been previously allocated.  */
1670     tcg_temp_ebb_reset_freed(s);
1671 
1672     /* No constant temps have been previously allocated. */
1673     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1674         if (s->const_table[i]) {
1675             g_hash_table_remove_all(s->const_table[i]);
1676         }
1677     }
1678 
1679     s->nb_ops = 0;
1680     s->nb_labels = 0;
1681     s->current_frame_offset = s->frame_start;
1682 
1683 #ifdef CONFIG_DEBUG_TCG
1684     s->goto_tb_issue_mask = 0;
1685 #endif
1686 
1687     QTAILQ_INIT(&s->ops);
1688     QTAILQ_INIT(&s->free_ops);
1689     s->emit_before_op = NULL;
1690     QSIMPLEQ_INIT(&s->labels);
1691 
1692     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
1693     tcg_debug_assert(s->insn_start_words > 0);
1694 }
1695 
1696 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1697 {
1698     int n = s->nb_temps++;
1699 
1700     if (n >= TCG_MAX_TEMPS) {
1701         tcg_raise_tb_overflow(s);
1702     }
1703     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1704 }
1705 
1706 static TCGTemp *tcg_global_alloc(TCGContext *s)
1707 {
1708     TCGTemp *ts;
1709 
1710     tcg_debug_assert(s->nb_globals == s->nb_temps);
1711     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1712     s->nb_globals++;
1713     ts = tcg_temp_alloc(s);
1714     ts->kind = TEMP_GLOBAL;
1715 
1716     return ts;
1717 }
1718 
1719 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1720                                             TCGReg reg, const char *name)
1721 {
1722     TCGTemp *ts;
1723 
1724     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1725 
1726     ts = tcg_global_alloc(s);
1727     ts->base_type = type;
1728     ts->type = type;
1729     ts->kind = TEMP_FIXED;
1730     ts->reg = reg;
1731     ts->name = name;
1732     tcg_regset_set_reg(s->reserved_regs, reg);
1733 
1734     return ts;
1735 }
1736 
1737 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1738 {
1739     s->frame_start = start;
1740     s->frame_end = start + size;
1741     s->frame_temp
1742         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1743 }
1744 
1745 static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset,
1746                                             const char *name, TCGType type)
1747 {
1748     TCGContext *s = tcg_ctx;
1749     TCGTemp *base_ts = tcgv_ptr_temp(base);
1750     TCGTemp *ts = tcg_global_alloc(s);
1751     int indirect_reg = 0;
1752 
1753     switch (base_ts->kind) {
1754     case TEMP_FIXED:
1755         break;
1756     case TEMP_GLOBAL:
1757         /* We do not support double-indirect registers.  */
1758         tcg_debug_assert(!base_ts->indirect_reg);
1759         base_ts->indirect_base = 1;
1760         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1761                             ? 2 : 1);
1762         indirect_reg = 1;
1763         break;
1764     default:
1765         g_assert_not_reached();
1766     }
1767 
1768     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1769         TCGTemp *ts2 = tcg_global_alloc(s);
1770         char buf[64];
1771 
1772         ts->base_type = TCG_TYPE_I64;
1773         ts->type = TCG_TYPE_I32;
1774         ts->indirect_reg = indirect_reg;
1775         ts->mem_allocated = 1;
1776         ts->mem_base = base_ts;
1777         ts->mem_offset = offset;
1778         pstrcpy(buf, sizeof(buf), name);
1779         pstrcat(buf, sizeof(buf), "_0");
1780         ts->name = strdup(buf);
1781 
1782         tcg_debug_assert(ts2 == ts + 1);
1783         ts2->base_type = TCG_TYPE_I64;
1784         ts2->type = TCG_TYPE_I32;
1785         ts2->indirect_reg = indirect_reg;
1786         ts2->mem_allocated = 1;
1787         ts2->mem_base = base_ts;
1788         ts2->mem_offset = offset + 4;
1789         ts2->temp_subindex = 1;
1790         pstrcpy(buf, sizeof(buf), name);
1791         pstrcat(buf, sizeof(buf), "_1");
1792         ts2->name = strdup(buf);
1793     } else {
1794         ts->base_type = type;
1795         ts->type = type;
1796         ts->indirect_reg = indirect_reg;
1797         ts->mem_allocated = 1;
1798         ts->mem_base = base_ts;
1799         ts->mem_offset = offset;
1800         ts->name = name;
1801     }
1802     return ts;
1803 }
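/*
 * Sketch of the 32-bit-host path above, with hypothetical names and
 * offsets: an I64 global "pc" at env offset 0x80 is registered as two
 * I32 halves, "pc_0" at offset 0x80 and "pc_1" at offset 0x84, both
 * sharing mem_base and both marked indirect_reg when the base is
 * itself a TEMP_GLOBAL.
 */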
1804 
1805 TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name)
1806 {
1807     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32);
1808     return temp_tcgv_i32(ts);
1809 }
1810 
1811 TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name)
1812 {
1813     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64);
1814     return temp_tcgv_i64(ts);
1815 }
1816 
1817 TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name)
1818 {
1819     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR);
1820     return temp_tcgv_ptr(ts);
1821 }
1822 
1823 TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
1824 {
1825     TCGContext *s = tcg_ctx;
1826     TCGTemp *ts;
1827     int n;
1828 
1829     if (kind == TEMP_EBB) {
1830         int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);
1831 
1832         if (idx < TCG_MAX_TEMPS) {
1833             /* There is already an available temp with the right type.  */
1834             clear_bit(idx, s->free_temps[type].l);
1835 
1836             ts = &s->temps[idx];
1837             ts->temp_allocated = 1;
1838             tcg_debug_assert(ts->base_type == type);
1839             tcg_debug_assert(ts->kind == kind);
1840             return ts;
1841         }
1842     } else {
1843         tcg_debug_assert(kind == TEMP_TB);
1844     }
1845 
1846     switch (type) {
1847     case TCG_TYPE_I32:
1848     case TCG_TYPE_V64:
1849     case TCG_TYPE_V128:
1850     case TCG_TYPE_V256:
1851         n = 1;
1852         break;
1853     case TCG_TYPE_I64:
1854         n = 64 / TCG_TARGET_REG_BITS;
1855         break;
1856     case TCG_TYPE_I128:
1857         n = 128 / TCG_TARGET_REG_BITS;
1858         break;
1859     default:
1860         g_assert_not_reached();
1861     }
1862 
1863     ts = tcg_temp_alloc(s);
1864     ts->base_type = type;
1865     ts->temp_allocated = 1;
1866     ts->kind = kind;
1867 
1868     if (n == 1) {
1869         ts->type = type;
1870     } else {
1871         ts->type = TCG_TYPE_REG;
1872 
1873         for (int i = 1; i < n; ++i) {
1874             TCGTemp *ts2 = tcg_temp_alloc(s);
1875 
1876             tcg_debug_assert(ts2 == ts + i);
1877             ts2->base_type = type;
1878             ts2->type = TCG_TYPE_REG;
1879             ts2->temp_allocated = 1;
1880             ts2->temp_subindex = i;
1881             ts2->kind = kind;
1882         }
1883     }
1884     return ts;
1885 }
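/*
 * Example of the multi-word case above: on a 32-bit host, TCG_TYPE_I64
 * yields n == 2, so one allocation consumes two consecutive TCGTemp
 * slots with temp_subindex 0 and 1, each typed TCG_TYPE_REG;
 * TCG_TYPE_I128 likewise yields n == 4 there and n == 2 on a 64-bit
 * host.
 */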
1886 
1887 TCGv_i32 tcg_temp_new_i32(void)
1888 {
1889     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB));
1890 }
1891 
1892 TCGv_i32 tcg_temp_ebb_new_i32(void)
1893 {
1894     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB));
1895 }
1896 
1897 TCGv_i64 tcg_temp_new_i64(void)
1898 {
1899     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB));
1900 }
1901 
1902 TCGv_i64 tcg_temp_ebb_new_i64(void)
1903 {
1904     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB));
1905 }
1906 
1907 TCGv_ptr tcg_temp_new_ptr(void)
1908 {
1909     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB));
1910 }
1911 
1912 TCGv_ptr tcg_temp_ebb_new_ptr(void)
1913 {
1914     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB));
1915 }
1916 
1917 TCGv_i128 tcg_temp_new_i128(void)
1918 {
1919     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB));
1920 }
1921 
1922 TCGv_i128 tcg_temp_ebb_new_i128(void)
1923 {
1924     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB));
1925 }
1926 
1927 TCGv_vec tcg_temp_new_vec(TCGType type)
1928 {
1929     TCGTemp *t;
1930 
1931 #ifdef CONFIG_DEBUG_TCG
1932     switch (type) {
1933     case TCG_TYPE_V64:
1934         assert(TCG_TARGET_HAS_v64);
1935         break;
1936     case TCG_TYPE_V128:
1937         assert(TCG_TARGET_HAS_v128);
1938         break;
1939     case TCG_TYPE_V256:
1940         assert(TCG_TARGET_HAS_v256);
1941         break;
1942     default:
1943         g_assert_not_reached();
1944     }
1945 #endif
1946 
1947     t = tcg_temp_new_internal(type, TEMP_EBB);
1948     return temp_tcgv_vec(t);
1949 }
1950 
1951 /* Create a new temp of the same type as an existing temp.  */
1952 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1953 {
1954     TCGTemp *t = tcgv_vec_temp(match);
1955 
1956     tcg_debug_assert(t->temp_allocated != 0);
1957 
1958     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
1959     return temp_tcgv_vec(t);
1960 }
1961 
1962 void tcg_temp_free_internal(TCGTemp *ts)
1963 {
1964     TCGContext *s = tcg_ctx;
1965 
1966     switch (ts->kind) {
1967     case TEMP_CONST:
1968     case TEMP_TB:
1969         /* Silently ignore free. */
1970         break;
1971     case TEMP_EBB:
1972         tcg_debug_assert(ts->temp_allocated != 0);
1973         ts->temp_allocated = 0;
1974         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
1975         break;
1976     default:
1977         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
1978         g_assert_not_reached();
1979     }
1980 }
1981 
1982 void tcg_temp_free_i32(TCGv_i32 arg)
1983 {
1984     tcg_temp_free_internal(tcgv_i32_temp(arg));
1985 }
1986 
1987 void tcg_temp_free_i64(TCGv_i64 arg)
1988 {
1989     tcg_temp_free_internal(tcgv_i64_temp(arg));
1990 }
1991 
1992 void tcg_temp_free_i128(TCGv_i128 arg)
1993 {
1994     tcg_temp_free_internal(tcgv_i128_temp(arg));
1995 }
1996 
1997 void tcg_temp_free_ptr(TCGv_ptr arg)
1998 {
1999     tcg_temp_free_internal(tcgv_ptr_temp(arg));
2000 }
2001 
2002 void tcg_temp_free_vec(TCGv_vec arg)
2003 {
2004     tcg_temp_free_internal(tcgv_vec_temp(arg));
2005 }
2006 
2007 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
2008 {
2009     TCGContext *s = tcg_ctx;
2010     GHashTable *h = s->const_table[type];
2011     TCGTemp *ts;
2012 
2013     if (h == NULL) {
2014         h = g_hash_table_new(g_int64_hash, g_int64_equal);
2015         s->const_table[type] = h;
2016     }
2017 
2018     ts = g_hash_table_lookup(h, &val);
2019     if (ts == NULL) {
2020         int64_t *val_ptr;
2021 
2022         ts = tcg_temp_alloc(s);
2023 
2024         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
2025             TCGTemp *ts2 = tcg_temp_alloc(s);
2026 
2027             tcg_debug_assert(ts2 == ts + 1);
2028 
2029             ts->base_type = TCG_TYPE_I64;
2030             ts->type = TCG_TYPE_I32;
2031             ts->kind = TEMP_CONST;
2032             ts->temp_allocated = 1;
2033 
2034             ts2->base_type = TCG_TYPE_I64;
2035             ts2->type = TCG_TYPE_I32;
2036             ts2->kind = TEMP_CONST;
2037             ts2->temp_allocated = 1;
2038             ts2->temp_subindex = 1;
2039 
2040             /*
2041              * Retain the full value of the 64-bit constant in the low
2042              * part, so that the hash table works.  Actual uses will
2043              * truncate the value to the low part.
2044              */
2045             ts[HOST_BIG_ENDIAN].val = val;
2046             ts[!HOST_BIG_ENDIAN].val = val >> 32;
2047             val_ptr = &ts[HOST_BIG_ENDIAN].val;
2048         } else {
2049             ts->base_type = type;
2050             ts->type = type;
2051             ts->kind = TEMP_CONST;
2052             ts->temp_allocated = 1;
2053             ts->val = val;
2054             val_ptr = &ts->val;
2055         }
2056         g_hash_table_insert(h, val_ptr, ts);
2057     }
2058 
2059     return ts;
2060 }
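/*
 * Worked example for the 32-bit-host I64 path above, assuming a
 * little-endian host: for val == 0x100000002, ts[0].val holds the full
 * 64-bit value (doubling as the hash key and the low part) and
 * ts[1].val holds 0x1; users of the low half truncate to 32 bits.
 */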
2061 
2062 TCGv_i32 tcg_constant_i32(int32_t val)
2063 {
2064     return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val));
2065 }
2066 
2067 TCGv_i64 tcg_constant_i64(int64_t val)
2068 {
2069     return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val));
2070 }
2071 
2072 TCGv_ptr tcg_constant_ptr_int(intptr_t val)
2073 {
2074     return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val));
2075 }
2076 
2077 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
2078 {
2079     val = dup_const(vece, val);
2080     return temp_tcgv_vec(tcg_constant_internal(type, val));
2081 }
2082 
2083 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
2084 {
2085     TCGTemp *t = tcgv_vec_temp(match);
2086 
2087     tcg_debug_assert(t->temp_allocated != 0);
2088     return tcg_constant_vec(t->base_type, vece, val);
2089 }
2090 
2091 #ifdef CONFIG_DEBUG_TCG
2092 size_t temp_idx(TCGTemp *ts)
2093 {
2094     ptrdiff_t n = ts - tcg_ctx->temps;
2095     assert(n >= 0 && n < tcg_ctx->nb_temps);
2096     return n;
2097 }
2098 
2099 TCGTemp *tcgv_i32_temp(TCGv_i32 v)
2100 {
2101     uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);
2102 
2103     assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
2104     assert(o % sizeof(TCGTemp) == 0);
2105 
2106     return (void *)tcg_ctx + (uintptr_t)v;
2107 }
2108 #endif /* CONFIG_DEBUG_TCG */
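/*
 * The assertions above rely on TCGv handles encoding the byte offset of
 * a temp within tcg_ctx.  For example (hypothetical index), the handle
 * for temps[2] is offsetof(TCGContext, temps) + 2 * sizeof(TCGTemp),
 * which (void *)tcg_ctx + (uintptr_t)v maps back to &tcg_ctx->temps[2].
 */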
2109 
2110 /*
2111  * Return true if OP may appear in the opcode stream with TYPE.
2112  * Test the runtime variable that controls each opcode.
2113  */
2114 bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
2115 {
2116     bool has_type;
2117 
2118     switch (type) {
2119     case TCG_TYPE_I32:
2120         has_type = true;
2121         break;
2122     case TCG_TYPE_I64:
2123         has_type = TCG_TARGET_REG_BITS == 64;
2124         break;
2125     case TCG_TYPE_V64:
2126         has_type = TCG_TARGET_HAS_v64;
2127         break;
2128     case TCG_TYPE_V128:
2129         has_type = TCG_TARGET_HAS_v128;
2130         break;
2131     case TCG_TYPE_V256:
2132         has_type = TCG_TARGET_HAS_v256;
2133         break;
2134     default:
2135         has_type = false;
2136         break;
2137     }
2138 
2139     switch (op) {
2140     case INDEX_op_discard:
2141     case INDEX_op_set_label:
2142     case INDEX_op_call:
2143     case INDEX_op_br:
2144     case INDEX_op_mb:
2145     case INDEX_op_insn_start:
2146     case INDEX_op_exit_tb:
2147     case INDEX_op_goto_tb:
2148     case INDEX_op_goto_ptr:
2149     case INDEX_op_qemu_ld_i32:
2150     case INDEX_op_qemu_st_i32:
2151     case INDEX_op_qemu_ld_i64:
2152     case INDEX_op_qemu_st_i64:
2153         return true;
2154 
2155     case INDEX_op_qemu_st8_i32:
2156         return TCG_TARGET_HAS_qemu_st8_i32;
2157 
2158     case INDEX_op_qemu_ld_i128:
2159     case INDEX_op_qemu_st_i128:
2160         return TCG_TARGET_HAS_qemu_ldst_i128;
2161 
2162     case INDEX_op_mov_i32:
2163     case INDEX_op_setcond_i32:
2164     case INDEX_op_brcond_i32:
2165     case INDEX_op_movcond_i32:
2166     case INDEX_op_ld8u_i32:
2167     case INDEX_op_ld8s_i32:
2168     case INDEX_op_ld16u_i32:
2169     case INDEX_op_ld16s_i32:
2170     case INDEX_op_ld_i32:
2171     case INDEX_op_st8_i32:
2172     case INDEX_op_st16_i32:
2173     case INDEX_op_st_i32:
2174     case INDEX_op_add_i32:
2175     case INDEX_op_sub_i32:
2176     case INDEX_op_neg_i32:
2177     case INDEX_op_mul_i32:
2178     case INDEX_op_and_i32:
2179     case INDEX_op_or_i32:
2180     case INDEX_op_xor_i32:
2181     case INDEX_op_shl_i32:
2182     case INDEX_op_shr_i32:
2183     case INDEX_op_sar_i32:
2184     case INDEX_op_extract_i32:
2185     case INDEX_op_sextract_i32:
2186     case INDEX_op_deposit_i32:
2187         return true;
2188 
2189     case INDEX_op_negsetcond_i32:
2190         return TCG_TARGET_HAS_negsetcond_i32;
2191     case INDEX_op_div_i32:
2192     case INDEX_op_divu_i32:
2193         return TCG_TARGET_HAS_div_i32;
2194     case INDEX_op_rem_i32:
2195     case INDEX_op_remu_i32:
2196         return TCG_TARGET_HAS_rem_i32;
2197     case INDEX_op_div2_i32:
2198     case INDEX_op_divu2_i32:
2199         return TCG_TARGET_HAS_div2_i32;
2200     case INDEX_op_rotl_i32:
2201     case INDEX_op_rotr_i32:
2202         return TCG_TARGET_HAS_rot_i32;
2203     case INDEX_op_extract2_i32:
2204         return TCG_TARGET_HAS_extract2_i32;
2205     case INDEX_op_add2_i32:
2206         return TCG_TARGET_HAS_add2_i32;
2207     case INDEX_op_sub2_i32:
2208         return TCG_TARGET_HAS_sub2_i32;
2209     case INDEX_op_mulu2_i32:
2210         return TCG_TARGET_HAS_mulu2_i32;
2211     case INDEX_op_muls2_i32:
2212         return TCG_TARGET_HAS_muls2_i32;
2213     case INDEX_op_muluh_i32:
2214         return TCG_TARGET_HAS_muluh_i32;
2215     case INDEX_op_mulsh_i32:
2216         return TCG_TARGET_HAS_mulsh_i32;
2217     case INDEX_op_ext8s_i32:
2218         return TCG_TARGET_HAS_ext8s_i32;
2219     case INDEX_op_ext16s_i32:
2220         return TCG_TARGET_HAS_ext16s_i32;
2221     case INDEX_op_ext8u_i32:
2222         return TCG_TARGET_HAS_ext8u_i32;
2223     case INDEX_op_ext16u_i32:
2224         return TCG_TARGET_HAS_ext16u_i32;
2225     case INDEX_op_bswap16_i32:
2226         return TCG_TARGET_HAS_bswap16_i32;
2227     case INDEX_op_bswap32_i32:
2228         return TCG_TARGET_HAS_bswap32_i32;
2229     case INDEX_op_not_i32:
2230         return TCG_TARGET_HAS_not_i32;
2231     case INDEX_op_andc_i32:
2232         return TCG_TARGET_HAS_andc_i32;
2233     case INDEX_op_orc_i32:
2234         return TCG_TARGET_HAS_orc_i32;
2235     case INDEX_op_eqv_i32:
2236         return TCG_TARGET_HAS_eqv_i32;
2237     case INDEX_op_nand_i32:
2238         return TCG_TARGET_HAS_nand_i32;
2239     case INDEX_op_nor_i32:
2240         return TCG_TARGET_HAS_nor_i32;
2241     case INDEX_op_clz_i32:
2242         return TCG_TARGET_HAS_clz_i32;
2243     case INDEX_op_ctz_i32:
2244         return TCG_TARGET_HAS_ctz_i32;
2245     case INDEX_op_ctpop_i32:
2246         return TCG_TARGET_HAS_ctpop_i32;
2247 
2248     case INDEX_op_brcond2_i32:
2249     case INDEX_op_setcond2_i32:
2250         return TCG_TARGET_REG_BITS == 32;
2251 
2252     case INDEX_op_mov_i64:
2253     case INDEX_op_setcond_i64:
2254     case INDEX_op_brcond_i64:
2255     case INDEX_op_movcond_i64:
2256     case INDEX_op_ld8u_i64:
2257     case INDEX_op_ld8s_i64:
2258     case INDEX_op_ld16u_i64:
2259     case INDEX_op_ld16s_i64:
2260     case INDEX_op_ld32u_i64:
2261     case INDEX_op_ld32s_i64:
2262     case INDEX_op_ld_i64:
2263     case INDEX_op_st8_i64:
2264     case INDEX_op_st16_i64:
2265     case INDEX_op_st32_i64:
2266     case INDEX_op_st_i64:
2267     case INDEX_op_add_i64:
2268     case INDEX_op_sub_i64:
2269     case INDEX_op_neg_i64:
2270     case INDEX_op_mul_i64:
2271     case INDEX_op_and_i64:
2272     case INDEX_op_or_i64:
2273     case INDEX_op_xor_i64:
2274     case INDEX_op_shl_i64:
2275     case INDEX_op_shr_i64:
2276     case INDEX_op_sar_i64:
2277     case INDEX_op_ext_i32_i64:
2278     case INDEX_op_extu_i32_i64:
2279     case INDEX_op_extract_i64:
2280     case INDEX_op_sextract_i64:
2281     case INDEX_op_deposit_i64:
2282         return TCG_TARGET_REG_BITS == 64;
2283 
2284     case INDEX_op_negsetcond_i64:
2285         return TCG_TARGET_HAS_negsetcond_i64;
2286     case INDEX_op_div_i64:
2287     case INDEX_op_divu_i64:
2288         return TCG_TARGET_HAS_div_i64;
2289     case INDEX_op_rem_i64:
2290     case INDEX_op_remu_i64:
2291         return TCG_TARGET_HAS_rem_i64;
2292     case INDEX_op_div2_i64:
2293     case INDEX_op_divu2_i64:
2294         return TCG_TARGET_HAS_div2_i64;
2295     case INDEX_op_rotl_i64:
2296     case INDEX_op_rotr_i64:
2297         return TCG_TARGET_HAS_rot_i64;
2298     case INDEX_op_extract2_i64:
2299         return TCG_TARGET_HAS_extract2_i64;
2300     case INDEX_op_extrl_i64_i32:
2301     case INDEX_op_extrh_i64_i32:
2302         return TCG_TARGET_HAS_extr_i64_i32;
2303     case INDEX_op_ext8s_i64:
2304         return TCG_TARGET_HAS_ext8s_i64;
2305     case INDEX_op_ext16s_i64:
2306         return TCG_TARGET_HAS_ext16s_i64;
2307     case INDEX_op_ext32s_i64:
2308         return TCG_TARGET_HAS_ext32s_i64;
2309     case INDEX_op_ext8u_i64:
2310         return TCG_TARGET_HAS_ext8u_i64;
2311     case INDEX_op_ext16u_i64:
2312         return TCG_TARGET_HAS_ext16u_i64;
2313     case INDEX_op_ext32u_i64:
2314         return TCG_TARGET_HAS_ext32u_i64;
2315     case INDEX_op_bswap16_i64:
2316         return TCG_TARGET_HAS_bswap16_i64;
2317     case INDEX_op_bswap32_i64:
2318         return TCG_TARGET_HAS_bswap32_i64;
2319     case INDEX_op_bswap64_i64:
2320         return TCG_TARGET_HAS_bswap64_i64;
2321     case INDEX_op_not_i64:
2322         return TCG_TARGET_HAS_not_i64;
2323     case INDEX_op_andc_i64:
2324         return TCG_TARGET_HAS_andc_i64;
2325     case INDEX_op_orc_i64:
2326         return TCG_TARGET_HAS_orc_i64;
2327     case INDEX_op_eqv_i64:
2328         return TCG_TARGET_HAS_eqv_i64;
2329     case INDEX_op_nand_i64:
2330         return TCG_TARGET_HAS_nand_i64;
2331     case INDEX_op_nor_i64:
2332         return TCG_TARGET_HAS_nor_i64;
2333     case INDEX_op_clz_i64:
2334         return TCG_TARGET_HAS_clz_i64;
2335     case INDEX_op_ctz_i64:
2336         return TCG_TARGET_HAS_ctz_i64;
2337     case INDEX_op_ctpop_i64:
2338         return TCG_TARGET_HAS_ctpop_i64;
2339     case INDEX_op_add2_i64:
2340         return TCG_TARGET_HAS_add2_i64;
2341     case INDEX_op_sub2_i64:
2342         return TCG_TARGET_HAS_sub2_i64;
2343     case INDEX_op_mulu2_i64:
2344         return TCG_TARGET_HAS_mulu2_i64;
2345     case INDEX_op_muls2_i64:
2346         return TCG_TARGET_HAS_muls2_i64;
2347     case INDEX_op_muluh_i64:
2348         return TCG_TARGET_HAS_muluh_i64;
2349     case INDEX_op_mulsh_i64:
2350         return TCG_TARGET_HAS_mulsh_i64;
2351 
2352     case INDEX_op_mov_vec:
2353     case INDEX_op_dup_vec:
2354     case INDEX_op_dupm_vec:
2355     case INDEX_op_ld_vec:
2356     case INDEX_op_st_vec:
2357     case INDEX_op_add_vec:
2358     case INDEX_op_sub_vec:
2359     case INDEX_op_and_vec:
2360     case INDEX_op_or_vec:
2361     case INDEX_op_xor_vec:
2362     case INDEX_op_cmp_vec:
2363         return has_type;
2364     case INDEX_op_dup2_vec:
2365         return has_type && TCG_TARGET_REG_BITS == 32;
2366     case INDEX_op_not_vec:
2367         return has_type && TCG_TARGET_HAS_not_vec;
2368     case INDEX_op_neg_vec:
2369         return has_type && TCG_TARGET_HAS_neg_vec;
2370     case INDEX_op_abs_vec:
2371         return has_type && TCG_TARGET_HAS_abs_vec;
2372     case INDEX_op_andc_vec:
2373         return has_type && TCG_TARGET_HAS_andc_vec;
2374     case INDEX_op_orc_vec:
2375         return has_type && TCG_TARGET_HAS_orc_vec;
2376     case INDEX_op_nand_vec:
2377         return has_type && TCG_TARGET_HAS_nand_vec;
2378     case INDEX_op_nor_vec:
2379         return has_type && TCG_TARGET_HAS_nor_vec;
2380     case INDEX_op_eqv_vec:
2381         return has_type && TCG_TARGET_HAS_eqv_vec;
2382     case INDEX_op_mul_vec:
2383         return has_type && TCG_TARGET_HAS_mul_vec;
2384     case INDEX_op_shli_vec:
2385     case INDEX_op_shri_vec:
2386     case INDEX_op_sari_vec:
2387         return has_type && TCG_TARGET_HAS_shi_vec;
2388     case INDEX_op_shls_vec:
2389     case INDEX_op_shrs_vec:
2390     case INDEX_op_sars_vec:
2391         return has_type && TCG_TARGET_HAS_shs_vec;
2392     case INDEX_op_shlv_vec:
2393     case INDEX_op_shrv_vec:
2394     case INDEX_op_sarv_vec:
2395         return has_type && TCG_TARGET_HAS_shv_vec;
2396     case INDEX_op_rotli_vec:
2397         return has_type && TCG_TARGET_HAS_roti_vec;
2398     case INDEX_op_rotls_vec:
2399         return has_type && TCG_TARGET_HAS_rots_vec;
2400     case INDEX_op_rotlv_vec:
2401     case INDEX_op_rotrv_vec:
2402         return has_type && TCG_TARGET_HAS_rotv_vec;
2403     case INDEX_op_ssadd_vec:
2404     case INDEX_op_usadd_vec:
2405     case INDEX_op_sssub_vec:
2406     case INDEX_op_ussub_vec:
2407         return has_type && TCG_TARGET_HAS_sat_vec;
2408     case INDEX_op_smin_vec:
2409     case INDEX_op_umin_vec:
2410     case INDEX_op_smax_vec:
2411     case INDEX_op_umax_vec:
2412         return has_type && TCG_TARGET_HAS_minmax_vec;
2413     case INDEX_op_bitsel_vec:
2414         return has_type && TCG_TARGET_HAS_bitsel_vec;
2415     case INDEX_op_cmpsel_vec:
2416         return has_type && TCG_TARGET_HAS_cmpsel_vec;
2417 
2418     default:
2419         tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
2420         return true;
2421     }
2422 }
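/*
 * Illustrative use only: a caller expanding a rotate might test
 *
 *     if (tcg_op_supported(INDEX_op_rotl_i32, TCG_TYPE_I32, 0)) ...
 *
 * and fall back to an equivalent shl/shr/or sequence when the host
 * reports no TCG_TARGET_HAS_rot_i32.
 */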
2423 
2424 bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len)
2425 {
2426     unsigned width;
2427 
2428     tcg_debug_assert(type == TCG_TYPE_I32 || type == TCG_TYPE_I64);
2429     width = (type == TCG_TYPE_I32 ? 32 : 64);
2430 
2431     tcg_debug_assert(ofs < width);
2432     tcg_debug_assert(len > 0);
2433     tcg_debug_assert(len <= width - ofs);
2434 
2435     return TCG_TARGET_deposit_valid(type, ofs, len);
2436 }
2437 
2438 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2439 
2440 static void tcg_gen_callN(void *func, TCGHelperInfo *info,
2441                           TCGTemp *ret, TCGTemp **args)
2442 {
2443     TCGv_i64 extend_free[MAX_CALL_IARGS];
2444     int n_extend = 0;
2445     TCGOp *op;
2446     int i, n, pi = 0, total_args;
2447 
2448     if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
2449         init_call_layout(info);
2450         g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
2451     }
2452 
2453     total_args = info->nr_out + info->nr_in + 2;
2454     op = tcg_op_alloc(INDEX_op_call, total_args);
2455 
2456 #ifdef CONFIG_PLUGIN
2457     /* Flag helpers that may affect guest state */
2458     if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
2459         tcg_ctx->plugin_insn->calls_helpers = true;
2460     }
2461 #endif
2462 
2463     TCGOP_CALLO(op) = n = info->nr_out;
2464     switch (n) {
2465     case 0:
2466         tcg_debug_assert(ret == NULL);
2467         break;
2468     case 1:
2469         tcg_debug_assert(ret != NULL);
2470         op->args[pi++] = temp_arg(ret);
2471         break;
2472     case 2:
2473     case 4:
2474         tcg_debug_assert(ret != NULL);
2475         tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
2476         tcg_debug_assert(ret->temp_subindex == 0);
2477         for (i = 0; i < n; ++i) {
2478             op->args[pi++] = temp_arg(ret + i);
2479         }
2480         break;
2481     default:
2482         g_assert_not_reached();
2483     }
2484 
2485     TCGOP_CALLI(op) = n = info->nr_in;
2486     for (i = 0; i < n; i++) {
2487         const TCGCallArgumentLoc *loc = &info->in[i];
2488         TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;
2489 
2490         switch (loc->kind) {
2491         case TCG_CALL_ARG_NORMAL:
2492         case TCG_CALL_ARG_BY_REF:
2493         case TCG_CALL_ARG_BY_REF_N:
2494             op->args[pi++] = temp_arg(ts);
2495             break;
2496 
2497         case TCG_CALL_ARG_EXTEND_U:
2498         case TCG_CALL_ARG_EXTEND_S:
2499             {
2500                 TCGv_i64 temp = tcg_temp_ebb_new_i64();
2501                 TCGv_i32 orig = temp_tcgv_i32(ts);
2502 
2503                 if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
2504                     tcg_gen_ext_i32_i64(temp, orig);
2505                 } else {
2506                     tcg_gen_extu_i32_i64(temp, orig);
2507                 }
2508                 op->args[pi++] = tcgv_i64_arg(temp);
2509                 extend_free[n_extend++] = temp;
2510             }
2511             break;
2512 
2513         default:
2514             g_assert_not_reached();
2515         }
2516     }
2517     op->args[pi++] = (uintptr_t)func;
2518     op->args[pi++] = (uintptr_t)info;
2519     tcg_debug_assert(pi == total_args);
2520 
2521     if (tcg_ctx->emit_before_op) {
2522         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
2523     } else {
2524         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2525     }
2526 
2527     tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
2528     for (i = 0; i < n_extend; ++i) {
2529         tcg_temp_free_i64(extend_free[i]);
2530     }
2531 }
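/*
 * Resulting operand layout of the call op built above, for a
 * hypothetical helper with one single-word return and two inputs:
 *
 *     args[0]    = temp_arg(ret)
 *     args[1..2] = temp_arg(arg0), temp_arg(arg1)
 *     args[3]    = (uintptr_t)func
 *     args[4]    = (uintptr_t)info
 *
 * hence total_args == nr_out + nr_in + 2.
 */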
2532 
2533 void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret)
2534 {
2535     tcg_gen_callN(func, info, ret, NULL);
2536 }
2537 
2538 void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2539 {
2540     tcg_gen_callN(func, info, ret, &t1);
2541 }
2542 
2543 void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret,
2544                    TCGTemp *t1, TCGTemp *t2)
2545 {
2546     TCGTemp *args[2] = { t1, t2 };
2547     tcg_gen_callN(func, info, ret, args);
2548 }
2549 
2550 void tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret,
2551                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3)
2552 {
2553     TCGTemp *args[3] = { t1, t2, t3 };
2554     tcg_gen_callN(func, info, ret, args);
2555 }
2556 
2557 void tcg_gen_call4(void *func, TCGHelperInfo *info, TCGTemp *ret,
2558                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2559 {
2560     TCGTemp *args[4] = { t1, t2, t3, t4 };
2561     tcg_gen_callN(func, info, ret, args);
2562 }
2563 
2564 void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2565                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2566 {
2567     TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2568     tcg_gen_callN(func, info, ret, args);
2569 }
2570 
2571 void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret,
2572                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3,
2573                    TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2574 {
2575     TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2576     tcg_gen_callN(func, info, ret, args);
2577 }
2578 
2579 void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2580                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2581                    TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2582 {
2583     TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2584     tcg_gen_callN(func, info, ret, args);
2585 }
2586 
2587 static void tcg_reg_alloc_start(TCGContext *s)
2588 {
2589     int i, n;
2590 
2591     for (i = 0, n = s->nb_temps; i < n; i++) {
2592         TCGTemp *ts = &s->temps[i];
2593         TCGTempVal val = TEMP_VAL_MEM;
2594 
2595         switch (ts->kind) {
2596         case TEMP_CONST:
2597             val = TEMP_VAL_CONST;
2598             break;
2599         case TEMP_FIXED:
2600             val = TEMP_VAL_REG;
2601             break;
2602         case TEMP_GLOBAL:
2603             break;
2604         case TEMP_EBB:
2605             val = TEMP_VAL_DEAD;
2606             /* fall through */
2607         case TEMP_TB:
2608             ts->mem_allocated = 0;
2609             break;
2610         default:
2611             g_assert_not_reached();
2612         }
2613         ts->val_type = val;
2614     }
2615 
2616     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2617 }
2618 
2619 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2620                                  TCGTemp *ts)
2621 {
2622     int idx = temp_idx(ts);
2623 
2624     switch (ts->kind) {
2625     case TEMP_FIXED:
2626     case TEMP_GLOBAL:
2627         pstrcpy(buf, buf_size, ts->name);
2628         break;
2629     case TEMP_TB:
2630         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2631         break;
2632     case TEMP_EBB:
2633         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2634         break;
2635     case TEMP_CONST:
2636         switch (ts->type) {
2637         case TCG_TYPE_I32:
2638             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2639             break;
2640 #if TCG_TARGET_REG_BITS > 32
2641         case TCG_TYPE_I64:
2642             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2643             break;
2644 #endif
2645         case TCG_TYPE_V64:
2646         case TCG_TYPE_V128:
2647         case TCG_TYPE_V256:
2648             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2649                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2650             break;
2651         default:
2652             g_assert_not_reached();
2653         }
2654         break;
2655     }
2656     return buf;
2657 }
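/*
 * Example renderings (indices hypothetical): fixed and global temps
 * print as their name ("env"), TEMP_TB as "loc3", TEMP_EBB as "tmp7",
 * and constants as "$0x10", or "v128$0x1" for a V128 constant.
 */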
2658 
2659 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2660                              int buf_size, TCGArg arg)
2661 {
2662     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2663 }
2664 
2665 static const char * const cond_name[] =
2666 {
2667     [TCG_COND_NEVER] = "never",
2668     [TCG_COND_ALWAYS] = "always",
2669     [TCG_COND_EQ] = "eq",
2670     [TCG_COND_NE] = "ne",
2671     [TCG_COND_LT] = "lt",
2672     [TCG_COND_GE] = "ge",
2673     [TCG_COND_LE] = "le",
2674     [TCG_COND_GT] = "gt",
2675     [TCG_COND_LTU] = "ltu",
2676     [TCG_COND_GEU] = "geu",
2677     [TCG_COND_LEU] = "leu",
2678     [TCG_COND_GTU] = "gtu",
2679     [TCG_COND_TSTEQ] = "tsteq",
2680     [TCG_COND_TSTNE] = "tstne",
2681 };
2682 
2683 static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
2684 {
2685     [MO_UB]   = "ub",
2686     [MO_SB]   = "sb",
2687     [MO_LEUW] = "leuw",
2688     [MO_LESW] = "lesw",
2689     [MO_LEUL] = "leul",
2690     [MO_LESL] = "lesl",
2691     [MO_LEUQ] = "leq",
2692     [MO_BEUW] = "beuw",
2693     [MO_BESW] = "besw",
2694     [MO_BEUL] = "beul",
2695     [MO_BESL] = "besl",
2696     [MO_BEUQ] = "beq",
2697     [MO_128 + MO_BE] = "beo",
2698     [MO_128 + MO_LE] = "leo",
2699 };
2700 
2701 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2702     [MO_UNALN >> MO_ASHIFT]    = "un+",
2703     [MO_ALIGN >> MO_ASHIFT]    = "al+",
2704     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2705     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2706     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2707     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2708     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2709     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2710 };
2711 
2712 static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
2713     [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
2714     [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
2715     [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
2716     [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
2717     [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
2718     [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
2719 };
2720 
2721 static const char bswap_flag_name[][6] = {
2722     [TCG_BSWAP_IZ] = "iz",
2723     [TCG_BSWAP_OZ] = "oz",
2724     [TCG_BSWAP_OS] = "os",
2725     [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
2726     [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
2727 };
2728 
2729 #ifdef CONFIG_PLUGIN
2730 static const char * const plugin_from_name[] = {
2731     "from-tb",
2732     "from-insn",
2733     "after-insn",
2734     "after-tb",
2735 };
2736 #endif
2737 
2738 static inline bool tcg_regset_single(TCGRegSet d)
2739 {
2740     return (d & (d - 1)) == 0;
2741 }
2742 
2743 static inline TCGReg tcg_regset_first(TCGRegSet d)
2744 {
2745     if (TCG_TARGET_NB_REGS <= 32) {
2746         return ctz32(d);
2747     } else {
2748         return ctz64(d);
2749     }
2750 }
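/*
 * Bit tricks above: d & (d - 1) clears the lowest set bit, so the
 * result is zero exactly when at most one register is in the set, and
 * ctz32/ctz64 then give the index of that single register.
 */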
2751 
2752 /* Return only the number of characters output -- no error return. */
2753 #define ne_fprintf(...) \
2754     ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
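/*
 * Example: ne_fprintf(f, "%s", name) yields the character count on
 * success and 0 on a fprintf error, so the column accounting below
 * never goes backwards.
 */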
2755 
2756 void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2757 {
2758     char buf[128];
2759     TCGOp *op;
2760 
2761     QTAILQ_FOREACH(op, &s->ops, link) {
2762         int i, k, nb_oargs, nb_iargs, nb_cargs;
2763         const TCGOpDef *def;
2764         TCGOpcode c;
2765         int col = 0;
2766 
2767         c = op->opc;
2768         def = &tcg_op_defs[c];
2769 
2770         if (c == INDEX_op_insn_start) {
2771             nb_oargs = 0;
2772             col += ne_fprintf(f, "\n ----");
2773 
2774             for (i = 0, k = s->insn_start_words; i < k; ++i) {
2775                 col += ne_fprintf(f, " %016" PRIx64,
2776                                   tcg_get_insn_start_param(op, i));
2777             }
2778         } else if (c == INDEX_op_call) {
2779             const TCGHelperInfo *info = tcg_call_info(op);
2780             void *func = tcg_call_func(op);
2781 
2782             /* variable number of arguments */
2783             nb_oargs = TCGOP_CALLO(op);
2784             nb_iargs = TCGOP_CALLI(op);
2785             nb_cargs = def->nb_cargs;
2786 
2787             col += ne_fprintf(f, " %s ", def->name);
2788 
2789             /*
2790              * Print the function name from TCGHelperInfo, if available.
2791              * Note that plugins have a template function for the info,
2792              * but the actual function pointer comes from the plugin.
2793              */
2794             if (func == info->func) {
2795                 col += ne_fprintf(f, "%s", info->name);
2796             } else {
2797                 col += ne_fprintf(f, "plugin(%p)", func);
2798             }
2799 
2800             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2801             for (i = 0; i < nb_oargs; i++) {
2802                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2803                                                             op->args[i]));
2804             }
2805             for (i = 0; i < nb_iargs; i++) {
2806                 TCGArg arg = op->args[nb_oargs + i];
2807                 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2808                 col += ne_fprintf(f, ",%s", t);
2809             }
2810         } else {
2811             col += ne_fprintf(f, " %s ", def->name);
2812 
2813             nb_oargs = def->nb_oargs;
2814             nb_iargs = def->nb_iargs;
2815             nb_cargs = def->nb_cargs;
2816 
2817             if (def->flags & TCG_OPF_VECTOR) {
2818                 col += ne_fprintf(f, "v%d,e%d,",
2819                                   8 * tcg_type_size(TCGOP_TYPE(op)),
2820                                   8 << TCGOP_VECE(op));
2821             }
2822 
2823             k = 0;
2824             for (i = 0; i < nb_oargs; i++) {
2825                 const char *sep =  k ? "," : "";
2826                 col += ne_fprintf(f, "%s%s", sep,
2827                                   tcg_get_arg_str(s, buf, sizeof(buf),
2828                                                   op->args[k++]));
2829             }
2830             for (i = 0; i < nb_iargs; i++) {
2831                 const char *sep =  k ? "," : "";
2832                 col += ne_fprintf(f, "%s%s", sep,
2833                                   tcg_get_arg_str(s, buf, sizeof(buf),
2834                                                   op->args[k++]));
2835             }
2836             switch (c) {
2837             case INDEX_op_brcond_i32:
2838             case INDEX_op_setcond_i32:
2839             case INDEX_op_negsetcond_i32:
2840             case INDEX_op_movcond_i32:
2841             case INDEX_op_brcond2_i32:
2842             case INDEX_op_setcond2_i32:
2843             case INDEX_op_brcond_i64:
2844             case INDEX_op_setcond_i64:
2845             case INDEX_op_negsetcond_i64:
2846             case INDEX_op_movcond_i64:
2847             case INDEX_op_cmp_vec:
2848             case INDEX_op_cmpsel_vec:
2849                 if (op->args[k] < ARRAY_SIZE(cond_name)
2850                     && cond_name[op->args[k]]) {
2851                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2852                 } else {
2853                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2854                 }
2855                 i = 1;
2856                 break;
2857             case INDEX_op_qemu_ld_i32:
2858             case INDEX_op_qemu_st_i32:
2859             case INDEX_op_qemu_st8_i32:
2860             case INDEX_op_qemu_ld_i64:
2861             case INDEX_op_qemu_st_i64:
2862             case INDEX_op_qemu_ld_i128:
2863             case INDEX_op_qemu_st_i128:
2864                 {
2865                     const char *s_al, *s_op, *s_at;
2866                     MemOpIdx oi = op->args[k++];
2867                     MemOp mop = get_memop(oi);
2868                     unsigned ix = get_mmuidx(oi);
2869 
2870                     s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
2871                     s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
2872                     s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
2873                     mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
2874 
2875                     /* If all fields are accounted for, print symbolically. */
2876                     if (!mop && s_al && s_op && s_at) {
2877                         col += ne_fprintf(f, ",%s%s%s,%u",
2878                                           s_at, s_al, s_op, ix);
2879                     } else {
2880                         mop = get_memop(oi);
2881                         col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
2882                     }
2883                     i = 1;
2884                 }
2885                 break;
2886             case INDEX_op_bswap16_i32:
2887             case INDEX_op_bswap16_i64:
2888             case INDEX_op_bswap32_i32:
2889             case INDEX_op_bswap32_i64:
2890             case INDEX_op_bswap64_i64:
2891                 {
2892                     TCGArg flags = op->args[k];
2893                     const char *name = NULL;
2894 
2895                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
2896                         name = bswap_flag_name[flags];
2897                     }
2898                     if (name) {
2899                         col += ne_fprintf(f, ",%s", name);
2900                     } else {
2901                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2902                     }
2903                     i = k = 1;
2904                 }
2905                 break;
2906 #ifdef CONFIG_PLUGIN
2907             case INDEX_op_plugin_cb:
2908                 {
2909                     TCGArg from = op->args[k++];
2910                     const char *name = NULL;
2911 
2912                     if (from < ARRAY_SIZE(plugin_from_name)) {
2913                         name = plugin_from_name[from];
2914                     }
2915                     if (name) {
2916                         col += ne_fprintf(f, "%s", name);
2917                     } else {
2918                         col += ne_fprintf(f, "$0x%" TCG_PRIlx, from);
2919                     }
2920                     i = 1;
2921                 }
2922                 break;
2923 #endif
2924             default:
2925                 i = 0;
2926                 break;
2927             }
2928             switch (c) {
2929             case INDEX_op_set_label:
2930             case INDEX_op_br:
2931             case INDEX_op_brcond_i32:
2932             case INDEX_op_brcond_i64:
2933             case INDEX_op_brcond2_i32:
2934                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2935                                   arg_label(op->args[k])->id);
2936                 i++, k++;
2937                 break;
2938             case INDEX_op_mb:
2939                 {
2940                     TCGBar membar = op->args[k];
2941                     const char *b_op, *m_op;
2942 
2943                     switch (membar & TCG_BAR_SC) {
2944                     case 0:
2945                         b_op = "none";
2946                         break;
2947                     case TCG_BAR_LDAQ:
2948                         b_op = "acq";
2949                         break;
2950                     case TCG_BAR_STRL:
2951                         b_op = "rel";
2952                         break;
2953                     case TCG_BAR_SC:
2954                         b_op = "seq";
2955                         break;
2956                     default:
2957                         g_assert_not_reached();
2958                     }
2959 
2960                     switch (membar & TCG_MO_ALL) {
2961                     case 0:
2962                         m_op = "none";
2963                         break;
2964                     case TCG_MO_LD_LD:
2965                         m_op = "rr";
2966                         break;
2967                     case TCG_MO_LD_ST:
2968                         m_op = "rw";
2969                         break;
2970                     case TCG_MO_ST_LD:
2971                         m_op = "wr";
2972                         break;
2973                     case TCG_MO_ST_ST:
2974                         m_op = "ww";
2975                         break;
2976                     case TCG_MO_LD_LD | TCG_MO_LD_ST:
2977                         m_op = "rr+rw";
2978                         break;
2979                     case TCG_MO_LD_LD | TCG_MO_ST_LD:
2980                         m_op = "rr+wr";
2981                         break;
2982                     case TCG_MO_LD_LD | TCG_MO_ST_ST:
2983                         m_op = "rr+ww";
2984                         break;
2985                     case TCG_MO_LD_ST | TCG_MO_ST_LD:
2986                         m_op = "rw+wr";
2987                         break;
2988                     case TCG_MO_LD_ST | TCG_MO_ST_ST:
2989                         m_op = "rw+ww";
2990                         break;
2991                     case TCG_MO_ST_LD | TCG_MO_ST_ST:
2992                         m_op = "wr+ww";
2993                         break;
2994                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
2995                         m_op = "rr+rw+wr";
2996                         break;
2997                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
2998                         m_op = "rr+rw+ww";
2999                         break;
3000                     case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
3001                         m_op = "rr+wr+ww";
3002                         break;
3003                     case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
3004                         m_op = "rw+wr+ww";
3005                         break;
3006                     case TCG_MO_ALL:
3007                         m_op = "all";
3008                         break;
3009                     default:
3010                         g_assert_not_reached();
3011                     }
3012 
3013                     col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
3014                     i++, k++;
3015                 }
3016                 break;
3017             default:
3018                 break;
3019             }
3020             for (; i < nb_cargs; i++, k++) {
3021                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
3022                                   op->args[k]);
3023             }
3024         }
3025 
3026         if (have_prefs || op->life) {
3027             for (; col < 40; ++col) {
3028                 putc(' ', f);
3029             }
3030         }
3031 
3032         if (op->life) {
3033             unsigned life = op->life;
3034 
3035             if (life & (SYNC_ARG * 3)) {
3036                 ne_fprintf(f, "  sync:");
3037                 for (i = 0; i < 2; ++i) {
3038                     if (life & (SYNC_ARG << i)) {
3039                         ne_fprintf(f, " %d", i);
3040                     }
3041                 }
3042             }
3043             life /= DEAD_ARG;
3044             if (life) {
3045                 ne_fprintf(f, "  dead:");
3046                 for (i = 0; life; ++i, life >>= 1) {
3047                     if (life & 1) {
3048                         ne_fprintf(f, " %d", i);
3049                     }
3050                 }
3051             }
3052         }
3053 
3054         if (have_prefs) {
3055             for (i = 0; i < nb_oargs; ++i) {
3056                 TCGRegSet set = output_pref(op, i);
3057 
3058                 if (i == 0) {
3059                     ne_fprintf(f, "  pref=");
3060                 } else {
3061                     ne_fprintf(f, ",");
3062                 }
3063                 if (set == 0) {
3064                     ne_fprintf(f, "none");
3065                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
3066                     ne_fprintf(f, "all");
3067 #ifdef CONFIG_DEBUG_TCG
3068                 } else if (tcg_regset_single(set)) {
3069                     TCGReg reg = tcg_regset_first(set);
3070                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
3071 #endif
3072                 } else if (TCG_TARGET_NB_REGS <= 32) {
3073                     ne_fprintf(f, "0x%x", (uint32_t)set);
3074                 } else {
3075                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
3076                 }
3077             }
3078         }
3079 
3080         putc('\n', f);
3081     }
3082 }
3083 
3084 /* We give higher priority to constraints with fewer registers. */
3085 static int get_constraint_priority(const TCGArgConstraint *arg_ct, int k)
3086 {
3087     int n;
3088 
3089     arg_ct += k;
3090     n = ctpop64(arg_ct->regs);
3091 
3092     /*
3093      * Sort constraints of a single register first, which includes output
3094      * aliases (which must exactly match the input already allocated).
3095      */
3096     if (n == 1 || arg_ct->oalias) {
3097         return INT_MAX;
3098     }
3099 
3100     /*
3101      * Sort register pairs next, first then second immediately after.
3102      * Arbitrarily sort multiple pairs by the index of the first reg;
3103      * there shouldn't be many pairs.
3104      */
3105     switch (arg_ct->pair) {
3106     case 1:
3107     case 3:
3108         return (k + 1) * 2;
3109     case 2:
3110         return (arg_ct->pair_index + 1) * 2 - 1;
3111     }
3112 
3113     /* Finally, sort by decreasing register count. */
3114     assert(n > 1);
3115     return -n;
3116 }
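/*
 * Resulting order for hypothetical constraints: an output alias or
 * single-register constraint sorts first (INT_MAX); the halves of the
 * first register pair sort next with priorities 2 and 1, keeping the
 * first half just ahead of the second; plain constraints then sort by
 * size, e.g. an 8-register class (-8) ahead of a 32-register class
 * (-32).
 */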
3117 
3118 /* sort from highest priority to lowest */
3119 static void sort_constraints(TCGArgConstraint *a, int start, int n)
3120 {
3121     int i, j;
3122 
3123     for (i = 0; i < n; i++) {
3124         a[start + i].sort_index = start + i;
3125     }
3126     if (n <= 1) {
3127         return;
3128     }
3129     for (i = 0; i < n - 1; i++) {
3130         for (j = i + 1; j < n; j++) {
3131             int p1 = get_constraint_priority(a, a[start + i].sort_index);
3132             int p2 = get_constraint_priority(a, a[start + j].sort_index);
3133             if (p1 < p2) {
3134                 int tmp = a[start + i].sort_index;
3135                 a[start + i].sort_index = a[start + j].sort_index;
3136                 a[start + j].sort_index = tmp;
3137             }
3138         }
3139     }
3140 }
3141 
3142 static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS];
3143 static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS];
3144 
3145 static void process_constraint_sets(void)
3146 {
3147     for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) {
3148         const TCGConstraintSet *tdefs = &constraint_sets[c];
3149         TCGArgConstraint *args_ct = all_cts[c];
3150         int nb_oargs = tdefs->nb_oargs;
3151         int nb_iargs = tdefs->nb_iargs;
3152         int nb_args = nb_oargs + nb_iargs;
3153         bool saw_alias_pair = false;
3154 
3155         for (int i = 0; i < nb_args; i++) {
3156             const char *ct_str = tdefs->args_ct_str[i];
3157             bool input_p = i >= nb_oargs;
3158             int o;
3159 
3160             switch (*ct_str) {
3161             case '0' ... '9':
3162                 o = *ct_str - '0';
3163                 tcg_debug_assert(input_p);
3164                 tcg_debug_assert(o < nb_oargs);
3165                 tcg_debug_assert(args_ct[o].regs != 0);
3166                 tcg_debug_assert(!args_ct[o].oalias);
3167                 args_ct[i] = args_ct[o];
3168                 /* The output sets oalias.  */
3169                 args_ct[o].oalias = 1;
3170                 args_ct[o].alias_index = i;
3171                 /* The input sets ialias. */
3172                 args_ct[i].ialias = 1;
3173                 args_ct[i].alias_index = o;
3174                 if (args_ct[i].pair) {
3175                     saw_alias_pair = true;
3176                 }
3177                 tcg_debug_assert(ct_str[1] == '\0');
3178                 continue;
3179 
3180             case '&':
3181                 tcg_debug_assert(!input_p);
3182                 args_ct[i].newreg = true;
3183                 ct_str++;
3184                 break;
3185 
3186             case 'p': /* plus */
3187                 /* Allocate to the register after the previous. */
3188                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3189                 o = i - 1;
3190                 tcg_debug_assert(!args_ct[o].pair);
3191                 tcg_debug_assert(!args_ct[o].ct);
3192                 args_ct[i] = (TCGArgConstraint){
3193                     .pair = 2,
3194                     .pair_index = o,
3195                     .regs = args_ct[o].regs << 1,
3196                     .newreg = args_ct[o].newreg,
3197                 };
3198                 args_ct[o].pair = 1;
3199                 args_ct[o].pair_index = i;
3200                 tcg_debug_assert(ct_str[1] == '\0');
3201                 continue;
3202 
3203             case 'm': /* minus */
3204                 /* Allocate to the register before the previous. */
3205                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3206                 o = i - 1;
3207                 tcg_debug_assert(!args_ct[o].pair);
3208                 tcg_debug_assert(!args_ct[o].ct);
3209                 args_ct[i] = (TCGArgConstraint){
3210                     .pair = 1,
3211                     .pair_index = o,
3212                     .regs = args_ct[o].regs >> 1,
3213                     .newreg = args_ct[o].newreg,
3214                 };
3215                 args_ct[o].pair = 2;
3216                 args_ct[o].pair_index = i;
3217                 tcg_debug_assert(ct_str[1] == '\0');
3218                 continue;
3219             }
3220 
3221             do {
3222                 switch (*ct_str) {
3223                 case 'i':
3224                     args_ct[i].ct |= TCG_CT_CONST;
3225                     break;
3226 
3227                 /* Include all of the target-specific constraints. */
3228 
3229 #undef CONST
3230 #define CONST(CASE, MASK) \
3231     case CASE: args_ct[i].ct |= MASK; break;
3232 #define REGS(CASE, MASK) \
3233     case CASE: args_ct[i].regs |= MASK; break;
3234 
3235 #include "tcg-target-con-str.h"
3236 
3237 #undef REGS
3238 #undef CONST
3239                 default:
3240                 case '0' ... '9':
3241                 case '&':
3242                 case 'p':
3243                 case 'm':
3244                     /* Typo in TCGConstraintSet constraint. */
3245                     g_assert_not_reached();
3246                 }
3247             } while (*++ct_str != '\0');
3248         }
3249 
3250         /*
3251          * Fix up output pairs that are aliased with inputs.
3252          * When we created the alias, we copied pair from the output.
3253          * There are three cases:
3254          *    (1a) Pairs of inputs alias pairs of outputs.
3255          *    (1b) One input aliases the first of a pair of outputs.
3256          *    (2)  One input aliases the second of a pair of outputs.
3257          *
3258          * Case 1a is handled by making sure that the pair_index'es are
3259          * properly updated so that they appear the same as a pair of inputs.
3260          *
3261          * Case 1b is handled by setting the pair_index of the input to
3262          * itself, simply so it doesn't point to an unrelated argument.
3263          * Since we don't encounter the "second" during the input allocation
3264          * phase, nothing happens with the second half of the input pair.
3265          *
3266          * Case 2 is handled by setting the second input to pair=3, the
3267          * first output to pair=3, and the pair_index'es to match.
3268          */
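        /*
         * Illustrative example (hypothetical constraint set): outputs
         * "r","p" form the pair O0:O1; an input constraint "1" aliases
         * O1, the second half of that pair.  This is case 2: the fixup
         * below links the input with O0 via pair=3 so the allocator can
         * still place both halves in consecutive registers.
         */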
3269         if (saw_alias_pair) {
3270             for (int i = nb_oargs; i < nb_args; i++) {
3271                 int o, o2, i2;
3272 
3273                 /*
3274                  * Since [0-9pm] must be alone in the constraint string,
3275                  * the only way ialias and pair can both be set is if the
3276                  * pair comes from the output alias.
3277                  */
3278                 if (!args_ct[i].ialias) {
3279                     continue;
3280                 }
3281                 switch (args_ct[i].pair) {
3282                 case 0:
3283                     break;
3284                 case 1:
3285                     o = args_ct[i].alias_index;
3286                     o2 = args_ct[o].pair_index;
3287                     tcg_debug_assert(args_ct[o].pair == 1);
3288                     tcg_debug_assert(args_ct[o2].pair == 2);
3289                     if (args_ct[o2].oalias) {
3290                         /* Case 1a */
3291                         i2 = args_ct[o2].alias_index;
3292                         tcg_debug_assert(args_ct[i2].pair == 2);
3293                         args_ct[i2].pair_index = i;
3294                         args_ct[i].pair_index = i2;
3295                     } else {
3296                         /* Case 1b */
3297                         args_ct[i].pair_index = i;
3298                     }
3299                     break;
3300                 case 2:
3301                     o = args_ct[i].alias_index;
3302                     o2 = args_ct[o].pair_index;
3303                     tcg_debug_assert(args_ct[o].pair == 2);
3304                     tcg_debug_assert(args_ct[o2].pair == 1);
3305                     if (args_ct[o2].oalias) {
3306                         /* Case 1a */
3307                         i2 = args_ct[o2].alias_index;
3308                         tcg_debug_assert(args_ct[i2].pair == 1);
3309                         args_ct[i2].pair_index = i;
3310                         args_ct[i].pair_index = i2;
3311                     } else {
3312                         /* Case 2 */
3313                         args_ct[i].pair = 3;
3314                         args_ct[o2].pair = 3;
3315                         args_ct[i].pair_index = o2;
3316                         args_ct[o2].pair_index = i;
3317                     }
3318                     break;
3319                 default:
3320                     g_assert_not_reached();
3321                 }
3322             }
3323         }
3324 
3325         /* sort the constraints (XXX: this is just a heuristic) */
3326         sort_constraints(args_ct, 0, nb_oargs);
3327         sort_constraints(args_ct, nb_oargs, nb_iargs);
3328     }
3329 }
3330 
3331 static const TCGArgConstraint *opcode_args_ct(const TCGOp *op)
3332 {
3333     const TCGOpDef *def = &tcg_op_defs[op->opc];
3334     TCGConstraintSetIndex con_set;
3335 
3336 #ifdef CONFIG_DEBUG_TCG
3337     assert(tcg_op_supported(op->opc, TCGOP_TYPE(op), TCGOP_FLAGS(op)));
3338 #endif
3339 
3340     if (def->flags & TCG_OPF_NOT_PRESENT) {
3341         return empty_cts;
3342     }
3343 
3344     con_set = tcg_target_op_def(op->opc, TCGOP_TYPE(op), TCGOP_FLAGS(op));
3345     tcg_debug_assert(con_set >= 0 && con_set < ARRAY_SIZE(constraint_sets));
3346 
3347     /* The constraint arguments must match TCGOpcode arguments. */
3348     tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs);
3349     tcg_debug_assert(constraint_sets[con_set].nb_iargs == def->nb_iargs);
3350 
3351     return all_cts[con_set];
3352 }
3353 
3354 static void remove_label_use(TCGOp *op, int idx)
3355 {
3356     TCGLabel *label = arg_label(op->args[idx]);
3357     TCGLabelUse *use;
3358 
3359     QSIMPLEQ_FOREACH(use, &label->branches, next) {
3360         if (use->op == op) {
3361             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3362             return;
3363         }
3364     }
3365     g_assert_not_reached();
3366 }
3367 
3368 void tcg_op_remove(TCGContext *s, TCGOp *op)
3369 {
3370     switch (op->opc) {
3371     case INDEX_op_br:
3372         remove_label_use(op, 0);
3373         break;
3374     case INDEX_op_brcond_i32:
3375     case INDEX_op_brcond_i64:
3376         remove_label_use(op, 3);
3377         break;
3378     case INDEX_op_brcond2_i32:
3379         remove_label_use(op, 5);
3380         break;
3381     default:
3382         break;
3383     }
3384 
3385     QTAILQ_REMOVE(&s->ops, op, link);
3386     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3387     s->nb_ops--;
3388 }
3389 
3390 void tcg_remove_ops_after(TCGOp *op)
3391 {
3392     TCGContext *s = tcg_ctx;
3393 
3394     while (true) {
3395         TCGOp *last = tcg_last_op();
3396         if (last == op) {
3397             return;
3398         }
3399         tcg_op_remove(s, last);
3400     }
3401 }
3402 
3403 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3404 {
3405     TCGContext *s = tcg_ctx;
3406     TCGOp *op = NULL;
3407 
3408     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3409         QTAILQ_FOREACH(op, &s->free_ops, link) {
3410             if (nargs <= op->nargs) {
3411                 QTAILQ_REMOVE(&s->free_ops, op, link);
3412                 nargs = op->nargs;
3413                 goto found;
3414             }
3415         }
3416     }
3417 
3418     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3419     nargs = MAX(4, nargs);
3420     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3421 
3422  found:
3423     memset(op, 0, offsetof(TCGOp, link));
3424     op->opc = opc;
3425     op->nargs = nargs;
3426 
3427     /* Check for bitfield overflow. */
3428     tcg_debug_assert(op->nargs == nargs);
3429 
3430     s->nb_ops++;
3431     return op;
3432 }
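
/*
 * Note on reuse: a recycled op keeps its original nargs, so an op freed
 * with, say, 6 argument slots can later satisfy any request for up to 6
 * arguments, while fresh allocations round up to at least 4 slots.
 */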
3433 
3434 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3435 {
3436     TCGOp *op = tcg_op_alloc(opc, nargs);
3437 
3438     if (tcg_ctx->emit_before_op) {
3439         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
3440     } else {
3441         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3442     }
3443     return op;
3444 }
3445 
3446 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3447                             TCGOpcode opc, unsigned nargs)
3448 {
3449     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3450 
3451     TCGOP_TYPE(new_op) = TCGOP_TYPE(old_op);
3452     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3453     return new_op;
3454 }
3455 
3456 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3457                            TCGOpcode opc, unsigned nargs)
3458 {
3459     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3460 
3461     TCGOP_TYPE(new_op) = TCGOP_TYPE(old_op);
3462     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3463     return new_op;
3464 }
3465 
3466 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3467 {
3468     TCGLabelUse *u;
3469 
3470     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3471         TCGOp *op = u->op;
3472         switch (op->opc) {
3473         case INDEX_op_br:
3474             op->args[0] = label_arg(to);
3475             break;
3476         case INDEX_op_brcond_i32:
3477         case INDEX_op_brcond_i64:
3478             op->args[3] = label_arg(to);
3479             break;
3480         case INDEX_op_brcond2_i32:
3481             op->args[5] = label_arg(to);
3482             break;
3483         default:
3484             g_assert_not_reached();
3485         }
3486     }
3487 
3488     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3489 }
3490 
3491 /* Reachability analysis: remove unreachable code.  */
3492 static void __attribute__((noinline))
3493 reachable_code_pass(TCGContext *s)
3494 {
3495     TCGOp *op, *op_next, *op_prev;
3496     bool dead = false;
3497 
3498     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3499         bool remove = dead;
3500         TCGLabel *label;
3501 
3502         switch (op->opc) {
3503         case INDEX_op_set_label:
3504             label = arg_label(op->args[0]);
3505 
3506             /*
3507              * Note that the first op in the TB is always a load,
3508              * so there is always something before a label.
3509              */
3510             op_prev = QTAILQ_PREV(op, link);
3511 
3512             /*
3513              * If we find two sequential labels, move all branches to
3514              * reference the second label and remove the first label.
3515              * Do this before branch to next optimization, so that the
3516              * middle label is out of the way.
3517              */
3518             if (op_prev->opc == INDEX_op_set_label) {
3519                 move_label_uses(label, arg_label(op_prev->args[0]));
3520                 tcg_op_remove(s, op_prev);
3521                 op_prev = QTAILQ_PREV(op, link);
3522             }
3523 
3524             /*
3525              * Optimization can fold conditional branches to unconditional.
3526              * If we find a label which is preceded by an unconditional
3527              * branch to next, remove the branch.  We couldn't do this when
3528              * processing the branch because any dead code between the branch
3529              * and label had not yet been removed.
3530              */
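            /*
             * E.g. (hypothetical label): once "brcond ... $L1" has been
             * folded to "br $L1", the sequence "br $L1; set_label $L1"
             * is reduced here to just the label.
             */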
3531             if (op_prev->opc == INDEX_op_br &&
3532                 label == arg_label(op_prev->args[0])) {
3533                 tcg_op_remove(s, op_prev);
3534                 /* Fall through means insns become live again.  */
3535                 dead = false;
3536             }
3537 
3538             if (QSIMPLEQ_EMPTY(&label->branches)) {
3539                 /*
3540                  * While there is an occasional backward branch, virtually
3541                  * all branches generated by the translators are forward.
3542                  * Which means that generally we will already have removed
3543                  * every reference to the label that there will ever be,
3544                  * and there is little to be gained by iterating.
3545                  */
3546                 remove = true;
3547             } else {
3548                 /* Once we see a label, insns become live again.  */
3549                 dead = false;
3550                 remove = false;
3551             }
3552             break;
3553 
3554         case INDEX_op_br:
3555         case INDEX_op_exit_tb:
3556         case INDEX_op_goto_ptr:
3557             /* Unconditional branches; everything following is dead.  */
3558             dead = true;
3559             break;
3560 
3561         case INDEX_op_call:
3562             /* Notice noreturn helper calls, e.g. those raising exceptions.  */
3563             if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
3564                 dead = true;
3565             }
3566             break;
3567 
3568         case INDEX_op_insn_start:
3569             /* Never remove -- we need to keep these for unwind.  */
3570             remove = false;
3571             break;
3572 
3573         default:
3574             break;
3575         }
3576 
3577         if (remove) {
3578             tcg_op_remove(s, op);
3579         }
3580     }
3581 }
3582 
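/*
 * Temp liveness state bits: TS_DEAD means the value need not be kept live
 * in a register; TS_MEM means the value is (also) present in its canonical
 * memory slot.  E.g. at function end globals are TS_DEAD | TS_MEM: dead in
 * registers but saved to memory.
 */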
3583 #define TS_DEAD  1
3584 #define TS_MEM   2
3585 
3586 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
3587 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
3588 
3589 /* For liveness_pass_1, the register preferences for a given temp.  */
3590 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3591 {
3592     return ts->state_ptr;
3593 }
3594 
3595 /* For liveness_pass_1, reset the preferences for a given temp to the
3596  * maximal regset for its type.
3597  */
3598 static inline void la_reset_pref(TCGTemp *ts)
3599 {
3600     *la_temp_pref(ts)
3601         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3602 }
3603 
3604 /* liveness analysis: end of function: all temps are dead, and globals
3605    should be in memory. */
3606 static void la_func_end(TCGContext *s, int ng, int nt)
3607 {
3608     int i;
3609 
3610     for (i = 0; i < ng; ++i) {
3611         s->temps[i].state = TS_DEAD | TS_MEM;
3612         la_reset_pref(&s->temps[i]);
3613     }
3614     for (i = ng; i < nt; ++i) {
3615         s->temps[i].state = TS_DEAD;
3616         la_reset_pref(&s->temps[i]);
3617     }
3618 }
3619 
3620 /* liveness analysis: end of basic block: all temps are dead, globals
3621    and local temps should be in memory. */
3622 static void la_bb_end(TCGContext *s, int ng, int nt)
3623 {
3624     int i;
3625 
3626     for (i = 0; i < nt; ++i) {
3627         TCGTemp *ts = &s->temps[i];
3628         int state;
3629 
3630         switch (ts->kind) {
3631         case TEMP_FIXED:
3632         case TEMP_GLOBAL:
3633         case TEMP_TB:
3634             state = TS_DEAD | TS_MEM;
3635             break;
3636         case TEMP_EBB:
3637         case TEMP_CONST:
3638             state = TS_DEAD;
3639             break;
3640         default:
3641             g_assert_not_reached();
3642         }
3643         ts->state = state;
3644         la_reset_pref(ts);
3645     }
3646 }
3647 
3648 /* liveness analysis: sync globals back to memory.  */
3649 static void la_global_sync(TCGContext *s, int ng)
3650 {
3651     int i;
3652 
3653     for (i = 0; i < ng; ++i) {
3654         int state = s->temps[i].state;
3655         s->temps[i].state = state | TS_MEM;
3656         if (state == TS_DEAD) {
3657             /* If the global was previously dead, reset prefs.  */
3658             la_reset_pref(&s->temps[i]);
3659         }
3660     }
3661 }
3662 
3663 /*
3664  * liveness analysis: conditional branch: all temps are dead unless
3665  * explicitly live-across-conditional-branch; globals and local temps
3666  * should be synced.
3667  */
3668 static void la_bb_sync(TCGContext *s, int ng, int nt)
3669 {
3670     la_global_sync(s, ng);
3671 
3672     for (int i = ng; i < nt; ++i) {
3673         TCGTemp *ts = &s->temps[i];
3674         int state;
3675 
3676         switch (ts->kind) {
3677         case TEMP_TB:
3678             state = ts->state;
3679             ts->state = state | TS_MEM;
3680             if (state != TS_DEAD) {
3681                 continue;
3682             }
3683             break;
3684         case TEMP_EBB:
3685         case TEMP_CONST:
3686             continue;
3687         default:
3688             g_assert_not_reached();
3689         }
3690         la_reset_pref(&s->temps[i]);
3691     }
3692 }
3693 
3694 /* liveness analysis: sync globals back to memory and kill.  */
3695 static void la_global_kill(TCGContext *s, int ng)
3696 {
3697     int i;
3698 
3699     for (i = 0; i < ng; i++) {
3700         s->temps[i].state = TS_DEAD | TS_MEM;
3701         la_reset_pref(&s->temps[i]);
3702     }
3703 }
3704 
3705 /* liveness analysis: note live temps crossing calls.  */
3706 static void la_cross_call(TCGContext *s, int nt)
3707 {
3708     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3709     int i;
3710 
3711     for (i = 0; i < nt; i++) {
3712         TCGTemp *ts = &s->temps[i];
3713         if (!(ts->state & TS_DEAD)) {
3714             TCGRegSet *pset = la_temp_pref(ts);
3715             TCGRegSet set = *pset;
3716 
3717             set &= mask;
3718             /* If the combination is not possible, restart.  */
3719             if (set == 0) {
3720                 set = tcg_target_available_regs[ts->type] & mask;
3721             }
3722             *pset = set;
3723         }
3724     }
3725 }
3726 
3727 /*
3728  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3729  * to TEMP_EBB, if possible.
3730  */
3731 static void __attribute__((noinline))
3732 liveness_pass_0(TCGContext *s)
3733 {
3734     void * const multiple_ebb = (void *)(uintptr_t)-1;
3735     int nb_temps = s->nb_temps;
3736     TCGOp *op, *ebb;
3737 
3738     for (int i = s->nb_globals; i < nb_temps; ++i) {
3739         s->temps[i].state_ptr = NULL;
3740     }
3741 
3742     /*
3743      * Represent each EBB by the op at which it begins.  In the case of
3744      * the first EBB, this is the first op, otherwise it is a label.
3745      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3746      * within a single EBB, else MULTIPLE_EBB.
3747      */
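    /*
     * For example, a TEMP_TB written and read only between two adjacent
     * labels records that single EBB and is downgraded below; one that is
     * live across a label records MULTIPLE_EBB and must stay TEMP_TB.
     */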
3748     ebb = QTAILQ_FIRST(&s->ops);
3749     QTAILQ_FOREACH(op, &s->ops, link) {
3750         const TCGOpDef *def;
3751         int nb_oargs, nb_iargs;
3752 
3753         switch (op->opc) {
3754         case INDEX_op_set_label:
3755             ebb = op;
3756             continue;
3757         case INDEX_op_discard:
3758             continue;
3759         case INDEX_op_call:
3760             nb_oargs = TCGOP_CALLO(op);
3761             nb_iargs = TCGOP_CALLI(op);
3762             break;
3763         default:
3764             def = &tcg_op_defs[op->opc];
3765             nb_oargs = def->nb_oargs;
3766             nb_iargs = def->nb_iargs;
3767             break;
3768         }
3769 
3770         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3771             TCGTemp *ts = arg_temp(op->args[i]);
3772 
3773             if (ts->kind != TEMP_TB) {
3774                 continue;
3775             }
3776             if (ts->state_ptr == NULL) {
3777                 ts->state_ptr = ebb;
3778             } else if (ts->state_ptr != ebb) {
3779                 ts->state_ptr = multiple_ebb;
3780             }
3781         }
3782     }
3783 
3784     /*
3785      * For TEMP_TB that turned out not to be used beyond one EBB,
3786      * reduce the liveness to TEMP_EBB.
3787      */
3788     for (int i = s->nb_globals; i < nb_temps; ++i) {
3789         TCGTemp *ts = &s->temps[i];
3790         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3791             ts->kind = TEMP_EBB;
3792         }
3793     }
3794 }
3795 
3796 /* Liveness analysis: update the opc_arg_life array to tell if a
3797    given input argument is dead. Instructions updating dead
3798    temporaries are removed. */
3799 static void __attribute__((noinline))
3800 liveness_pass_1(TCGContext *s)
3801 {
3802     int nb_globals = s->nb_globals;
3803     int nb_temps = s->nb_temps;
3804     TCGOp *op, *op_prev;
3805     TCGRegSet *prefs;
3806     int i;
3807 
3808     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
3809     for (i = 0; i < nb_temps; ++i) {
3810         s->temps[i].state_ptr = prefs + i;
3811     }
3812 
3813     /* ??? Should be redundant with the exit_tb that ends the TB.  */
3814     la_func_end(s, nb_globals, nb_temps);
3815 
3816     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
3817         int nb_iargs, nb_oargs;
3818         TCGOpcode opc_new, opc_new2;
3819         bool have_opc_new2;
3820         TCGLifeData arg_life = 0;
3821         TCGTemp *ts;
3822         TCGOpcode opc = op->opc;
3823         const TCGOpDef *def = &tcg_op_defs[opc];
3824         const TCGArgConstraint *args_ct;
3825 
3826         switch (opc) {
3827         case INDEX_op_call:
3828             {
3829                 const TCGHelperInfo *info = tcg_call_info(op);
3830                 int call_flags = tcg_call_flags(op);
3831 
3832                 nb_oargs = TCGOP_CALLO(op);
3833                 nb_iargs = TCGOP_CALLI(op);
3834 
3835                 /* pure functions can be removed if their result is unused */
3836                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
3837                     for (i = 0; i < nb_oargs; i++) {
3838                         ts = arg_temp(op->args[i]);
3839                         if (ts->state != TS_DEAD) {
3840                             goto do_not_remove_call;
3841                         }
3842                     }
3843                     goto do_remove;
3844                 }
3845             do_not_remove_call:
3846 
3847                 /* Output args are dead.  */
3848                 for (i = 0; i < nb_oargs; i++) {
3849                     ts = arg_temp(op->args[i]);
3850                     if (ts->state & TS_DEAD) {
3851                         arg_life |= DEAD_ARG << i;
3852                     }
3853                     if (ts->state & TS_MEM) {
3854                         arg_life |= SYNC_ARG << i;
3855                     }
3856                     ts->state = TS_DEAD;
3857                     la_reset_pref(ts);
3858                 }
3859 
3860                 /* Not used -- it will be tcg_target_call_oarg_reg().  */
3861                 memset(op->output_pref, 0, sizeof(op->output_pref));
3862 
3863                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
3864                                     TCG_CALL_NO_READ_GLOBALS))) {
3865                     la_global_kill(s, nb_globals);
3866                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
3867                     la_global_sync(s, nb_globals);
3868                 }
3869 
3870                 /* Record arguments that die in this helper.  */
3871                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3872                     ts = arg_temp(op->args[i]);
3873                     if (ts->state & TS_DEAD) {
3874                         arg_life |= DEAD_ARG << i;
3875                     }
3876                 }
3877 
3878                 /* For all live registers, remove call-clobbered prefs.  */
3879                 la_cross_call(s, nb_temps);
3880 
3881                 /*
3882                  * Input arguments are live for preceding opcodes.
3883                  *
3884                  * For those arguments that die, and will be allocated in
3885                  * registers, clear the register set for that arg, to be
3886                  * filled in below.  For args that will be on the stack,
3887                  * reset to any available reg.  Process arguments in reverse
3888                  * order so that if a temp is used more than once, the stack
3889                  * reset to max happens before the register reset to 0.
3890                  */
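                /*
                 * E.g. a dying temp passed both in a register slot and in
                 * a stack slot: the stack use (higher index, visited first)
                 * resets its prefs to all available regs, then the register
                 * use narrows them to 0, to be filled in by the loop below.
                 */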
3891                 for (i = nb_iargs - 1; i >= 0; i--) {
3892                     const TCGCallArgumentLoc *loc = &info->in[i];
3893                     ts = arg_temp(op->args[nb_oargs + i]);
3894 
3895                     if (ts->state & TS_DEAD) {
3896                         switch (loc->kind) {
3897                         case TCG_CALL_ARG_NORMAL:
3898                         case TCG_CALL_ARG_EXTEND_U:
3899                         case TCG_CALL_ARG_EXTEND_S:
3900                             if (arg_slot_reg_p(loc->arg_slot)) {
3901                                 *la_temp_pref(ts) = 0;
3902                                 break;
3903                             }
3904                             /* fall through */
3905                         default:
3906                             *la_temp_pref(ts) =
3907                                 tcg_target_available_regs[ts->type];
3908                             break;
3909                         }
3910                         ts->state &= ~TS_DEAD;
3911                     }
3912                 }
3913 
3914                 /*
3915                  * For each input argument, add its input register to prefs.
3916                  * If a temp is used once, this produces a single set bit;
3917                  * if a temp is used multiple times, this produces a set.
3918                  */
3919                 for (i = 0; i < nb_iargs; i++) {
3920                     const TCGCallArgumentLoc *loc = &info->in[i];
3921                     ts = arg_temp(op->args[nb_oargs + i]);
3922 
3923                     switch (loc->kind) {
3924                     case TCG_CALL_ARG_NORMAL:
3925                     case TCG_CALL_ARG_EXTEND_U:
3926                     case TCG_CALL_ARG_EXTEND_S:
3927                         if (arg_slot_reg_p(loc->arg_slot)) {
3928                             tcg_regset_set_reg(*la_temp_pref(ts),
3929                                 tcg_target_call_iarg_regs[loc->arg_slot]);
3930                         }
3931                         break;
3932                     default:
3933                         break;
3934                     }
3935                 }
3936             }
3937             break;
3938         case INDEX_op_insn_start:
3939             break;
3940         case INDEX_op_discard:
3941             /* mark the temporary as dead */
3942             ts = arg_temp(op->args[0]);
3943             ts->state = TS_DEAD;
3944             la_reset_pref(ts);
3945             break;
3946 
3947         case INDEX_op_add2_i32:
3948             opc_new = INDEX_op_add_i32;
3949             goto do_addsub2;
3950         case INDEX_op_sub2_i32:
3951             opc_new = INDEX_op_sub_i32;
3952             goto do_addsub2;
3953         case INDEX_op_add2_i64:
3954             opc_new = INDEX_op_add_i64;
3955             goto do_addsub2;
3956         case INDEX_op_sub2_i64:
3957             opc_new = INDEX_op_sub_i64;
3958         do_addsub2:
3959             nb_iargs = 4;
3960             nb_oargs = 2;
3961             /* Test if the high part of the operation is dead, but not
3962                the low part.  The result can be optimized to a simple
3963                add or sub.  This happens often for an x86_64 guest when
3964                the CPU mode is set to 32 bits.  */
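            /*
             * E.g. (hypothetical temps):
             *   add2_i32 t0lo,t0hi, alo,ahi, blo,bhi     with t0hi dead
             * becomes
             *   add_i32  t0lo, alo, blo
             */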
3965             if (arg_temp(op->args[1])->state == TS_DEAD) {
3966                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3967                     goto do_remove;
3968                 }
3969                 /* Replace the opcode and adjust the args in place,
3970                    leaving 3 unused args at the end.  */
3971                 op->opc = opc = opc_new;
3972                 op->args[1] = op->args[2];
3973                 op->args[2] = op->args[4];
3974                 /* Fall through and mark the single-word operation live.  */
3975                 nb_iargs = 2;
3976                 nb_oargs = 1;
3977             }
3978             goto do_not_remove;
3979 
3980         case INDEX_op_mulu2_i32:
3981             opc_new = INDEX_op_mul_i32;
3982             opc_new2 = INDEX_op_muluh_i32;
3983             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
3984             goto do_mul2;
3985         case INDEX_op_muls2_i32:
3986             opc_new = INDEX_op_mul_i32;
3987             opc_new2 = INDEX_op_mulsh_i32;
3988             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
3989             goto do_mul2;
3990         case INDEX_op_mulu2_i64:
3991             opc_new = INDEX_op_mul_i64;
3992             opc_new2 = INDEX_op_muluh_i64;
3993             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
3994             goto do_mul2;
3995         case INDEX_op_muls2_i64:
3996             opc_new = INDEX_op_mul_i64;
3997             opc_new2 = INDEX_op_mulsh_i64;
3998             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
3999             goto do_mul2;
4000         do_mul2:
4001             nb_iargs = 2;
4002             nb_oargs = 2;
4003             if (arg_temp(op->args[1])->state == TS_DEAD) {
4004                 if (arg_temp(op->args[0])->state == TS_DEAD) {
4005                     /* Both parts of the operation are dead.  */
4006                     goto do_remove;
4007                 }
4008                 /* The high part of the operation is dead; generate the low. */
4009                 op->opc = opc = opc_new;
4010                 op->args[1] = op->args[2];
4011                 op->args[2] = op->args[3];
4012             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
4013                 /* The low part of the operation is dead; generate the high. */
4014                 op->opc = opc = opc_new2;
4015                 op->args[0] = op->args[1];
4016                 op->args[1] = op->args[2];
4017                 op->args[2] = op->args[3];
4018             } else {
4019                 goto do_not_remove;
4020             }
4021             /* Mark the single-word operation live.  */
4022             nb_oargs = 1;
4023             goto do_not_remove;
4024 
4025         default:
4026             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
4027             nb_iargs = def->nb_iargs;
4028             nb_oargs = def->nb_oargs;
4029 
4030             /* Test if the operation can be removed because all
4031                its outputs are dead. We assume that nb_oargs == 0
4032                implies side effects.  */
4033             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
4034                 for (i = 0; i < nb_oargs; i++) {
4035                     if (arg_temp(op->args[i])->state != TS_DEAD) {
4036                         goto do_not_remove;
4037                     }
4038                 }
4039                 goto do_remove;
4040             }
4041             goto do_not_remove;
4042 
4043         do_remove:
4044             tcg_op_remove(s, op);
4045             break;
4046 
4047         do_not_remove:
4048             for (i = 0; i < nb_oargs; i++) {
4049                 ts = arg_temp(op->args[i]);
4050 
4051                 /* Remember the preference of the uses that followed.  */
4052                 if (i < ARRAY_SIZE(op->output_pref)) {
4053                     op->output_pref[i] = *la_temp_pref(ts);
4054                 }
4055 
4056                 /* Output args are dead.  */
4057                 if (ts->state & TS_DEAD) {
4058                     arg_life |= DEAD_ARG << i;
4059                 }
4060                 if (ts->state & TS_MEM) {
4061                     arg_life |= SYNC_ARG << i;
4062                 }
4063                 ts->state = TS_DEAD;
4064                 la_reset_pref(ts);
4065             }
4066 
4067             /* If end of basic block, update.  */
4068             if (def->flags & TCG_OPF_BB_EXIT) {
4069                 la_func_end(s, nb_globals, nb_temps);
4070             } else if (def->flags & TCG_OPF_COND_BRANCH) {
4071                 la_bb_sync(s, nb_globals, nb_temps);
4072             } else if (def->flags & TCG_OPF_BB_END) {
4073                 la_bb_end(s, nb_globals, nb_temps);
4074             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4075                 la_global_sync(s, nb_globals);
4076                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
4077                     la_cross_call(s, nb_temps);
4078                 }
4079             }
4080 
4081             /* Record arguments that die in this opcode.  */
4082             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4083                 ts = arg_temp(op->args[i]);
4084                 if (ts->state & TS_DEAD) {
4085                     arg_life |= DEAD_ARG << i;
4086                 }
4087             }
4088 
4089             /* Input arguments are live for preceding opcodes.  */
4090             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4091                 ts = arg_temp(op->args[i]);
4092                 if (ts->state & TS_DEAD) {
4093                     /* For operands that were dead, initially allow
4094                        all regs for the type.  */
4095                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
4096                     ts->state &= ~TS_DEAD;
4097                 }
4098             }
4099 
4100             /* Incorporate constraints for this operand.  */
4101             switch (opc) {
4102             case INDEX_op_mov_i32:
4103             case INDEX_op_mov_i64:
4104                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
4105                    have proper constraints.  That said, special case
4106                    moves to propagate preferences backward.  */
4107                 if (IS_DEAD_ARG(1)) {
4108                     *la_temp_pref(arg_temp(op->args[0]))
4109                         = *la_temp_pref(arg_temp(op->args[1]));
4110                 }
4111                 break;
4112 
4113             default:
4114                 args_ct = opcode_args_ct(op);
4115                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4116                     const TCGArgConstraint *ct = &args_ct[i];
4117                     TCGRegSet set, *pset;
4118 
4119                     ts = arg_temp(op->args[i]);
4120                     pset = la_temp_pref(ts);
4121                     set = *pset;
4122 
4123                     set &= ct->regs;
4124                     if (ct->ialias) {
4125                         set &= output_pref(op, ct->alias_index);
4126                     }
4127                     /* If the combination is not possible, restart.  */
4128                     if (set == 0) {
4129                         set = ct->regs;
4130                     }
4131                     *pset = set;
4132                 }
4133                 break;
4134             }
4135             break;
4136         }
4137         op->life = arg_life;
4138     }
4139 }
4140 
4141 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
4142 static bool __attribute__((noinline))
4143 liveness_pass_2(TCGContext *s)
4144 {
4145     int nb_globals = s->nb_globals;
4146     int nb_temps, i;
4147     bool changes = false;
4148     TCGOp *op, *op_next;
4149 
4150     /* Create a temporary for each indirect global.  */
4151     for (i = 0; i < nb_globals; ++i) {
4152         TCGTemp *its = &s->temps[i];
4153         if (its->indirect_reg) {
4154             TCGTemp *dts = tcg_temp_alloc(s);
4155             dts->type = its->type;
4156             dts->base_type = its->base_type;
4157             dts->temp_subindex = its->temp_subindex;
4158             dts->kind = TEMP_EBB;
4159             its->state_ptr = dts;
4160         } else {
4161             its->state_ptr = NULL;
4162         }
4163         /* All globals begin dead.  */
4164         its->state = TS_DEAD;
4165     }
4166     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
4167         TCGTemp *its = &s->temps[i];
4168         its->state_ptr = NULL;
4169         its->state = TS_DEAD;
4170     }
4171 
4172     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
4173         TCGOpcode opc = op->opc;
4174         const TCGOpDef *def = &tcg_op_defs[opc];
4175         TCGLifeData arg_life = op->life;
4176         int nb_iargs, nb_oargs, call_flags;
4177         TCGTemp *arg_ts, *dir_ts;
4178 
4179         if (opc == INDEX_op_call) {
4180             nb_oargs = TCGOP_CALLO(op);
4181             nb_iargs = TCGOP_CALLI(op);
4182             call_flags = tcg_call_flags(op);
4183         } else {
4184             nb_iargs = def->nb_iargs;
4185             nb_oargs = def->nb_oargs;
4186 
4187             /* Set flags analogous to those required by calls.  */
4188             if (def->flags & TCG_OPF_COND_BRANCH) {
4189                 /* Like reading globals: sync_globals */
4190                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4191             } else if (def->flags & TCG_OPF_BB_END) {
4192                 /* Like writing globals: save_globals */
4193                 call_flags = 0;
4194             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4195                 /* Like reading globals: sync_globals */
4196                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4197             } else {
4198                 /* No effect on globals.  */
4199                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
4200                               TCG_CALL_NO_WRITE_GLOBALS);
4201             }
4202         }
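        /*
         * E.g. a conditional branch is treated like a helper call that
         * may read but not write globals: globals must be synced back to
         * memory, but need not be reloaded afterwards.
         */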
4203 
4204         /* Make sure that input arguments are available.  */
4205         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4206             arg_ts = arg_temp(op->args[i]);
4207             dir_ts = arg_ts->state_ptr;
4208             if (dir_ts && arg_ts->state == TS_DEAD) {
4209                 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
4210                                   ? INDEX_op_ld_i32
4211                                   : INDEX_op_ld_i64);
4212                 TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);
4213 
4214                 lop->args[0] = temp_arg(dir_ts);
4215                 lop->args[1] = temp_arg(arg_ts->mem_base);
4216                 lop->args[2] = arg_ts->mem_offset;
4217 
4218                 /* Loaded, but synced with memory.  */
4219                 arg_ts->state = TS_MEM;
4220             }
4221         }
4222 
4223         /* Perform input replacement, and mark inputs that became dead.
4224            No action is required except keeping temp_state up to date
4225            so that we reload when needed.  */
4226         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4227             arg_ts = arg_temp(op->args[i]);
4228             dir_ts = arg_ts->state_ptr;
4229             if (dir_ts) {
4230                 op->args[i] = temp_arg(dir_ts);
4231                 changes = true;
4232                 if (IS_DEAD_ARG(i)) {
4233                     arg_ts->state = TS_DEAD;
4234                 }
4235             }
4236         }
4237 
4238         /* Liveness analysis should ensure that the following are
4239            all correct, for call sites and basic block end points.  */
4240         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
4241             /* Nothing to do */
4242         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
4243             for (i = 0; i < nb_globals; ++i) {
4244                 /* Liveness should see that globals are synced back,
4245                    that is, either TS_DEAD or TS_MEM.  */
4246                 arg_ts = &s->temps[i];
4247                 tcg_debug_assert(arg_ts->state_ptr == 0
4248                                  || arg_ts->state != 0);
4249             }
4250         } else {
4251             for (i = 0; i < nb_globals; ++i) {
4252                 /* Liveness should see that globals are saved back,
4253                    that is, TS_DEAD, waiting to be reloaded.  */
4254                 arg_ts = &s->temps[i];
4255                 tcg_debug_assert(arg_ts->state_ptr == 0
4256                                  || arg_ts->state == TS_DEAD);
4257             }
4258         }
4259 
4260         /* Outputs become available.  */
4261         if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
4262             arg_ts = arg_temp(op->args[0]);
4263             dir_ts = arg_ts->state_ptr;
4264             if (dir_ts) {
4265                 op->args[0] = temp_arg(dir_ts);
4266                 changes = true;
4267 
4268                 /* The output is now live and modified.  */
4269                 arg_ts->state = 0;
4270 
4271                 if (NEED_SYNC_ARG(0)) {
4272                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4273                                       ? INDEX_op_st_i32
4274                                       : INDEX_op_st_i64);
4275                     TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
4276                     TCGTemp *out_ts = dir_ts;
4277 
4278                     if (IS_DEAD_ARG(0)) {
4279                         out_ts = arg_temp(op->args[1]);
4280                         arg_ts->state = TS_DEAD;
4281                         tcg_op_remove(s, op);
4282                     } else {
4283                         arg_ts->state = TS_MEM;
4284                     }
4285 
4286                     sop->args[0] = temp_arg(out_ts);
4287                     sop->args[1] = temp_arg(arg_ts->mem_base);
4288                     sop->args[2] = arg_ts->mem_offset;
4289                 } else {
4290                     tcg_debug_assert(!IS_DEAD_ARG(0));
4291                 }
4292             }
4293         } else {
4294             for (i = 0; i < nb_oargs; i++) {
4295                 arg_ts = arg_temp(op->args[i]);
4296                 dir_ts = arg_ts->state_ptr;
4297                 if (!dir_ts) {
4298                     continue;
4299                 }
4300                 op->args[i] = temp_arg(dir_ts);
4301                 changes = true;
4302 
4303                 /* The output is now live and modified.  */
4304                 arg_ts->state = 0;
4305 
4306                 /* Sync outputs upon their last write.  */
4307                 if (NEED_SYNC_ARG(i)) {
4308                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4309                                       ? INDEX_op_st_i32
4310                                       : INDEX_op_st_i64);
4311                     TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
4312 
4313                     sop->args[0] = temp_arg(dir_ts);
4314                     sop->args[1] = temp_arg(arg_ts->mem_base);
4315                     sop->args[2] = arg_ts->mem_offset;
4316 
4317                     arg_ts->state = TS_MEM;
4318                 }
4319                 /* Drop outputs that are dead.  */
4320                 if (IS_DEAD_ARG(i)) {
4321                     arg_ts->state = TS_DEAD;
4322                 }
4323             }
4324         }
4325     }
4326 
4327     return changes;
4328 }
4329 
4330 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
4331 {
4332     intptr_t off;
4333     int size, align;
4334 
4335     /* When allocating an object, look at the full type. */
4336     size = tcg_type_size(ts->base_type);
4337     switch (ts->base_type) {
4338     case TCG_TYPE_I32:
4339         align = 4;
4340         break;
4341     case TCG_TYPE_I64:
4342     case TCG_TYPE_V64:
4343         align = 8;
4344         break;
4345     case TCG_TYPE_I128:
4346     case TCG_TYPE_V128:
4347     case TCG_TYPE_V256:
4348         /*
4349          * Note that we do not require aligned storage for V256,
4350          * and that we provide alignment for I128 to match V128,
4351          * even if that's above what the host ABI requires.
4352          */
4353         align = 16;
4354         break;
4355     default:
4356         g_assert_not_reached();
4357     }
4358 
4359     /*
4360      * Assume the stack is sufficiently aligned.
4361      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
4362      * and do not require 16 byte vector alignment.  This seems slightly
4363      * easier than fully parameterizing the above switch statement.
4364      */
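    /*
     * Worked example: a V128 temp on a host with 8 byte stack alignment
     * gets align = MIN(8, 16) = 8; the 16 byte natural alignment is
     * deliberately not enforced.
     */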
4365     align = MIN(TCG_TARGET_STACK_ALIGN, align);
4366     off = ROUND_UP(s->current_frame_offset, align);
4367 
4368     /* If we've exhausted the stack frame, restart with a smaller TB. */
4369     if (off + size > s->frame_end) {
4370         tcg_raise_tb_overflow(s);
4371     }
4372     s->current_frame_offset = off + size;
4373 #if defined(__sparc__)
4374     off += TCG_TARGET_STACK_BIAS;
4375 #endif
4376 
4377     /* If the object was subdivided, assign memory to all the parts. */
4378     if (ts->base_type != ts->type) {
4379         int part_size = tcg_type_size(ts->type);
4380         int part_count = size / part_size;
4381 
4382         /*
4383          * Each part is allocated sequentially in tcg_temp_new_internal.
4384          * Jump back to the first part by subtracting the current index.
4385          */
4386         ts -= ts->temp_subindex;
4387         for (int i = 0; i < part_count; ++i) {
4388             ts[i].mem_offset = off + i * part_size;
4389             ts[i].mem_base = s->frame_temp;
4390             ts[i].mem_allocated = 1;
4391         }
4392     } else {
4393         ts->mem_offset = off;
4394         ts->mem_base = s->frame_temp;
4395         ts->mem_allocated = 1;
4396     }
4397 }
4398 
4399 /* Assign @reg to @ts, and update reg_to_temp[]. */
4400 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4401 {
4402     if (ts->val_type == TEMP_VAL_REG) {
4403         TCGReg old = ts->reg;
4404         tcg_debug_assert(s->reg_to_temp[old] == ts);
4405         if (old == reg) {
4406             return;
4407         }
4408         s->reg_to_temp[old] = NULL;
4409     }
4410     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4411     s->reg_to_temp[reg] = ts;
4412     ts->val_type = TEMP_VAL_REG;
4413     ts->reg = reg;
4414 }
4415 
4416 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4417 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4418 {
4419     tcg_debug_assert(type != TEMP_VAL_REG);
4420     if (ts->val_type == TEMP_VAL_REG) {
4421         TCGReg reg = ts->reg;
4422         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4423         s->reg_to_temp[reg] = NULL;
4424     }
4425     ts->val_type = type;
4426 }
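
/*
 * Together, set_temp_val_reg() and set_temp_val_nonreg() keep ts->val_type,
 * ts->reg and s->reg_to_temp[] consistent: a register maps back to a temp
 * exactly while that temp's value lives in it.
 */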
4427 
4428 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4429 
4430 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4431    mark it free; otherwise mark it dead.  */
4432 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4433 {
4434     TCGTempVal new_type;
4435 
4436     switch (ts->kind) {
4437     case TEMP_FIXED:
4438         return;
4439     case TEMP_GLOBAL:
4440     case TEMP_TB:
4441         new_type = TEMP_VAL_MEM;
4442         break;
4443     case TEMP_EBB:
4444         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4445         break;
4446     case TEMP_CONST:
4447         new_type = TEMP_VAL_CONST;
4448         break;
4449     default:
4450         g_assert_not_reached();
4451     }
4452     set_temp_val_nonreg(s, ts, new_type);
4453 }
4454 
4455 /* Mark a temporary as dead.  */
4456 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4457 {
4458     temp_free_or_dead(s, ts, 1);
4459 }
4460 
4461 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
4462    register needs to be allocated to store a constant.  If 'free_or_dead'
4463    is non-zero, subsequently release the temporary; if it is positive, the
4464    temp is dead; if it is negative, the temp is free.  */
4465 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
4466                       TCGRegSet preferred_regs, int free_or_dead)
4467 {
4468     if (!temp_readonly(ts) && !ts->mem_coherent) {
4469         if (!ts->mem_allocated) {
4470             temp_allocate_frame(s, ts);
4471         }
4472         switch (ts->val_type) {
4473         case TEMP_VAL_CONST:
4474             /* If we're going to free the temp immediately, then we won't
4475                require it later in a register, so attempt to store the
4476                constant to memory directly.  */
4477             if (free_or_dead
4478                 && tcg_out_sti(s, ts->type, ts->val,
4479                                ts->mem_base->reg, ts->mem_offset)) {
4480                 break;
4481             }
4482             temp_load(s, ts, tcg_target_available_regs[ts->type],
4483                       allocated_regs, preferred_regs);
4484             /* fallthrough */
4485 
4486         case TEMP_VAL_REG:
4487             tcg_out_st(s, ts->type, ts->reg,
4488                        ts->mem_base->reg, ts->mem_offset);
4489             break;
4490 
4491         case TEMP_VAL_MEM:
4492             break;
4493 
4494         case TEMP_VAL_DEAD:
4495         default:
4496             g_assert_not_reached();
4497         }
4498         ts->mem_coherent = 1;
4499     }
4500     if (free_or_dead) {
4501         temp_free_or_dead(s, ts, free_or_dead);
4502     }
4503 }
4504 
4505 /* free register 'reg' by spilling the corresponding temporary if necessary */
4506 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4507 {
4508     TCGTemp *ts = s->reg_to_temp[reg];
4509     if (ts != NULL) {
4510         temp_sync(s, ts, allocated_regs, 0, -1);
4511     }
4512 }
4513 
4514 /**
4515  * tcg_reg_alloc:
4516  * @required_regs: Set of registers in which we must allocate.
4517  * @allocated_regs: Set of registers which must be avoided.
4518  * @preferred_regs: Set of registers we should prefer.
4519  * @rev: True if we search the registers in "indirect" order.
4520  *
4521  * The allocated register must be in @required_regs & ~@allocated_regs,
4522  * but if we can put it in @preferred_regs we may save a move later.
4523  */
4524 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4525                             TCGRegSet allocated_regs,
4526                             TCGRegSet preferred_regs, bool rev)
4527 {
4528     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4529     TCGRegSet reg_ct[2];
4530     const int *order;
4531 
4532     reg_ct[1] = required_regs & ~allocated_regs;
4533     tcg_debug_assert(reg_ct[1] != 0);
4534     reg_ct[0] = reg_ct[1] & preferred_regs;
4535 
4536     /* Skip the preferred_regs option if it cannot be satisfied,
4537        or if the preference made no difference.  */
4538     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4539 
4540     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4541 
4542     /* Try free registers, preferences first.  */
4543     for (j = f; j < 2; j++) {
4544         TCGRegSet set = reg_ct[j];
4545 
4546         if (tcg_regset_single(set)) {
4547             /* One register in the set.  */
4548             TCGReg reg = tcg_regset_first(set);
4549             if (s->reg_to_temp[reg] == NULL) {
4550                 return reg;
4551             }
4552         } else {
4553             for (i = 0; i < n; i++) {
4554                 TCGReg reg = order[i];
4555                 if (s->reg_to_temp[reg] == NULL &&
4556                     tcg_regset_test_reg(set, reg)) {
4557                     return reg;
4558                 }
4559             }
4560         }
4561     }
4562 
4563     /* We must spill something.  */
4564     for (j = f; j < 2; j++) {
4565         TCGRegSet set = reg_ct[j];
4566 
4567         if (tcg_regset_single(set)) {
4568             /* One register in the set.  */
4569             TCGReg reg = tcg_regset_first(set);
4570             tcg_reg_free(s, reg, allocated_regs);
4571             return reg;
4572         } else {
4573             for (i = 0; i < n; i++) {
4574                 TCGReg reg = order[i];
4575                 if (tcg_regset_test_reg(set, reg)) {
4576                     tcg_reg_free(s, reg, allocated_regs);
4577                     return reg;
4578                 }
4579             }
4580         }
4581     }
4582 
4583     g_assert_not_reached();
4584 }
4585 
4586 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
4587                                  TCGRegSet allocated_regs,
4588                                  TCGRegSet preferred_regs, bool rev)
4589 {
4590     int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4591     TCGRegSet reg_ct[2];
4592     const int *order;
4593 
4594     /* Only allow register I when neither I nor I+1 is in allocated_regs. */
4595     reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
4596     tcg_debug_assert(reg_ct[1] != 0);
4597     reg_ct[0] = reg_ct[1] & preferred_regs;
4598 
4599     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4600 
4601     /*
4602      * Skip the preferred_regs option if it cannot be satisfied,
4603      * or if the preference made no difference.
4604      */
4605     k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4606 
4607     /*
4608      * Minimize the number of flushes by looking for 2 free registers first,
4609      * then a single flush, then two flushes.
4610      */
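    /*
     * I.e. the fmin == 2 pass accepts only pairs with both registers free,
     * fmin == 1 also allows pairs with one occupied register (one spill),
     * and fmin == 0 accepts any allowed pair (two spills).
     */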
4611     for (fmin = 2; fmin >= 0; fmin--) {
4612         for (j = k; j < 2; j++) {
4613             TCGRegSet set = reg_ct[j];
4614 
4615             for (i = 0; i < n; i++) {
4616                 TCGReg reg = order[i];
4617 
4618                 if (tcg_regset_test_reg(set, reg)) {
4619                     int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
4620                     if (f >= fmin) {
4621                         tcg_reg_free(s, reg, allocated_regs);
4622                         tcg_reg_free(s, reg + 1, allocated_regs);
4623                         return reg;
4624                     }
4625                 }
4626             }
4627         }
4628     }
4629     g_assert_not_reached();
4630 }
4631 
4632 /* Make sure the temporary is in a register.  If needed, allocate the register
4633    from DESIRED while avoiding ALLOCATED.  */
4634 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
4635                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
4636 {
4637     TCGReg reg;
4638 
4639     switch (ts->val_type) {
4640     case TEMP_VAL_REG:
4641         return;
4642     case TEMP_VAL_CONST:
4643         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4644                             preferred_regs, ts->indirect_base);
4645         if (ts->type <= TCG_TYPE_I64) {
4646             tcg_out_movi(s, ts->type, reg, ts->val);
4647         } else {
4648             uint64_t val = ts->val;
4649             MemOp vece = MO_64;
4650 
4651             /*
4652              * Find the minimal vector element that matches the constant.
4653              * The targets will, in general, have to do this search anyway,
4654              * so do it generically here.
4655              */
4656             if (val == dup_const(MO_8, val)) {
4657                 vece = MO_8;
4658             } else if (val == dup_const(MO_16, val)) {
4659                 vece = MO_16;
4660             } else if (val == dup_const(MO_32, val)) {
4661                 vece = MO_32;
4662             }
4663 
4664             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
4665         }
4666         ts->mem_coherent = 0;
4667         break;
4668     case TEMP_VAL_MEM:
4669         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4670                             preferred_regs, ts->indirect_base);
4671         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
4672         ts->mem_coherent = 1;
4673         break;
4674     case TEMP_VAL_DEAD:
4675     default:
4676         g_assert_not_reached();
4677     }
4678     set_temp_val_reg(s, ts, reg);
4679 }
4680 
4681 /* Save a temporary to memory. 'allocated_regs' is used in case a
4682    temporary register needs to be allocated to store a constant.  */
4683 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4684 {
4685     /* The liveness analysis already ensures that globals are back
4686        in memory. Keep a tcg_debug_assert for safety. */
4687     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4688 }
4689 
4690 /* save globals to their canonical location and assume they can be
4691    modified by the following code. 'allocated_regs' is used in case a
4692    temporary register needs to be allocated to store a constant. */
4693 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4694 {
4695     int i, n;
4696 
4697     for (i = 0, n = s->nb_globals; i < n; i++) {
4698         temp_save(s, &s->temps[i], allocated_regs);
4699     }
4700 }
4701 
4702 /* sync globals to their canonical location and assume they can be
4703    read by the following code. 'allocated_regs' is used in case a
4704    temporary register needs to be allocated to store a constant. */
4705 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4706 {
4707     int i, n;
4708 
4709     for (i = 0, n = s->nb_globals; i < n; i++) {
4710         TCGTemp *ts = &s->temps[i];
4711         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4712                          || ts->kind == TEMP_FIXED
4713                          || ts->mem_coherent);
4714     }
4715 }
4716 
4717 /* at the end of a basic block, we assume all temporaries are dead and
4718    all globals are stored at their canonical location. */
4719 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4720 {
4721     int i;
4722 
4723     for (i = s->nb_globals; i < s->nb_temps; i++) {
4724         TCGTemp *ts = &s->temps[i];
4725 
4726         switch (ts->kind) {
4727         case TEMP_TB:
4728             temp_save(s, ts, allocated_regs);
4729             break;
4730         case TEMP_EBB:
4731             /* The liveness analysis already ensures that temps are dead.
4732                Keep an tcg_debug_assert for safety. */
4733                Keep a tcg_debug_assert for safety. */
4734             break;
4735         case TEMP_CONST:
4736             /* Similarly, we should have freed any allocated register. */
4737             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4738             break;
4739         default:
4740             g_assert_not_reached();
4741         }
4742     }
4743 
4744     save_globals(s, allocated_regs);
4745 }
4746 
4747 /*
4748  * At a conditional branch, we assume all temporaries are dead unless
4749  * explicitly live-across-conditional-branch; all globals and local
4750  * temps are synced to their location.
4751  */
4752 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4753 {
4754     sync_globals(s, allocated_regs);
4755 
4756     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4757         TCGTemp *ts = &s->temps[i];
4758         /*
4759          * The liveness analysis already ensures that temps are dead.
4760          * Keep tcg_debug_asserts for safety.
4761          */
4762         switch (ts->kind) {
4763         case TEMP_TB:
4764             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4765             break;
4766         case TEMP_EBB:
4767         case TEMP_CONST:
4768             break;
4769         default:
4770             g_assert_not_reached();
4771         }
4772     }
4773 }
4774 
4775 /*
4776  * Specialized code generation for INDEX_op_mov_* with a constant.
4777  */
4778 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4779                                   tcg_target_ulong val, TCGLifeData arg_life,
4780                                   TCGRegSet preferred_regs)
4781 {
4782     /* ENV should not be modified.  */
4783     tcg_debug_assert(!temp_readonly(ots));
4784 
4785     /* The movi is not explicitly generated here.  */
4786     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4787     ots->val = val;
4788     ots->mem_coherent = 0;
4789     if (NEED_SYNC_ARG(0)) {
4790         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4791     } else if (IS_DEAD_ARG(0)) {
4792         temp_dead(s, ots);
4793     }
4794 }
4795 
4796 /*
4797  * Specialized code generation for INDEX_op_mov_*.
4798  */
4799 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4800 {
4801     const TCGLifeData arg_life = op->life;
4802     TCGRegSet allocated_regs, preferred_regs;
4803     TCGTemp *ts, *ots;
4804     TCGType otype, itype;
4805     TCGReg oreg, ireg;
4806 
4807     allocated_regs = s->reserved_regs;
4808     preferred_regs = output_pref(op, 0);
4809     ots = arg_temp(op->args[0]);
4810     ts = arg_temp(op->args[1]);
4811 
4812     /* ENV should not be modified.  */
4813     tcg_debug_assert(!temp_readonly(ots));
4814 
4815     /* Note that otype != itype for no-op truncation.  */
4816     otype = ots->type;
4817     itype = ts->type;
4818 
4819     if (ts->val_type == TEMP_VAL_CONST) {
4820         /* propagate constant or generate sti */
4821         tcg_target_ulong val = ts->val;
4822         if (IS_DEAD_ARG(1)) {
4823             temp_dead(s, ts);
4824         }
4825         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4826         return;
4827     }
4828 
4829     /* If the source value is in memory, we will be forced to have it
4830        in a register in order to perform the copy.  Copy the SOURCE
4831        value into its own register first, so that we don't have to
4832        reload SOURCE the next time it is used. */
4833     if (ts->val_type == TEMP_VAL_MEM) {
4834         temp_load(s, ts, tcg_target_available_regs[itype],
4835                   allocated_regs, preferred_regs);
4836     }
4837     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4838     ireg = ts->reg;
4839 
4840     if (IS_DEAD_ARG(0)) {
4841         /* mov to a non-saved dead register makes no sense (even with
4842            liveness analysis disabled). */
4843         tcg_debug_assert(NEED_SYNC_ARG(0));
4844         if (!ots->mem_allocated) {
4845             temp_allocate_frame(s, ots);
4846         }
4847         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4848         if (IS_DEAD_ARG(1)) {
4849             temp_dead(s, ts);
4850         }
4851         temp_dead(s, ots);
4852         return;
4853     }
4854 
4855     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4856         /*
4857          * The mov can be suppressed.  Kill input first, so that it
4858          * is unlinked from reg_to_temp, then set the output to the
4859          * reg that we saved from the input.
4860          */
4861         temp_dead(s, ts);
4862         oreg = ireg;
4863     } else {
4864         if (ots->val_type == TEMP_VAL_REG) {
4865             oreg = ots->reg;
4866         } else {
4867             /* Make sure to not spill the input register during allocation. */
4868             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4869                                  allocated_regs | ((TCGRegSet)1 << ireg),
4870                                  preferred_regs, ots->indirect_base);
4871         }
4872         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4873             /*
4874              * Cross register class move not supported.
4875              * Store the source register into the destination slot
4876              * and leave the destination temp as TEMP_VAL_MEM.
4877              */
4878             assert(!temp_readonly(ots));
4879             if (!ots->mem_allocated) {
4880                 temp_allocate_frame(s, ots);
4881             }
4882             tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4883             set_temp_val_nonreg(s, ots, TEMP_VAL_MEM);
4884             ots->mem_coherent = 1;
4885             return;
4886         }
4887     }
4888     set_temp_val_reg(s, ots, oreg);
4889     ots->mem_coherent = 0;
4890 
4891     if (NEED_SYNC_ARG(0)) {
4892         temp_sync(s, ots, allocated_regs, 0, 0);
4893     }
4894 }
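
/*
 * Editorial note, not part of the original source: the "no-op
 * truncation" mentioned above arises from moves such as a 64->32 bit
 * extrl expanded as a mov on a 64-bit host, where otype is TCG_TYPE_I32
 * while itype is TCG_TYPE_I64; the low half of the input's host
 * register already holds the result, so when the input dies the mov
 * reduces to a register rename with no emitted instruction.
 */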
4895 
4896 /*
4897  * Specialized code generation for INDEX_op_dup_vec.
4898  */
4899 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4900 {
4901     const TCGLifeData arg_life = op->life;
4902     TCGRegSet dup_out_regs, dup_in_regs;
4903     const TCGArgConstraint *dup_args_ct;
4904     TCGTemp *its, *ots;
4905     TCGType itype, vtype;
4906     unsigned vece;
4907     int lowpart_ofs;
4908     bool ok;
4909 
4910     ots = arg_temp(op->args[0]);
4911     its = arg_temp(op->args[1]);
4912 
4913     /* ENV should not be modified.  */
4914     tcg_debug_assert(!temp_readonly(ots));
4915 
4916     itype = its->type;
4917     vece = TCGOP_VECE(op);
4918     vtype = TCGOP_TYPE(op);
4919 
4920     if (its->val_type == TEMP_VAL_CONST) {
4921         /* Propagate constant via movi -> dupi.  */
4922         tcg_target_ulong val = its->val;
4923         if (IS_DEAD_ARG(1)) {
4924             temp_dead(s, its);
4925         }
4926         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4927         return;
4928     }
4929 
4930     dup_args_ct = opcode_args_ct(op);
4931     dup_out_regs = dup_args_ct[0].regs;
4932     dup_in_regs = dup_args_ct[1].regs;
4933 
4934     /* Allocate the output register now.  */
4935     if (ots->val_type != TEMP_VAL_REG) {
4936         TCGRegSet allocated_regs = s->reserved_regs;
4937         TCGReg oreg;
4938 
4939         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4940             /* Make sure to not spill the input register. */
4941             tcg_regset_set_reg(allocated_regs, its->reg);
4942         }
4943         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4944                              output_pref(op, 0), ots->indirect_base);
4945         set_temp_val_reg(s, ots, oreg);
4946     }
4947 
4948     switch (its->val_type) {
4949     case TEMP_VAL_REG:
4950         /*
4951          * The dup constraints must be broad, covering all possible VECE.
4952          * However, tcg_out_dup_vec() gets to see the VECE and we allow it
4953          * to fail, indicating that extra moves are required for that case.
4954          */
4955         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
4956             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
4957                 goto done;
4958             }
4959             /* Try again from memory or a vector input register.  */
4960         }
4961         if (!its->mem_coherent) {
4962             /*
4963              * The input register is not synced, and so an extra store
4964              * would be required to use memory.  Attempt an integer-vector
4965              * register move first.  We do not have a TCGRegSet for this.
4966              */
4967             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
4968                 break;
4969             }
4970             /* Sync the temp back to its slot and load from there.  */
4971             temp_sync(s, its, s->reserved_regs, 0, 0);
4972         }
4973         /* fall through */
4974 
4975     case TEMP_VAL_MEM:
4976         lowpart_ofs = 0;
4977         if (HOST_BIG_ENDIAN) {
4978             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
4979         }
4980         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
4981                              its->mem_offset + lowpart_ofs)) {
4982             goto done;
4983         }
4984         /* Load the input into the destination vector register. */
4985         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
4986         break;
4987 
4988     default:
4989         g_assert_not_reached();
4990     }
4991 
4992     /* We now have a vector input register, so dup must succeed. */
4993     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
4994     tcg_debug_assert(ok);
4995 
4996  done:
4997     ots->mem_coherent = 0;
4998     if (IS_DEAD_ARG(1)) {
4999         temp_dead(s, its);
5000     }
5001     if (NEED_SYNC_ARG(0)) {
5002         temp_sync(s, ots, s->reserved_regs, 0, 0);
5003     }
5004     if (IS_DEAD_ARG(0)) {
5005         temp_dead(s, ots);
5006     }
5007 }
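
/*
 * Editorial example, not part of the original source: the lowpart_ofs
 * adjustment above, worked for a big-endian host, where the least
 * significant element of an in-memory integer sits at the highest
 * address.  For itype == TCG_TYPE_I64 and vece == MO_8:
 *
 *     lowpart_ofs = tcg_type_size(TCG_TYPE_I64) - (1 << MO_8) = 8 - 1 = 7
 *
 * so the dup loads the byte at mem_offset + 7 rather than mem_offset.
 */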
5008 
5009 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
5010 {
5011     const TCGLifeData arg_life = op->life;
5012     const TCGOpDef * const def = &tcg_op_defs[op->opc];
5013     TCGRegSet i_allocated_regs;
5014     TCGRegSet o_allocated_regs;
5015     int i, k, nb_iargs, nb_oargs;
5016     TCGReg reg;
5017     TCGArg arg;
5018     const TCGArgConstraint *args_ct;
5019     const TCGArgConstraint *arg_ct;
5020     TCGTemp *ts;
5021     TCGArg new_args[TCG_MAX_OP_ARGS];
5022     int const_args[TCG_MAX_OP_ARGS];
5023     TCGCond op_cond;
5024 
5025     nb_oargs = def->nb_oargs;
5026     nb_iargs = def->nb_iargs;
5027 
5028     /* copy constants */
5029     memcpy(new_args + nb_oargs + nb_iargs,
5030            op->args + nb_oargs + nb_iargs,
5031            sizeof(TCGArg) * def->nb_cargs);
5032 
5033     i_allocated_regs = s->reserved_regs;
5034     o_allocated_regs = s->reserved_regs;
5035 
5036     switch (op->opc) {
5037     case INDEX_op_brcond_i32:
5038     case INDEX_op_brcond_i64:
5039         op_cond = op->args[2];
5040         break;
5041     case INDEX_op_setcond_i32:
5042     case INDEX_op_setcond_i64:
5043     case INDEX_op_negsetcond_i32:
5044     case INDEX_op_negsetcond_i64:
5045     case INDEX_op_cmp_vec:
5046         op_cond = op->args[3];
5047         break;
5048     case INDEX_op_brcond2_i32:
5049         op_cond = op->args[4];
5050         break;
5051     case INDEX_op_movcond_i32:
5052     case INDEX_op_movcond_i64:
5053     case INDEX_op_setcond2_i32:
5054     case INDEX_op_cmpsel_vec:
5055         op_cond = op->args[5];
5056         break;
5057     default:
5058         /* No condition within opcode. */
5059         op_cond = TCG_COND_ALWAYS;
5060         break;
5061     }
5062 
5063     args_ct = opcode_args_ct(op);
5064 
5065     /* satisfy input constraints */
5066     for (k = 0; k < nb_iargs; k++) {
5067         TCGRegSet i_preferred_regs, i_required_regs;
5068         bool allocate_new_reg, copyto_new_reg;
5069         TCGTemp *ts2;
5070         int i1, i2;
5071 
5072         i = args_ct[nb_oargs + k].sort_index;
5073         arg = op->args[i];
5074         arg_ct = &args_ct[i];
5075         ts = arg_temp(arg);
5076 
5077         if (ts->val_type == TEMP_VAL_CONST
5078             && tcg_target_const_match(ts->val, arg_ct->ct, ts->type,
5079                                       op_cond, TCGOP_VECE(op))) {
5080             /* constant is OK for instruction */
5081             const_args[i] = 1;
5082             new_args[i] = ts->val;
5083             continue;
5084         }
5085 
5086         reg = ts->reg;
5087         i_preferred_regs = 0;
5088         i_required_regs = arg_ct->regs;
5089         allocate_new_reg = false;
5090         copyto_new_reg = false;
5091 
5092         switch (arg_ct->pair) {
5093         case 0: /* not paired */
5094             if (arg_ct->ialias) {
5095                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5096 
5097                 /*
5098                  * If the input is readonly, then it cannot also be an
5099                  * output and aliased to itself.  If the input is not
5100                  * dead after the instruction, we must allocate a new
5101                  * register and move it.
5102                  */
5103                 if (temp_readonly(ts) || !IS_DEAD_ARG(i)
5104                     || args_ct[arg_ct->alias_index].newreg) {
5105                     allocate_new_reg = true;
5106                 } else if (ts->val_type == TEMP_VAL_REG) {
5107                     /*
5108                      * Check if the current register has already been
5109                      * allocated for another input.
5110                      */
5111                     allocate_new_reg =
5112                         tcg_regset_test_reg(i_allocated_regs, reg);
5113                 }
5114             }
5115             if (!allocate_new_reg) {
5116                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5117                           i_preferred_regs);
5118                 reg = ts->reg;
5119                 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
5120             }
5121             if (allocate_new_reg) {
5122                 /*
5123                  * Allocate a new register matching the constraint
5124                  * and move the temporary register into it.
5125                  */
5126                 temp_load(s, ts, tcg_target_available_regs[ts->type],
5127                           i_allocated_regs, 0);
5128                 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
5129                                     i_preferred_regs, ts->indirect_base);
5130                 copyto_new_reg = true;
5131             }
5132             break;
5133 
5134         case 1:
5135             /* First of an input pair; if i1 == i2, the second is an output. */
5136             i1 = i;
5137             i2 = arg_ct->pair_index;
5138             ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
5139 
5140             /*
5141              * It is easier to default to allocating a new pair
5142              * and to identify a few cases where it's not required.
5143              */
5144             if (arg_ct->ialias) {
5145                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5146                 if (IS_DEAD_ARG(i1) &&
5147                     IS_DEAD_ARG(i2) &&
5148                     !temp_readonly(ts) &&
5149                     ts->val_type == TEMP_VAL_REG &&
5150                     ts->reg < TCG_TARGET_NB_REGS - 1 &&
5151                     tcg_regset_test_reg(i_required_regs, reg) &&
5152                     !tcg_regset_test_reg(i_allocated_regs, reg) &&
5153                     !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
5154                     (ts2
5155                      ? ts2->val_type == TEMP_VAL_REG &&
5156                        ts2->reg == reg + 1 &&
5157                        !temp_readonly(ts2)
5158                      : s->reg_to_temp[reg + 1] == NULL)) {
5159                     break;
5160                 }
5161             } else {
5162                 /* Without aliasing, the pair must also be an input. */
5163                 tcg_debug_assert(ts2);
5164                 if (ts->val_type == TEMP_VAL_REG &&
5165                     ts2->val_type == TEMP_VAL_REG &&
5166                     ts2->reg == reg + 1 &&
5167                     tcg_regset_test_reg(i_required_regs, reg)) {
5168                     break;
5169                 }
5170             }
5171             reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
5172                                      0, ts->indirect_base);
5173             goto do_pair;
5174 
5175         case 2: /* pair second */
5176             reg = new_args[arg_ct->pair_index] + 1;
5177             goto do_pair;
5178 
5179         case 3: /* ialias with second output, no first input */
5180             tcg_debug_assert(arg_ct->ialias);
5181             i_preferred_regs = output_pref(op, arg_ct->alias_index);
5182 
5183             if (IS_DEAD_ARG(i) &&
5184                 !temp_readonly(ts) &&
5185                 ts->val_type == TEMP_VAL_REG &&
5186                 reg > 0 &&
5187                 s->reg_to_temp[reg - 1] == NULL &&
5188                 tcg_regset_test_reg(i_required_regs, reg) &&
5189                 !tcg_regset_test_reg(i_allocated_regs, reg) &&
5190                 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
5191                 tcg_regset_set_reg(i_allocated_regs, reg - 1);
5192                 break;
5193             }
5194             reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
5195                                      i_allocated_regs, 0,
5196                                      ts->indirect_base);
5197             tcg_regset_set_reg(i_allocated_regs, reg);
5198             reg += 1;
5199             goto do_pair;
5200 
5201         do_pair:
5202             /*
5203              * If an aliased input is not dead after the instruction,
5204              * we must allocate a new register and move it.
5205              */
5206             if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
5207                 TCGRegSet t_allocated_regs = i_allocated_regs;
5208 
5209                 /*
5210                  * Because of the alias, and the continued life, make sure
5211                  * that the temp is somewhere *other* than the reg pair,
5212                  * and we get a copy in reg.
5213                  */
5214                 tcg_regset_set_reg(t_allocated_regs, reg);
5215                 tcg_regset_set_reg(t_allocated_regs, reg + 1);
5216                 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
5217                     /* If ts was already in reg, copy it somewhere else. */
5218                     TCGReg nr;
5219                     bool ok;
5220 
5221                     tcg_debug_assert(ts->kind != TEMP_FIXED);
5222                     nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
5223                                        t_allocated_regs, 0, ts->indirect_base);
5224                     ok = tcg_out_mov(s, ts->type, nr, reg);
5225                     tcg_debug_assert(ok);
5226 
5227                     set_temp_val_reg(s, ts, nr);
5228                 } else {
5229                     temp_load(s, ts, tcg_target_available_regs[ts->type],
5230                               t_allocated_regs, 0);
5231                     copyto_new_reg = true;
5232                 }
5233             } else {
5234                 /* Preferably allocate to reg, otherwise copy. */
5235                 i_required_regs = (TCGRegSet)1 << reg;
5236                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5237                           i_preferred_regs);
5238                 copyto_new_reg = ts->reg != reg;
5239             }
5240             break;
5241 
5242         default:
5243             g_assert_not_reached();
5244         }
5245 
5246         if (copyto_new_reg) {
5247             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5248                 /*
5249                  * Cross register class move not supported.  Sync the
5250                  * temp back to its slot and load from there.
5251                  */
5252                 temp_sync(s, ts, i_allocated_regs, 0, 0);
5253                 tcg_out_ld(s, ts->type, reg,
5254                            ts->mem_base->reg, ts->mem_offset);
5255             }
5256         }
5257         new_args[i] = reg;
5258         const_args[i] = 0;
5259         tcg_regset_set_reg(i_allocated_regs, reg);
5260     }
5261 
5262     /* mark dead temporaries and free the associated registers */
5263     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
5264         if (IS_DEAD_ARG(i)) {
5265             temp_dead(s, arg_temp(op->args[i]));
5266         }
5267     }
5268 
5269     if (def->flags & TCG_OPF_COND_BRANCH) {
5270         tcg_reg_alloc_cbranch(s, i_allocated_regs);
5271     } else if (def->flags & TCG_OPF_BB_END) {
5272         tcg_reg_alloc_bb_end(s, i_allocated_regs);
5273     } else {
5274         if (def->flags & TCG_OPF_CALL_CLOBBER) {
5275             /* XXX: permit generic clobber register list ? */
5276             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5277                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5278                     tcg_reg_free(s, i, i_allocated_regs);
5279                 }
5280             }
5281         }
5282         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
5283             /* sync globals if the op has side effects and might trigger
5284                an exception. */
5285             sync_globals(s, i_allocated_regs);
5286         }
5287 
5288         /* satisfy the output constraints */
5289         for (k = 0; k < nb_oargs; k++) {
5290             i = args_ct[k].sort_index;
5291             arg = op->args[i];
5292             arg_ct = &args_ct[i];
5293             ts = arg_temp(arg);
5294 
5295             /* ENV should not be modified.  */
5296             tcg_debug_assert(!temp_readonly(ts));
5297 
5298             switch (arg_ct->pair) {
5299             case 0: /* not paired */
5300                 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
5301                     reg = new_args[arg_ct->alias_index];
5302                 } else if (arg_ct->newreg) {
5303                     reg = tcg_reg_alloc(s, arg_ct->regs,
5304                                         i_allocated_regs | o_allocated_regs,
5305                                         output_pref(op, k), ts->indirect_base);
5306                 } else {
5307                     reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
5308                                         output_pref(op, k), ts->indirect_base);
5309                 }
5310                 break;
5311 
5312             case 1: /* first of pair */
5313                 if (arg_ct->oalias) {
5314                     reg = new_args[arg_ct->alias_index];
5315                 } else if (arg_ct->newreg) {
5316                     reg = tcg_reg_alloc_pair(s, arg_ct->regs,
5317                                              i_allocated_regs | o_allocated_regs,
5318                                              output_pref(op, k),
5319                                              ts->indirect_base);
5320                 } else {
5321                     reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
5322                                              output_pref(op, k),
5323                                              ts->indirect_base);
5324                 }
5325                 break;
5326 
5327             case 2: /* second of pair */
5328                 if (arg_ct->oalias) {
5329                     reg = new_args[arg_ct->alias_index];
5330                 } else {
5331                     reg = new_args[arg_ct->pair_index] + 1;
5332                 }
5333                 break;
5334 
5335             case 3: /* first of pair, aliasing with a second input */
5336                 tcg_debug_assert(!arg_ct->newreg);
5337                 reg = new_args[arg_ct->pair_index] - 1;
5338                 break;
5339 
5340             default:
5341                 g_assert_not_reached();
5342             }
5343             tcg_regset_set_reg(o_allocated_regs, reg);
5344             set_temp_val_reg(s, ts, reg);
5345             ts->mem_coherent = 0;
5346             new_args[i] = reg;
5347         }
5348     }
5349 
5350     /* emit instruction */
5351     switch (op->opc) {
5352     case INDEX_op_ext8s_i32:
5353         tcg_out_ext8s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
5354         break;
5355     case INDEX_op_ext8s_i64:
5356         tcg_out_ext8s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
5357         break;
5358     case INDEX_op_ext8u_i32:
5359     case INDEX_op_ext8u_i64:
5360         tcg_out_ext8u(s, new_args[0], new_args[1]);
5361         break;
5362     case INDEX_op_ext16s_i32:
5363         tcg_out_ext16s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
5364         break;
5365     case INDEX_op_ext16s_i64:
5366         tcg_out_ext16s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
5367         break;
5368     case INDEX_op_ext16u_i32:
5369     case INDEX_op_ext16u_i64:
5370         tcg_out_ext16u(s, new_args[0], new_args[1]);
5371         break;
5372     case INDEX_op_ext32s_i64:
5373         tcg_out_ext32s(s, new_args[0], new_args[1]);
5374         break;
5375     case INDEX_op_ext32u_i64:
5376         tcg_out_ext32u(s, new_args[0], new_args[1]);
5377         break;
5378     case INDEX_op_ext_i32_i64:
5379         tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
5380         break;
5381     case INDEX_op_extu_i32_i64:
5382         tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
5383         break;
5384     case INDEX_op_extrl_i64_i32:
5385         tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
5386         break;
5387     default:
5388         if (def->flags & TCG_OPF_VECTOR) {
5389             tcg_out_vec_op(s, op->opc, TCGOP_TYPE(op) - TCG_TYPE_V64,
5390                            TCGOP_VECE(op), new_args, const_args);
5391         } else {
5392             tcg_out_op(s, op->opc, TCGOP_TYPE(op), new_args, const_args);
5393         }
5394         break;
5395     }
5396 
5397     /* move the outputs in the correct register if needed */
5398     for (i = 0; i < nb_oargs; i++) {
5399         ts = arg_temp(op->args[i]);
5400 
5401         /* ENV should not be modified.  */
5402         tcg_debug_assert(!temp_readonly(ts));
5403 
5404         if (NEED_SYNC_ARG(i)) {
5405             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
5406         } else if (IS_DEAD_ARG(i)) {
5407             temp_dead(s, ts);
5408         }
5409     }
5410 }
5411 
5412 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
5413 {
5414     const TCGLifeData arg_life = op->life;
5415     TCGTemp *ots, *itsl, *itsh;
5416     TCGType vtype = TCGOP_TYPE(op);
5417 
5418     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
5419     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
5420     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
5421 
5422     ots = arg_temp(op->args[0]);
5423     itsl = arg_temp(op->args[1]);
5424     itsh = arg_temp(op->args[2]);
5425 
5426     /* ENV should not be modified.  */
5427     tcg_debug_assert(!temp_readonly(ots));
5428 
5429     /* Allocate the output register now.  */
5430     if (ots->val_type != TEMP_VAL_REG) {
5431         TCGRegSet allocated_regs = s->reserved_regs;
5432         TCGRegSet dup_out_regs = opcode_args_ct(op)[0].regs;
5433         TCGReg oreg;
5434 
5435         /* Make sure to not spill the input registers. */
5436         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
5437             tcg_regset_set_reg(allocated_regs, itsl->reg);
5438         }
5439         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
5440             tcg_regset_set_reg(allocated_regs, itsh->reg);
5441         }
5442 
5443         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5444                              output_pref(op, 0), ots->indirect_base);
5445         set_temp_val_reg(s, ots, oreg);
5446     }
5447 
5448     /* Promote dup2 of immediates to dupi_vec. */
5449     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
5450         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
5451         MemOp vece = MO_64;
5452 
5453         if (val == dup_const(MO_8, val)) {
5454             vece = MO_8;
5455         } else if (val == dup_const(MO_16, val)) {
5456             vece = MO_16;
5457         } else if (val == dup_const(MO_32, val)) {
5458             vece = MO_32;
5459         }
5460 
5461         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
5462         goto done;
5463     }
5464 
5465     /* If the two inputs form one 64-bit value, try dupm_vec. */
5466     if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
5467         itsh->temp_subindex == !HOST_BIG_ENDIAN &&
5468         itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
5469         TCGTemp *its = itsl - HOST_BIG_ENDIAN;
5470 
5471         temp_sync(s, its + 0, s->reserved_regs, 0, 0);
5472         temp_sync(s, its + 1, s->reserved_regs, 0, 0);
5473 
5474         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
5475                              its->mem_base->reg, its->mem_offset)) {
5476             goto done;
5477         }
5478     }
5479 
5480     /* Fall back to generic expansion. */
5481     return false;
5482 
5483  done:
5484     ots->mem_coherent = 0;
5485     if (IS_DEAD_ARG(1)) {
5486         temp_dead(s, itsl);
5487     }
5488     if (IS_DEAD_ARG(2)) {
5489         temp_dead(s, itsh);
5490     }
5491     if (NEED_SYNC_ARG(0)) {
5492         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
5493     } else if (IS_DEAD_ARG(0)) {
5494         temp_dead(s, ots);
5495     }
5496     return true;
5497 }
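
/*
 * Editorial sketch, not part of the original source: the deposit64()
 * call above merely packs the two 32-bit halves into the 64-bit dup
 * constant, equivalent to:
 */
static uint64_t dup2_const_model(uint32_t lo, uint32_t hi)
{
    return (uint64_t)lo | ((uint64_t)hi << 32);
}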
5498 
5499 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5500                          TCGRegSet allocated_regs)
5501 {
5502     if (ts->val_type == TEMP_VAL_REG) {
5503         if (ts->reg != reg) {
5504             tcg_reg_free(s, reg, allocated_regs);
5505             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5506                 /*
5507                  * Cross register class move not supported.  Sync the
5508                  * temp back to its slot and load from there.
5509                  */
5510                 temp_sync(s, ts, allocated_regs, 0, 0);
5511                 tcg_out_ld(s, ts->type, reg,
5512                            ts->mem_base->reg, ts->mem_offset);
5513             }
5514         }
5515     } else {
5516         TCGRegSet arg_set = 0;
5517 
5518         tcg_reg_free(s, reg, allocated_regs);
5519         tcg_regset_set_reg(arg_set, reg);
5520         temp_load(s, ts, arg_set, allocated_regs, 0);
5521     }
5522 }
5523 
5524 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5525                          TCGRegSet allocated_regs)
5526 {
5527     /*
5528      * When the destination is on the stack, load up the temp and store.
5529      * If there are many call-saved registers, the temp might live to
5530      * see another use; otherwise it'll be discarded.
5531      */
5532     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5533     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5534                arg_slot_stk_ofs(arg_slot));
5535 }
5536 
5537 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5538                             TCGTemp *ts, TCGRegSet *allocated_regs)
5539 {
5540     if (arg_slot_reg_p(l->arg_slot)) {
5541         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5542         load_arg_reg(s, reg, ts, *allocated_regs);
5543         tcg_regset_set_reg(*allocated_regs, reg);
5544     } else {
5545         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5546     }
5547 }
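
/*
 * Editorial sketch, not part of the original source: arg_slot_reg_p()
 * as used above classifies the first N abstract argument slots as
 * register slots, where N is the number of integer argument registers
 * in the host calling convention; all later slots live on the stack.
 */
static bool arg_slot_reg_p_model(unsigned arg_slot, unsigned nb_iarg_regs)
{
    return arg_slot < nb_iarg_regs;
}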
5548 
5549 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5550                          intptr_t ref_off, TCGRegSet *allocated_regs)
5551 {
5552     TCGReg reg;
5553 
5554     if (arg_slot_reg_p(arg_slot)) {
5555         reg = tcg_target_call_iarg_regs[arg_slot];
5556         tcg_reg_free(s, reg, *allocated_regs);
5557         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5558         tcg_regset_set_reg(*allocated_regs, reg);
5559     } else {
5560         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5561                             *allocated_regs, 0, false);
5562         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5563         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5564                    arg_slot_stk_ofs(arg_slot));
5565     }
5566 }
5567 
5568 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
5569 {
5570     const int nb_oargs = TCGOP_CALLO(op);
5571     const int nb_iargs = TCGOP_CALLI(op);
5572     const TCGLifeData arg_life = op->life;
5573     const TCGHelperInfo *info = tcg_call_info(op);
5574     TCGRegSet allocated_regs = s->reserved_regs;
5575     int i;
5576 
5577     /*
5578      * Move inputs into place in reverse order,
5579      * so that we place stacked arguments first.
5580      */
5581     for (i = nb_iargs - 1; i >= 0; --i) {
5582         const TCGCallArgumentLoc *loc = &info->in[i];
5583         TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);
5584 
5585         switch (loc->kind) {
5586         case TCG_CALL_ARG_NORMAL:
5587         case TCG_CALL_ARG_EXTEND_U:
5588         case TCG_CALL_ARG_EXTEND_S:
5589             load_arg_normal(s, loc, ts, &allocated_regs);
5590             break;
5591         case TCG_CALL_ARG_BY_REF:
5592             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5593             load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
5594                          arg_slot_stk_ofs(loc->ref_slot),
5595                          &allocated_regs);
5596             break;
5597         case TCG_CALL_ARG_BY_REF_N:
5598             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5599             break;
5600         default:
5601             g_assert_not_reached();
5602         }
5603     }
5604 
5605     /* Mark dead temporaries and free the associated registers.  */
5606     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
5607         if (IS_DEAD_ARG(i)) {
5608             temp_dead(s, arg_temp(op->args[i]));
5609         }
5610     }
5611 
5612     /* Clobber call registers.  */
5613     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5614         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5615             tcg_reg_free(s, i, allocated_regs);
5616         }
5617     }
5618 
5619     /*
5620      * Save globals if they might be written by the helper,
5621      * sync them if they might be read.
5622      */
5623     if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
5624         /* Nothing to do */
5625     } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
5626         sync_globals(s, allocated_regs);
5627     } else {
5628         save_globals(s, allocated_regs);
5629     }
5630 
5631     /*
5632      * If the ABI passes a pointer to the returned struct as the first
5633      * argument, load that now.  Pass a pointer to the output home slot.
5634      */
5635     if (info->out_kind == TCG_CALL_RET_BY_REF) {
5636         TCGTemp *ts = arg_temp(op->args[0]);
5637 
5638         if (!ts->mem_allocated) {
5639             temp_allocate_frame(s, ts);
5640         }
5641         load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
5642     }
5643 
5644     tcg_out_call(s, tcg_call_func(op), info);
5645 
5646     /* Assign output registers and emit moves if needed.  */
5647     switch (info->out_kind) {
5648     case TCG_CALL_RET_NORMAL:
5649         for (i = 0; i < nb_oargs; i++) {
5650             TCGTemp *ts = arg_temp(op->args[i]);
5651             TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);
5652 
5653             /* ENV should not be modified.  */
5654             tcg_debug_assert(!temp_readonly(ts));
5655 
5656             set_temp_val_reg(s, ts, reg);
5657             ts->mem_coherent = 0;
5658         }
5659         break;
5660 
5661     case TCG_CALL_RET_BY_VEC:
5662         {
5663             TCGTemp *ts = arg_temp(op->args[0]);
5664 
5665             tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
5666             tcg_debug_assert(ts->temp_subindex == 0);
5667             if (!ts->mem_allocated) {
5668                 temp_allocate_frame(s, ts);
5669             }
5670             tcg_out_st(s, TCG_TYPE_V128,
5671                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5672                        ts->mem_base->reg, ts->mem_offset);
5673         }
5674         /* fall through to mark all parts in memory */
5675 
5676     case TCG_CALL_RET_BY_REF:
5677         /* The callee has performed a write through the reference. */
5678         for (i = 0; i < nb_oargs; i++) {
5679             TCGTemp *ts = arg_temp(op->args[i]);
5680             ts->val_type = TEMP_VAL_MEM;
5681         }
5682         break;
5683 
5684     default:
5685         g_assert_not_reached();
5686     }
5687 
5688     /* Flush or discard output registers as needed. */
5689     for (i = 0; i < nb_oargs; i++) {
5690         TCGTemp *ts = arg_temp(op->args[i]);
5691         if (NEED_SYNC_ARG(i)) {
5692             temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
5693         } else if (IS_DEAD_ARG(i)) {
5694             temp_dead(s, ts);
5695         }
5696     }
5697 }
5698 
5699 /**
5700  * atom_and_align_for_opc:
5701  * @s: tcg context
5702  * @opc: memory operation code
5703  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5704  * @allow_two_ops: true if we are prepared to issue two operations
5705  *
5706  * Return the alignment and atomicity to use for the inline fast path
5707  * for the given memory operation.  The alignment may be larger than
5708  * that specified in @opc, and the correct alignment will be diagnosed
5709  * by the slow path helper.
5710  *
5711  * If @allow_two_ops, the host is prepared to test for 2x alignment,
5712  * and issue two loads or stores for subalignment.
5713  */
5714 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5715                                            MemOp host_atom, bool allow_two_ops)
5716 {
5717     MemOp align = memop_alignment_bits(opc);
5718     MemOp size = opc & MO_SIZE;
5719     MemOp half = size ? size - 1 : 0;
5720     MemOp atom = opc & MO_ATOM_MASK;
5721     MemOp atmax;
5722 
5723     switch (atom) {
5724     case MO_ATOM_NONE:
5725         /* The operation requires no specific atomicity. */
5726         atmax = MO_8;
5727         break;
5728 
5729     case MO_ATOM_IFALIGN:
5730         atmax = size;
5731         break;
5732 
5733     case MO_ATOM_IFALIGN_PAIR:
5734         atmax = half;
5735         break;
5736 
5737     case MO_ATOM_WITHIN16:
5738         atmax = size;
5739         if (size == MO_128) {
5740             /* Misalignment implies !within16, and therefore no atomicity. */
5741         } else if (host_atom != MO_ATOM_WITHIN16) {
5742             /* The host does not implement within16, so require alignment. */
5743             align = MAX(align, size);
5744         }
5745         break;
5746 
5747     case MO_ATOM_WITHIN16_PAIR:
5748         atmax = size;
5749         /*
5750          * Misalignment implies !within16, and therefore half atomicity.
5751          * Any host prepared for two operations can implement this with
5752          * half alignment.
5753          */
5754         if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5755             align = MAX(align, half);
5756         }
5757         break;
5758 
5759     case MO_ATOM_SUBALIGN:
5760         atmax = size;
5761         if (host_atom != MO_ATOM_SUBALIGN) {
5762             /* If unaligned but not odd, there are subobjects up to half. */
5763             if (allow_two_ops) {
5764                 align = MAX(align, half);
5765             } else {
5766                 align = MAX(align, size);
5767             }
5768         }
5769         break;
5770 
5771     default:
5772         g_assert_not_reached();
5773     }
5774 
5775     return (TCGAtomAlign){ .atom = atmax, .align = align };
5776 }
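
/*
 * Editorial examples, not part of the original source, assuming a host
 * with MO_ATOM_IFALIGN semantics and allow_two_ops = true:
 *
 *   MO_128 | MO_ATOM_IFALIGN_PAIR -> atom MO_64, align unchanged
 *       (only the two 8-byte halves are individually atomic);
 *   MO_64 | MO_ATOM_WITHIN16      -> atom MO_64, align raised to MO_64
 *       (the host cannot provide within-16 atomicity when unaligned).
 */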
5777 
5778 /*
5779  * Similarly for qemu_ld/st slow path helpers.
5780  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5781  * using only the provided backend tcg_out_* functions.
5782  */
5783 
5784 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5785 {
5786     int ofs = arg_slot_stk_ofs(slot);
5787 
5788     /*
5789      * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
5790      * require extension to uint64_t, adjust the address for uint32_t.
5791      */
5792     if (HOST_BIG_ENDIAN &&
5793         TCG_TARGET_REG_BITS == 64 &&
5794         type == TCG_TYPE_I32) {
5795         ofs += 4;
5796     }
5797     return ofs;
5798 }
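
/*
 * Editorial illustration, not part of the original source: on a
 * big-endian 64-bit host, a TCG_TYPE_I32 argument occupies the
 * high-addressed half of its 8-byte stack slot, hence the +4 above:
 *
 *     byte offset:   ofs+0 .. ofs+3 | ofs+4 .. ofs+7
 *     I64 contents:  bits 63..32    | bits 31..0
 *     I32 argument:                 | bits 31..0   <- adjusted address
 */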
5799 
5800 static void tcg_out_helper_load_slots(TCGContext *s,
5801                                       unsigned nmov, TCGMovExtend *mov,
5802                                       const TCGLdstHelperParam *parm)
5803 {
5804     unsigned i;
5805     TCGReg dst3;
5806 
5807     /*
5808      * Start from the end, storing to the stack first.
5809      * This frees those registers, so we need not consider overlap.
5810      */
5811     for (i = nmov; i-- > 0; ) {
5812         unsigned slot = mov[i].dst;
5813 
5814         if (arg_slot_reg_p(slot)) {
5815             goto found_reg;
5816         }
5817 
5818         TCGReg src = mov[i].src;
5819         TCGType dst_type = mov[i].dst_type;
5820         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5821 
5822         /* The argument is going onto the stack; extend into scratch. */
5823         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
5824             tcg_debug_assert(parm->ntmp != 0);
5825             mov[i].dst = src = parm->tmp[0];
5826             tcg_out_movext1(s, &mov[i]);
5827         }
5828 
5829         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
5830                    tcg_out_helper_stk_ofs(dst_type, slot));
5831     }
5832     return;
5833 
5834  found_reg:
5835     /*
5836      * The remaining arguments are in registers.
5837      * Convert slot numbers to argument registers.
5838      */
5839     nmov = i + 1;
5840     for (i = 0; i < nmov; ++i) {
5841         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
5842     }
5843 
5844     switch (nmov) {
5845     case 4:
5846         /* The backend must have provided enough temps for the worst case. */
5847         tcg_debug_assert(parm->ntmp >= 2);
5848 
5849         dst3 = mov[3].dst;
5850         for (unsigned j = 0; j < 3; ++j) {
5851             if (dst3 == mov[j].src) {
5852                 /*
5853                  * Conflict. Copy the source to a temporary, perform the
5854                  * remaining moves, then the extension from our scratch
5855                  * on the way out.
5856                  */
5857                 TCGReg scratch = parm->tmp[1];
5858 
5859                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
5860                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
5861                 tcg_out_movext1_new_src(s, &mov[3], scratch);
5862                 return;
5863             }
5864         }
5865 
5866         /* No conflicts: perform this move and continue. */
5867         tcg_out_movext1(s, &mov[3]);
5868         /* fall through */
5869 
5870     case 3:
5871         tcg_out_movext3(s, mov, mov + 1, mov + 2,
5872                         parm->ntmp ? parm->tmp[0] : -1);
5873         break;
5874     case 2:
5875         tcg_out_movext2(s, mov, mov + 1,
5876                         parm->ntmp ? parm->tmp[0] : -1);
5877         break;
5878     case 1:
5879         tcg_out_movext1(s, mov);
5880         break;
5881     default:
5882         g_assert_not_reached();
5883     }
5884 }
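
/*
 * Editorial sketch, not part of the original source: the four-move
 * conflict test above, isolated.  mov[3] may be emitted ahead of the
 * other three only if its destination register is not the source of
 * any of them; otherwise its source must first be parked in a scratch
 * register.
 */
static bool mov3_conflict_model(const TCGReg src[4], TCGReg dst3)
{
    for (unsigned j = 0; j < 3; ++j) {
        if (dst3 == src[j]) {
            return true;
        }
    }
    return false;
}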
5885 
5886 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
5887                                     TCGType type, tcg_target_long imm,
5888                                     const TCGLdstHelperParam *parm)
5889 {
5890     if (arg_slot_reg_p(slot)) {
5891         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
5892     } else {
5893         int ofs = tcg_out_helper_stk_ofs(type, slot);
5894         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
5895             tcg_debug_assert(parm->ntmp != 0);
5896             tcg_out_movi(s, type, parm->tmp[0], imm);
5897             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
5898         }
5899     }
5900 }
5901 
5902 static void tcg_out_helper_load_common_args(TCGContext *s,
5903                                             const TCGLabelQemuLdst *ldst,
5904                                             const TCGLdstHelperParam *parm,
5905                                             const TCGHelperInfo *info,
5906                                             unsigned next_arg)
5907 {
5908     TCGMovExtend ptr_mov = {
5909         .dst_type = TCG_TYPE_PTR,
5910         .src_type = TCG_TYPE_PTR,
5911         .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
5912     };
5913     const TCGCallArgumentLoc *loc = &info->in[0];
5914     TCGType type;
5915     unsigned slot;
5916     tcg_target_ulong imm;
5917 
5918     /*
5919      * Handle env, which is always first.
5920      */
5921     ptr_mov.dst = loc->arg_slot;
5922     ptr_mov.src = TCG_AREG0;
5923     tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
5924 
5925     /*
5926      * Handle oi.
5927      */
5928     imm = ldst->oi;
5929     loc = &info->in[next_arg];
5930     type = TCG_TYPE_I32;
5931     switch (loc->kind) {
5932     case TCG_CALL_ARG_NORMAL:
5933         break;
5934     case TCG_CALL_ARG_EXTEND_U:
5935     case TCG_CALL_ARG_EXTEND_S:
5936         /* No extension required for MemOpIdx. */
5937         tcg_debug_assert(imm <= INT32_MAX);
5938         type = TCG_TYPE_REG;
5939         break;
5940     default:
5941         g_assert_not_reached();
5942     }
5943     tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
5944     next_arg++;
5945 
5946     /*
5947      * Handle ra.
5948      */
5949     loc = &info->in[next_arg];
5950     slot = loc->arg_slot;
5951     if (parm->ra_gen) {
5952         int arg_reg = -1;
5953         TCGReg ra_reg;
5954 
5955         if (arg_slot_reg_p(slot)) {
5956             arg_reg = tcg_target_call_iarg_regs[slot];
5957         }
5958         ra_reg = parm->ra_gen(s, ldst, arg_reg);
5959 
5960         ptr_mov.dst = slot;
5961         ptr_mov.src = ra_reg;
5962         tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
5963     } else {
5964         imm = (uintptr_t)ldst->raddr;
5965         tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
5966     }
5967 }
5968 
5969 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
5970                                        const TCGCallArgumentLoc *loc,
5971                                        TCGType dst_type, TCGType src_type,
5972                                        TCGReg lo, TCGReg hi)
5973 {
5974     MemOp reg_mo;
5975 
5976     if (dst_type <= TCG_TYPE_REG) {
5977         MemOp src_ext;
5978 
5979         switch (loc->kind) {
5980         case TCG_CALL_ARG_NORMAL:
5981             src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5982             break;
5983         case TCG_CALL_ARG_EXTEND_U:
5984             dst_type = TCG_TYPE_REG;
5985             src_ext = MO_UL;
5986             break;
5987         case TCG_CALL_ARG_EXTEND_S:
5988             dst_type = TCG_TYPE_REG;
5989             src_ext = MO_SL;
5990             break;
5991         default:
5992             g_assert_not_reached();
5993         }
5994 
5995         mov[0].dst = loc->arg_slot;
5996         mov[0].dst_type = dst_type;
5997         mov[0].src = lo;
5998         mov[0].src_type = src_type;
5999         mov[0].src_ext = src_ext;
6000         return 1;
6001     }
6002 
6003     if (TCG_TARGET_REG_BITS == 32) {
6004         assert(dst_type == TCG_TYPE_I64);
6005         reg_mo = MO_32;
6006     } else {
6007         assert(dst_type == TCG_TYPE_I128);
6008         reg_mo = MO_64;
6009     }
6010 
6011     mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
6012     mov[0].src = lo;
6013     mov[0].dst_type = TCG_TYPE_REG;
6014     mov[0].src_type = TCG_TYPE_REG;
6015     mov[0].src_ext = reg_mo;
6016 
6017     mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
6018     mov[1].src = hi;
6019     mov[1].dst_type = TCG_TYPE_REG;
6020     mov[1].src_type = TCG_TYPE_REG;
6021     mov[1].src_ext = reg_mo;
6022 
6023     return 2;
6024 }
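
/*
 * Editorial sketch, not part of the original source: for a value split
 * across two register-sized argument slots, the low part is assigned
 * to loc[HOST_BIG_ENDIAN] and the high part to loc[!HOST_BIG_ENDIAN],
 * equivalent to:
 */
static void pair_slot_model(bool big_endian, const unsigned slot[2],
                            unsigned *lo_slot, unsigned *hi_slot)
{
    *lo_slot = slot[big_endian ? 1 : 0];   /* low part: second slot on BE */
    *hi_slot = slot[big_endian ? 0 : 1];   /* high part: first slot on BE */
}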
6025 
6026 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6027                                    const TCGLdstHelperParam *parm)
6028 {
6029     const TCGHelperInfo *info;
6030     const TCGCallArgumentLoc *loc;
6031     TCGMovExtend mov[2];
6032     unsigned next_arg, nmov;
6033     MemOp mop = get_memop(ldst->oi);
6034 
6035     switch (mop & MO_SIZE) {
6036     case MO_8:
6037     case MO_16:
6038     case MO_32:
6039         info = &info_helper_ld32_mmu;
6040         break;
6041     case MO_64:
6042         info = &info_helper_ld64_mmu;
6043         break;
6044     case MO_128:
6045         info = &info_helper_ld128_mmu;
6046         break;
6047     default:
6048         g_assert_not_reached();
6049     }
6050 
6051     /* Defer env argument. */
6052     next_arg = 1;
6053 
6054     loc = &info->in[next_arg];
6055     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
6056         /*
6057          * 32-bit host with 32-bit guest: zero-extend the guest address
6058          * to 64-bits for the helper by storing the low part, then
6059          * load a zero for the high part.
6060          */
6061         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6062                                TCG_TYPE_I32, TCG_TYPE_I32,
6063                                ldst->addr_reg, -1);
6064         tcg_out_helper_load_slots(s, 1, mov, parm);
6065 
6066         tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
6067                                 TCG_TYPE_I32, 0, parm);
6068         next_arg += 2;
6069     } else {
6070         nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6071                                       ldst->addr_reg, -1);
6072         tcg_out_helper_load_slots(s, nmov, mov, parm);
6073         next_arg += nmov;
6074     }
6075 
6076     switch (info->out_kind) {
6077     case TCG_CALL_RET_NORMAL:
6078     case TCG_CALL_RET_BY_VEC:
6079         break;
6080     case TCG_CALL_RET_BY_REF:
6081         /*
6082          * The return reference is in the first argument slot.
6083          * We need memory in which to return: re-use the top of stack.
6084          */
6085         {
6086             int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6087 
6088             if (arg_slot_reg_p(0)) {
6089                 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
6090                                  TCG_REG_CALL_STACK, ofs_slot0);
6091             } else {
6092                 tcg_debug_assert(parm->ntmp != 0);
6093                 tcg_out_addi_ptr(s, parm->tmp[0],
6094                                  TCG_REG_CALL_STACK, ofs_slot0);
6095                 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6096                            TCG_REG_CALL_STACK, ofs_slot0);
6097             }
6098         }
6099         break;
6100     default:
6101         g_assert_not_reached();
6102     }
6103 
6104     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6105 }
6106 
6107 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
6108                                   bool load_sign,
6109                                   const TCGLdstHelperParam *parm)
6110 {
6111     MemOp mop = get_memop(ldst->oi);
6112     TCGMovExtend mov[2];
6113     int ofs_slot0;
6114 
6115     switch (ldst->type) {
6116     case TCG_TYPE_I64:
6117         if (TCG_TARGET_REG_BITS == 32) {
6118             break;
6119         }
6120         /* fall through */
6121 
6122     case TCG_TYPE_I32:
6123         mov[0].dst = ldst->datalo_reg;
6124         mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
6125         mov[0].dst_type = ldst->type;
6126         mov[0].src_type = TCG_TYPE_REG;
6127 
6128         /*
6129          * If load_sign, then we allowed the helper to perform the
6130          * appropriate sign extension to tcg_target_ulong, and all
6131          * we need now is a plain move.
6132          *
6133          * If not, then we expect the relevant extension
6134          * instruction to be no more expensive than a move, and
6135          * we thus save the icache etc by only using one of two
6136          * helper functions.
6137          */
6138         if (load_sign || !(mop & MO_SIGN)) {
6139             if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
6140                 mov[0].src_ext = MO_32;
6141             } else {
6142                 mov[0].src_ext = MO_64;
6143             }
6144         } else {
6145             mov[0].src_ext = mop & MO_SSIZE;
6146         }
6147         tcg_out_movext1(s, mov);
6148         return;
6149 
6150     case TCG_TYPE_I128:
6151         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6152         ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6153         switch (TCG_TARGET_CALL_RET_I128) {
6154         case TCG_CALL_RET_NORMAL:
6155             break;
6156         case TCG_CALL_RET_BY_VEC:
6157             tcg_out_st(s, TCG_TYPE_V128,
6158                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
6159                        TCG_REG_CALL_STACK, ofs_slot0);
6160             /* fall through */
6161         case TCG_CALL_RET_BY_REF:
6162             tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
6163                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
6164             tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
6165                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
6166             return;
6167         default:
6168             g_assert_not_reached();
6169         }
6170         break;
6171 
6172     default:
6173         g_assert_not_reached();
6174     }
6175 
6176     mov[0].dst = ldst->datalo_reg;
6177     mov[0].src =
6178         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
6179     mov[0].dst_type = TCG_TYPE_REG;
6180     mov[0].src_type = TCG_TYPE_REG;
6181     mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6182 
6183     mov[1].dst = ldst->datahi_reg;
6184     mov[1].src =
6185         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
6186     mov[1].dst_type = TCG_TYPE_REG;
6187     mov[1].src_type = TCG_TYPE_REG;
6188     mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6189 
6190     tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
6191 }
6192 
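/*
 * Sketch of the store slow path (assuming a 64-bit host with register
 * argument slots and a MO_64 access): the call marshalled below looks
 * roughly like
 *
 *     helper_stq_mmu(env, addr, data, oi, retaddr);
 *
 * where addr and data are loaded here, while env, oi and retaddr are
 * filled in by tcg_out_helper_load_common_args().
 */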
6193 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6194                                    const TCGLdstHelperParam *parm)
6195 {
6196     const TCGHelperInfo *info;
6197     const TCGCallArgumentLoc *loc;
6198     TCGMovExtend mov[4];
6199     TCGType data_type;
6200     unsigned next_arg, nmov, n;
6201     MemOp mop = get_memop(ldst->oi);
6202 
6203     switch (mop & MO_SIZE) {
6204     case MO_8:
6205     case MO_16:
6206     case MO_32:
6207         info = &info_helper_st32_mmu;
6208         data_type = TCG_TYPE_I32;
6209         break;
6210     case MO_64:
6211         info = &info_helper_st64_mmu;
6212         data_type = TCG_TYPE_I64;
6213         break;
6214     case MO_128:
6215         info = &info_helper_st128_mmu;
6216         data_type = TCG_TYPE_I128;
6217         break;
6218     default:
6219         g_assert_not_reached();
6220     }
6221 
6222     /* Defer env argument. */
6223     next_arg = 1;
6224     nmov = 0;
6225 
6226     /* Handle addr argument. */
6227     loc = &info->in[next_arg];
6228     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
6229     if (TCG_TARGET_REG_BITS == 32) {
        /*
         * 32-bit host (and thus 32-bit guest): zero-extend the guest
         * address to 64 bits for the helper by storing the low part now.
         * Later, after we have processed the register inputs, we will
         * load a zero for the high part.
         */
6236         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6237                                TCG_TYPE_I32, TCG_TYPE_I32,
6238                                ldst->addr_reg, -1);
6239         next_arg += 2;
6240         nmov += 1;
6241     } else {
6242         n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6243                                    ldst->addr_reg, -1);
6244         next_arg += n;
6245         nmov += n;
6246     }
6247 
6248     /* Handle data argument. */
6249     loc = &info->in[next_arg];
6250     switch (loc->kind) {
6251     case TCG_CALL_ARG_NORMAL:
6252     case TCG_CALL_ARG_EXTEND_U:
6253     case TCG_CALL_ARG_EXTEND_S:
6254         n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
6255                                    ldst->datalo_reg, ldst->datahi_reg);
6256         next_arg += n;
6257         nmov += n;
6258         tcg_out_helper_load_slots(s, nmov, mov, parm);
6259         break;
6260 
6261     case TCG_CALL_ARG_BY_REF:
6262         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6263         tcg_debug_assert(data_type == TCG_TYPE_I128);
6264         tcg_out_st(s, TCG_TYPE_I64,
6265                    HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
6266                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
6267         tcg_out_st(s, TCG_TYPE_I64,
6268                    HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
6269                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));
6270 
6271         tcg_out_helper_load_slots(s, nmov, mov, parm);
6272 
6273         if (arg_slot_reg_p(loc->arg_slot)) {
6274             tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
6275                              TCG_REG_CALL_STACK,
6276                              arg_slot_stk_ofs(loc->ref_slot));
6277         } else {
6278             tcg_debug_assert(parm->ntmp != 0);
6279             tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
6280                              arg_slot_stk_ofs(loc->ref_slot));
6281             tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6282                        TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
6283         }
6284         next_arg += 2;
6285         break;
6286 
6287     default:
6288         g_assert_not_reached();
6289     }
6290 
6291     if (TCG_TARGET_REG_BITS == 32) {
6292         /* Zero extend the address by loading a zero for the high part. */
6293         loc = &info->in[1 + !HOST_BIG_ENDIAN];
6294         tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
6295     }
6296 
6297     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6298 }
6299 
6300 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
6301 {
6302     int i, start_words, num_insns;
6303     TCGOp *op;
6304 
6305     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
6306                  && qemu_log_in_addr_range(pc_start))) {
6307         FILE *logfile = qemu_log_trylock();
6308         if (logfile) {
6309             fprintf(logfile, "OP:\n");
6310             tcg_dump_ops(s, logfile, false);
6311             fprintf(logfile, "\n");
6312             qemu_log_unlock(logfile);
6313         }
6314     }
6315 
6316 #ifdef CONFIG_DEBUG_TCG
6317     /* Ensure all labels referenced have been emitted.  */
6318     {
6319         TCGLabel *l;
6320         bool error = false;
6321 
6322         QSIMPLEQ_FOREACH(l, &s->labels, next) {
6323             if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
6324                 qemu_log_mask(CPU_LOG_TB_OP,
6325                               "$L%d referenced but not present.\n", l->id);
6326                 error = true;
6327             }
6328         }
6329         assert(!error);
6330     }
6331 #endif
6332 
6333     /* Do not reuse any EBB that may be allocated within the TB. */
6334     tcg_temp_ebb_reset_freed(s);
6335 
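    /*
     * Pass pipeline: tcg_optimize() folds and propagates constants,
     * reachable_code_pass() drops ops that can never execute (e.g.
     * those following an unconditional exit), and the liveness passes
     * mark dead outputs and the points at which globals must be
     * synced back to memory, for use by the register allocator below.
     */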
6336     tcg_optimize(s);
6337 
6338     reachable_code_pass(s);
6339     liveness_pass_0(s);
6340     liveness_pass_1(s);
6341 
6342     if (s->nb_indirects > 0) {
6343         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
6344                      && qemu_log_in_addr_range(pc_start))) {
6345             FILE *logfile = qemu_log_trylock();
6346             if (logfile) {
6347                 fprintf(logfile, "OP before indirect lowering:\n");
6348                 tcg_dump_ops(s, logfile, false);
6349                 fprintf(logfile, "\n");
6350                 qemu_log_unlock(logfile);
6351             }
6352         }
6353 
6354         /* Replace indirect temps with direct temps.  */
6355         if (liveness_pass_2(s)) {
6356             /* If changes were made, re-run liveness.  */
6357             liveness_pass_1(s);
6358         }
6359     }
6360 
6361     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
6362                  && qemu_log_in_addr_range(pc_start))) {
6363         FILE *logfile = qemu_log_trylock();
6364         if (logfile) {
6365             fprintf(logfile, "OP after optimization and liveness analysis:\n");
6366             tcg_dump_ops(s, logfile, true);
6367             fprintf(logfile, "\n");
6368             qemu_log_unlock(logfile);
6369         }
6370     }
6371 
6372     /* Initialize goto_tb jump offsets. */
6373     tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
6374     tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
6375     tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
6376     tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
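    /*
     * The slots retain TB_JMP_OFFSET_INVALID unless a corresponding
     * goto_tb opcode is emitted below, which lets consumers
     * distinguish present from absent direct jumps.
     */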
6377 
6378     tcg_reg_alloc_start(s);
6379 
6380     /*
6381      * Reset the buffer pointers when restarting after overflow.
6382      * TODO: Move this into translate-all.c with the rest of the
6383      * buffer management.  Having only this done here is confusing.
6384      */
6385     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
6386     s->code_ptr = s->code_buf;
6387     s->data_gen_ptr = NULL;
6388 
6389     QSIMPLEQ_INIT(&s->ldst_labels);
6390     s->pool_labels = NULL;
6391 
6392     start_words = s->insn_start_words;
6393     s->gen_insn_data =
6394         tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);
6395 
6396     tcg_out_tb_start(s);
6397 
6398     num_insns = -1;
6399     QTAILQ_FOREACH(op, &s->ops, link) {
6400         TCGOpcode opc = op->opc;
6401 
6402         switch (opc) {
6403         case INDEX_op_mov_i32:
6404         case INDEX_op_mov_i64:
6405         case INDEX_op_mov_vec:
6406             tcg_reg_alloc_mov(s, op);
6407             break;
6408         case INDEX_op_dup_vec:
6409             tcg_reg_alloc_dup(s, op);
6410             break;
6411         case INDEX_op_insn_start:
6412             if (num_insns >= 0) {
6413                 size_t off = tcg_current_code_size(s);
6414                 s->gen_insn_end_off[num_insns] = off;
6415                 /* Assert that we do not overflow our stored offset.  */
6416                 assert(s->gen_insn_end_off[num_insns] == off);
6417             }
6418             num_insns++;
6419             for (i = 0; i < start_words; ++i) {
6420                 s->gen_insn_data[num_insns * start_words + i] =
6421                     tcg_get_insn_start_param(op, i);
6422             }
6423             break;
6424         case INDEX_op_discard:
6425             temp_dead(s, arg_temp(op->args[0]));
6426             break;
6427         case INDEX_op_set_label:
6428             tcg_reg_alloc_bb_end(s, s->reserved_regs);
6429             tcg_out_label(s, arg_label(op->args[0]));
6430             break;
6431         case INDEX_op_call:
6432             tcg_reg_alloc_call(s, op);
6433             break;
6434         case INDEX_op_exit_tb:
6435             tcg_out_exit_tb(s, op->args[0]);
6436             break;
6437         case INDEX_op_goto_tb:
6438             tcg_out_goto_tb(s, op->args[0]);
6439             break;
6440         case INDEX_op_dup2_vec:
6441             if (tcg_reg_alloc_dup2(s, op)) {
6442                 break;
6443             }
6444             /* fall through */
6445         default:
6446             /* Sanity check that we've not introduced any unhandled opcodes. */
6447             tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op),
6448                                               TCGOP_FLAGS(op)));
            /* Note: it would be faster to have specialized register
               allocator functions for some common argument patterns.  */
6452             tcg_reg_alloc_op(s, op);
6453             break;
6454         }
6455         /* Test for (pending) buffer overflow.  The assumption is that any
6456            one operation beginning below the high water mark cannot overrun
6457            the buffer completely.  Thus we can test for overflow after
6458            generating code without having to check during generation.  */
6459         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
6460             return -1;
6461         }
6462         /* Test for TB overflow, as seen by gen_insn_end_off.  */
6463         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
6464             return -2;
6465         }
6466     }
6467     tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
6468     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
6469 
6470     /* Generate TB finalization at the end of block */
6471     i = tcg_out_ldst_finalize(s);
6472     if (i < 0) {
6473         return i;
6474     }
6475     i = tcg_out_pool_finalize(s);
6476     if (i < 0) {
6477         return i;
6478     }
6479     if (!tcg_resolve_relocs(s)) {
6480         return -2;
6481     }
6482 
6483 #ifndef CONFIG_TCG_INTERPRETER
6484     /* flush instruction cache */
6485     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
6486                         (uintptr_t)s->code_buf,
6487                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
6488 #endif
6489 
6490     return tcg_current_code_size(s);
6491 }
6492 
6493 #ifdef ELF_HOST_MACHINE
/* In order to use this feature, the backend needs to do three things:

   (1) Define ELF_HOST_MACHINE, which both supplies the e_machine value
       for the ELF image and indicates that the backend supports the
       feature.

   (2) Define tcg_register_jit.  This should create a buffer containing
       the contents of a .debug_frame section that describes the post-
       prologue unwind info for the tcg machine.

   (3) Call tcg_register_jit_int, with the constructed .debug_frame.
*/
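/* As an illustrative sketch only (a real backend supplies its own
   .debug_frame contents), steps (2) and (3) typically look like:

       static const DebugFrame debug_frame = {
           ... CIE and FDE describing the prologue's frame layout ...
       };

       void tcg_register_jit(const void *buf, size_t buf_size)
       {
           tcg_register_jit_int(buf, buf_size,
                                &debug_frame, sizeof(debug_frame));
       }

   where DebugFrame is a backend-defined packed CIE+FDE structure.  */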
6505 
6506 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
6507 typedef enum {
6508     JIT_NOACTION = 0,
6509     JIT_REGISTER_FN,
6510     JIT_UNREGISTER_FN
6511 } jit_actions_t;
6512 
6513 struct jit_code_entry {
6514     struct jit_code_entry *next_entry;
6515     struct jit_code_entry *prev_entry;
6516     const void *symfile_addr;
6517     uint64_t symfile_size;
6518 };
6519 
6520 struct jit_descriptor {
6521     uint32_t version;
6522     uint32_t action_flag;
6523     struct jit_code_entry *relevant_entry;
6524     struct jit_code_entry *first_entry;
6525 };
6526 
6527 void __jit_debug_register_code(void) __attribute__((noinline));
6528 void __jit_debug_register_code(void)
6529 {
6530     asm("");
6531 }
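/* GDB sets a breakpoint on this otherwise empty function; calling it
   after updating __jit_debug_descriptor notifies the debugger that a
   symfile has been registered or unregistered.  */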
6532 
6533 /* Must statically initialize the version, because GDB may check
6534    the version before we can set it.  */
6535 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
6536 
6537 /* End GDB interface.  */
6538 
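/* Return the offset of STR within the string table STRTAB.  The caller
   must ensure that STR is present: there is no terminating bounds
   check, so searching for an absent string would run off the end of
   the table.  */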
6539 static int find_string(const char *strtab, const char *str)
6540 {
6541     const char *p = strtab + 1;
6542 
6543     while (1) {
6544         if (strcmp(p, str) == 0) {
6545             return p - strtab;
6546         }
6547         p += strlen(p) + 1;
6548     }
6549 }
6550 
6551 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
6552                                  const void *debug_frame,
6553                                  size_t debug_frame_size)
6554 {
6555     struct __attribute__((packed)) DebugInfo {
6556         uint32_t  len;
6557         uint16_t  version;
6558         uint32_t  abbrev;
6559         uint8_t   ptr_size;
6560         uint8_t   cu_die;
6561         uint16_t  cu_lang;
6562         uintptr_t cu_low_pc;
6563         uintptr_t cu_high_pc;
6564         uint8_t   fn_die;
6565         char      fn_name[16];
6566         uintptr_t fn_low_pc;
6567         uintptr_t fn_high_pc;
6568         uint8_t   cu_eoc;
6569     };
6570 
6571     struct ElfImage {
6572         ElfW(Ehdr) ehdr;
6573         ElfW(Phdr) phdr;
6574         ElfW(Shdr) shdr[7];
6575         ElfW(Sym)  sym[2];
6576         struct DebugInfo di;
6577         uint8_t    da[24];
6578         char       str[80];
6579     };
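    /*
     * The image is allocated as a single contiguous block: the header
     * structure above, immediately followed by the .debug_frame
     * contents; hence shdr[4].sh_offset below is sizeof(struct ElfImage).
     */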
6580 
6581     struct ElfImage *img;
6582 
6583     static const struct ElfImage img_template = {
6584         .ehdr = {
6585             .e_ident[EI_MAG0] = ELFMAG0,
6586             .e_ident[EI_MAG1] = ELFMAG1,
6587             .e_ident[EI_MAG2] = ELFMAG2,
6588             .e_ident[EI_MAG3] = ELFMAG3,
6589             .e_ident[EI_CLASS] = ELF_CLASS,
6590             .e_ident[EI_DATA] = ELF_DATA,
6591             .e_ident[EI_VERSION] = EV_CURRENT,
6592             .e_type = ET_EXEC,
6593             .e_machine = ELF_HOST_MACHINE,
6594             .e_version = EV_CURRENT,
6595             .e_phoff = offsetof(struct ElfImage, phdr),
6596             .e_shoff = offsetof(struct ElfImage, shdr),
            .e_ehsize = sizeof(ElfW(Ehdr)),
6598             .e_phentsize = sizeof(ElfW(Phdr)),
6599             .e_phnum = 1,
6600             .e_shentsize = sizeof(ElfW(Shdr)),
6601             .e_shnum = ARRAY_SIZE(img->shdr),
6602             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
6603 #ifdef ELF_HOST_FLAGS
6604             .e_flags = ELF_HOST_FLAGS,
6605 #endif
6606 #ifdef ELF_OSABI
6607             .e_ident[EI_OSABI] = ELF_OSABI,
6608 #endif
6609         },
6610         .phdr = {
6611             .p_type = PT_LOAD,
6612             .p_flags = PF_X,
6613         },
6614         .shdr = {
6615             [0] = { .sh_type = SHT_NULL },
6616             /* Trick: The contents of code_gen_buffer are not present in
6617                this fake ELF file; that got allocated elsewhere.  Therefore
6618                we mark .text as SHT_NOBITS (similar to .bss) so that readers
6619                will not look for contents.  We can record any address.  */
6620             [1] = { /* .text */
6621                 .sh_type = SHT_NOBITS,
6622                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
6623             },
6624             [2] = { /* .debug_info */
6625                 .sh_type = SHT_PROGBITS,
6626                 .sh_offset = offsetof(struct ElfImage, di),
6627                 .sh_size = sizeof(struct DebugInfo),
6628             },
6629             [3] = { /* .debug_abbrev */
6630                 .sh_type = SHT_PROGBITS,
6631                 .sh_offset = offsetof(struct ElfImage, da),
6632                 .sh_size = sizeof(img->da),
6633             },
6634             [4] = { /* .debug_frame */
6635                 .sh_type = SHT_PROGBITS,
6636                 .sh_offset = sizeof(struct ElfImage),
6637             },
6638             [5] = { /* .symtab */
6639                 .sh_type = SHT_SYMTAB,
6640                 .sh_offset = offsetof(struct ElfImage, sym),
6641                 .sh_size = sizeof(img->sym),
6642                 .sh_info = 1,
6643                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
6644                 .sh_entsize = sizeof(ElfW(Sym)),
6645             },
6646             [6] = { /* .strtab */
6647                 .sh_type = SHT_STRTAB,
6648                 .sh_offset = offsetof(struct ElfImage, str),
6649                 .sh_size = sizeof(img->str),
6650             }
6651         },
6652         .sym = {
6653             [1] = { /* code_gen_buffer */
6654                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
6655                 .st_shndx = 1,
6656             }
6657         },
6658         .di = {
6659             .len = sizeof(struct DebugInfo) - 4,
6660             .version = 2,
6661             .ptr_size = sizeof(void *),
6662             .cu_die = 1,
6663             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
6664             .fn_die = 2,
6665             .fn_name = "code_gen_buffer"
6666         },
6667         .da = {
6668             1,          /* abbrev number (the cu) */
6669             0x11, 1,    /* DW_TAG_compile_unit, has children */
6670             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
6671             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6672             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6673             0, 0,       /* end of abbrev */
6674             2,          /* abbrev number (the fn) */
6675             0x2e, 0,    /* DW_TAG_subprogram, no children */
6676             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
6677             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6678             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6679             0, 0,       /* end of abbrev */
6680             0           /* no more abbrev */
6681         },
6682         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
6683                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
6684     };
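    /*
     * Note that .di and .da must stay in sync: abbrev 1 describes the
     * compile unit (language, low/high pc) and abbrev 2 the single
     * code_gen_buffer subprogram (name, low/high pc).
     */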
6685 
6686     /* We only need a single jit entry; statically allocate it.  */
6687     static struct jit_code_entry one_entry;
6688 
6689     uintptr_t buf = (uintptr_t)buf_ptr;
6690     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
6691     DebugFrameHeader *dfh;
6692 
6693     img = g_malloc(img_size);
6694     *img = img_template;
6695 
6696     img->phdr.p_vaddr = buf;
6697     img->phdr.p_paddr = buf;
6698     img->phdr.p_memsz = buf_size;
6699 
6700     img->shdr[1].sh_name = find_string(img->str, ".text");
6701     img->shdr[1].sh_addr = buf;
6702     img->shdr[1].sh_size = buf_size;
6703 
6704     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
6705     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
6706 
6707     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
6708     img->shdr[4].sh_size = debug_frame_size;
6709 
6710     img->shdr[5].sh_name = find_string(img->str, ".symtab");
6711     img->shdr[6].sh_name = find_string(img->str, ".strtab");
6712 
6713     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
6714     img->sym[1].st_value = buf;
6715     img->sym[1].st_size = buf_size;
6716 
6717     img->di.cu_low_pc = buf;
6718     img->di.cu_high_pc = buf + buf_size;
6719     img->di.fn_low_pc = buf;
6720     img->di.fn_high_pc = buf + buf_size;
6721 
6722     dfh = (DebugFrameHeader *)(img + 1);
6723     memcpy(dfh, debug_frame, debug_frame_size);
6724     dfh->fde.func_start = buf;
6725     dfh->fde.func_len = buf_size;
6726 
6727 #ifdef DEBUG_JIT
    /* Enable this block to debug creation of the ELF image file.
       The image can then be inspected with readelf, objdump, or other
       utilities.  */
    {
        g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
        FILE *f = fopen(jit, "w+b");
        if (f) {
            /* fwrite returns the number of complete items written,
               so compare against 1 rather than img_size; the check
               also avoids an unused return value warning.  */
            if (fwrite(img, img_size, 1, f) != 1) {
                /* A failed write of the debug image is not fatal.  */
            }
            fclose(f);
        }
    }
6740 #endif
6741 
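    /*
     * Per the GDB JIT interface, fill in the descriptor (action flag
     * and entry pointers) before invoking the trapped registration
     * hook.
     */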
6742     one_entry.symfile_addr = img;
6743     one_entry.symfile_size = img_size;
6744 
6745     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
6746     __jit_debug_descriptor.relevant_entry = &one_entry;
6747     __jit_debug_descriptor.first_entry = &one_entry;
6748     __jit_debug_register_code();
6749 }
6750 #else
6751 /* No support for the feature.  Provide the entry point expected by exec.c,
6752    and implement the internal function we declared earlier.  */
6753 
6754 static void tcg_register_jit_int(const void *buf, size_t size,
6755                                  const void *debug_frame,
6756                                  size_t debug_frame_size)
6757 {
6758 }
6759 
6760 void tcg_register_jit(const void *buf, size_t buf_size)
6761 {
6762 }
6763 #endif /* ELF_HOST_MACHINE */
6764 
6765 #if !TCG_TARGET_MAYBE_vec
6766 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
6767 {
6768     g_assert_not_reached();
6769 }
6770 #endif
6771