xref: /qemu/tcg/tcg.c (revision aef5ac8624c7b826ae2adde48bc6997286ee1303)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 
/* Define to dump the ELF file used to communicate with GDB.  */
28 #undef DEBUG_JIT
29 
30 #include "qemu/error-report.h"
31 #include "qemu/cutils.h"
32 #include "qemu/host-utils.h"
33 #include "qemu/qemu-print.h"
34 #include "qemu/cacheflush.h"
35 #include "qemu/cacheinfo.h"
36 #include "qemu/timer.h"
37 #include "exec/translation-block.h"
38 #include "exec/tlb-common.h"
39 #include "tcg/startup.h"
40 #include "tcg/tcg-op-common.h"
41 
42 #if UINTPTR_MAX == UINT32_MAX
43 # define ELF_CLASS  ELFCLASS32
44 #else
45 # define ELF_CLASS  ELFCLASS64
46 #endif
47 #if HOST_BIG_ENDIAN
48 # define ELF_DATA   ELFDATA2MSB
49 #else
50 # define ELF_DATA   ELFDATA2LSB
51 #endif
52 
53 #include "elf.h"
54 #include "exec/log.h"
55 #include "tcg/tcg-ldst.h"
56 #include "tcg/tcg-temp-internal.h"
57 #include "tcg-internal.h"
58 #include "tcg/perf.h"
59 #include "tcg-has.h"
60 #ifdef CONFIG_USER_ONLY
61 #include "user/guest-base.h"
62 #endif
63 
64 /* Forward declarations for functions declared in tcg-target.c.inc and
65    used here. */
66 static void tcg_target_init(TCGContext *s);
67 static void tcg_target_qemu_prologue(TCGContext *s);
68 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
69                         intptr_t value, intptr_t addend);
70 static void tcg_out_nop_fill(tcg_insn_unit *p, int count);
71 
72 typedef struct TCGLabelQemuLdst TCGLabelQemuLdst;
73 static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
74 static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
75 
/* The CIE and FDE header definitions will be common to all hosts.  */

/* Common Information Entry header (DWARF .debug_frame style, for GDB JIT). */
typedef struct {
    /* Record length; aligned so the following records pack naturally. */
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;            /* distinguishes a CIE from an FDE record */
    uint8_t version;        /* CIE format version */
    char augmentation[1];   /* NUL-terminated augmentation string (empty) */
    uint8_t code_align;     /* code alignment factor */
    uint8_t data_align;     /* data alignment factor */
    uint8_t return_column;  /* register column holding the return address */
} DebugFrameCIE;

/* Frame Description Entry header covering one span of generated code. */
typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;    /* offset back to the governing CIE */
    uintptr_t func_start;   /* first address covered by this FDE */
    uintptr_t func_len;     /* size in bytes of the covered range */
} DebugFrameFDEHeader;

/* A CIE followed immediately by one FDE header, emitted as one blob. */
typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
98 
/*
 * Descriptor for one out-of-line qemu_ld/qemu_st slow path.  Entries are
 * queued on TCGContext.ldst_labels and emitted by tcg_out_ldst_finalize.
 */
struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;            /* memory operation + mmu index */
    TCGType type;           /* result type of a load */
    TCGReg addr_reg;        /* reg index for guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* addr of the next IR of qemu_ld/st IR */
    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;  /* link in ldst_labels queue */
};
110 
111 static void tcg_register_jit_int(const void *buf, size_t size,
112                                  const void *debug_frame,
113                                  size_t debug_frame_size)
114     __attribute__((unused));
115 
116 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
117 static void tcg_out_tb_start(TCGContext *s);
118 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
119                        intptr_t arg2);
120 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
121 static void tcg_out_movi(TCGContext *s, TCGType type,
122                          TCGReg ret, tcg_target_long arg);
123 static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
124 static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
125 static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
126 static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
127 static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
128 static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
129 static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
130 static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
131 static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
132 static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
133 static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
134 static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
135 static void tcg_out_goto_tb(TCGContext *s, int which);
136 static void tcg_out_goto_ptr(TCGContext *s, TCGReg dest);
137 static void tcg_out_mb(TCGContext *s, unsigned bar);
138 static void tcg_out_br(TCGContext *s, TCGLabel *l);
139 static void tcg_out_set_carry(TCGContext *s);
140 static void tcg_out_set_borrow(TCGContext *s);
141 #if TCG_TARGET_MAYBE_vec
142 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
143                             TCGReg dst, TCGReg src);
144 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
145                              TCGReg dst, TCGReg base, intptr_t offset);
146 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
147                              TCGReg dst, int64_t arg);
148 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
149                            unsigned vecl, unsigned vece,
150                            const TCGArg args[TCG_MAX_OP_ARGS],
151                            const int const_args[TCG_MAX_OP_ARGS]);
152 #else
/*
 * Stubs used when TCG_TARGET_MAYBE_vec is 0: the backend has no vector
 * support, so none of these functions may ever be reached at runtime.
 */
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
/* Without vector support, report that no vector op can be emitted. */
int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
{
    return 0;
}
179 #endif
180 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
181                        intptr_t arg2);
182 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
183                         TCGReg base, intptr_t ofs);
184 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
185                          const TCGHelperInfo *info);
186 static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
187 static bool tcg_target_const_match(int64_t val, int ct,
188                                    TCGType type, TCGCond cond, int vece);
189 
190 #ifndef CONFIG_USER_ONLY
191 #define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
192 #endif
193 
194 typedef struct TCGLdstHelperParam {
195     TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
196     unsigned ntmp;
197     int tmp[3];
198 } TCGLdstHelperParam;
199 
200 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
201                                    const TCGLdstHelperParam *p)
202     __attribute__((unused));
203 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
204                                   bool load_sign, const TCGLdstHelperParam *p)
205     __attribute__((unused));
206 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
207                                    const TCGLdstHelperParam *p)
208     __attribute__((unused));
209 
210 static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
211     [MO_UB] = helper_ldub_mmu,
212     [MO_SB] = helper_ldsb_mmu,
213     [MO_UW] = helper_lduw_mmu,
214     [MO_SW] = helper_ldsw_mmu,
215     [MO_UL] = helper_ldul_mmu,
216     [MO_UQ] = helper_ldq_mmu,
217 #if TCG_TARGET_REG_BITS == 64
218     [MO_SL] = helper_ldsl_mmu,
219     [MO_128] = helper_ld16_mmu,
220 #endif
221 };
222 
223 static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
224     [MO_8]  = helper_stb_mmu,
225     [MO_16] = helper_stw_mmu,
226     [MO_32] = helper_stl_mmu,
227     [MO_64] = helper_stq_mmu,
228 #if TCG_TARGET_REG_BITS == 64
229     [MO_128] = helper_st16_mmu,
230 #endif
231 };
232 
233 typedef struct {
234     MemOp atom;   /* lg2 bits of atomicity required */
235     MemOp align;  /* lg2 bits of alignment to use */
236 } TCGAtomAlign;
237 
238 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
239                                            MemOp host_atom, bool allow_two_ops)
240     __attribute__((unused));
241 
242 #ifdef CONFIG_USER_ONLY
243 bool tcg_use_softmmu;
244 #endif
245 
246 TCGContext tcg_init_ctx;
247 __thread TCGContext *tcg_ctx;
248 
249 TCGContext **tcg_ctxs;
250 unsigned int tcg_cur_ctxs;
251 unsigned int tcg_max_ctxs;
252 TCGv_env tcg_env;
253 const void *tcg_code_gen_epilogue;
254 uintptr_t tcg_splitwx_diff;
255 
256 #ifndef CONFIG_TCG_INTERPRETER
257 tcg_prologue_fn *tcg_qemu_tb_exec;
258 #endif
259 
260 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
261 static TCGRegSet tcg_target_call_clobber_regs;
262 
#if TCG_TARGET_INSN_UNIT_SIZE == 1
/* Append one byte to the code stream. */
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    tcg_insn_unit *p = s->code_ptr;

    *p = v;
    s->code_ptr = p + 1;
}

/* Overwrite one previously emitted byte at @p. */
static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    p[0] = v;
}
#endif
275 
276 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
277 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
278 {
279     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
280         *s->code_ptr++ = v;
281     } else {
282         tcg_insn_unit *p = s->code_ptr;
283         memcpy(p, &v, sizeof(v));
284         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
285     }
286 }
287 
288 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
289                                                        uint16_t v)
290 {
291     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
292         *p = v;
293     } else {
294         memcpy(p, &v, sizeof(v));
295     }
296 }
297 #endif
298 
299 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
300 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
301 {
302     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
303         *s->code_ptr++ = v;
304     } else {
305         tcg_insn_unit *p = s->code_ptr;
306         memcpy(p, &v, sizeof(v));
307         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
308     }
309 }
310 
311 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
312                                                        uint32_t v)
313 {
314     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
315         *p = v;
316     } else {
317         memcpy(p, &v, sizeof(v));
318     }
319 }
320 #endif
321 
322 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
323 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
324 {
325     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
326         *s->code_ptr++ = v;
327     } else {
328         tcg_insn_unit *p = s->code_ptr;
329         memcpy(p, &v, sizeof(v));
330         s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
331     }
332 }
333 
334 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
335                                                        uint64_t v)
336 {
337     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
338         *p = v;
339     } else {
340         memcpy(p, &v, sizeof(v));
341     }
342 }
343 #endif
344 
345 /* label relocation processing */
346 
347 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
348                           TCGLabel *l, intptr_t addend)
349 {
350     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
351 
352     r->type = type;
353     r->ptr = code_ptr;
354     r->addend = addend;
355     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
356 }
357 
358 static void tcg_out_label(TCGContext *s, TCGLabel *l)
359 {
360     tcg_debug_assert(!l->has_value);
361     l->has_value = 1;
362     l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
363 }
364 
365 TCGLabel *gen_new_label(void)
366 {
367     TCGContext *s = tcg_ctx;
368     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
369 
370     memset(l, 0, sizeof(TCGLabel));
371     l->id = s->nb_labels++;
372     QSIMPLEQ_INIT(&l->branches);
373     QSIMPLEQ_INIT(&l->relocs);
374 
375     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
376 
377     return l;
378 }
379 
380 static bool tcg_resolve_relocs(TCGContext *s)
381 {
382     TCGLabel *l;
383 
384     QSIMPLEQ_FOREACH(l, &s->labels, next) {
385         TCGRelocation *r;
386         uintptr_t value = l->u.value;
387 
388         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
389             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
390                 return false;
391             }
392         }
393     }
394     return true;
395 }
396 
/* Record the current code offset as the reset point for goto_tb @which. */
static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}
405 
/* Record the offset of the patchable jump instruction for goto_tb @which. */
static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}
414 
415 static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
416 {
417     /*
418      * Return the read-execute version of the pointer, for the benefit
419      * of any pc-relative addressing mode.
420      */
421     return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
422 }
423 
/*
 * Offset of tlb.f[which] relative to the end of CPUNegativeOffsetState.
 * NOTE(review): presumably negative, matching the negative-offset layout
 * of that state relative to env -- confirm against the struct definition.
 */
static int __attribute__((unused))
tlb_mask_table_ofs(TCGContext *s, int which)
{
    return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
            sizeof(CPUNegativeOffsetState));
}
430 
/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    /* Unwind to the sigsetjmp point for this translation; -2 = retry. */
    siglongjmp(s->jmp_trans, -2);
}
437 
438 /*
439  * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
440  * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
441  *
442  * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
444  * argument stack slot), converting to TCGReg once all arguments that
445  * are destined for the stack are processed.
446  */
typedef struct TCGMovExtend {
    unsigned dst;       /* destination register, or arg slot number (above) */
    TCGReg src;         /* source register */
    TCGType dst_type;   /* integral type of the destination */
    TCGType src_type;   /* integral type of the source */
    MemOp src_ext;      /* extension to apply to the source */
} TCGMovExtend;
454 
455 /**
456  * tcg_out_movext -- move and extend
457  * @s: tcg context
458  * @dst_type: integral type for destination
459  * @dst: destination register
460  * @src_type: integral type for source
461  * @src_ext: extension to apply to source
462  * @src: source register
463  *
464  * Move or extend @src into @dst, depending on @src_ext and the types.
465  */
466 static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
467                            TCGType src_type, MemOp src_ext, TCGReg src)
468 {
469     switch (src_ext) {
470     case MO_UB:
471         tcg_out_ext8u(s, dst, src);
472         break;
473     case MO_SB:
474         tcg_out_ext8s(s, dst_type, dst, src);
475         break;
476     case MO_UW:
477         tcg_out_ext16u(s, dst, src);
478         break;
479     case MO_SW:
480         tcg_out_ext16s(s, dst_type, dst, src);
481         break;
482     case MO_UL:
483     case MO_SL:
484         if (dst_type == TCG_TYPE_I32) {
485             if (src_type == TCG_TYPE_I32) {
486                 tcg_out_mov(s, TCG_TYPE_I32, dst, src);
487             } else {
488                 tcg_out_extrl_i64_i32(s, dst, src);
489             }
490         } else if (src_type == TCG_TYPE_I32) {
491             if (src_ext & MO_SIGN) {
492                 tcg_out_exts_i32_i64(s, dst, src);
493             } else {
494                 tcg_out_extu_i32_i64(s, dst, src);
495             }
496         } else {
497             if (src_ext & MO_SIGN) {
498                 tcg_out_ext32s(s, dst, src);
499             } else {
500                 tcg_out_ext32u(s, dst, src);
501             }
502         }
503         break;
504     case MO_UQ:
505         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
506         if (dst_type == TCG_TYPE_I32) {
507             tcg_out_extrl_i64_i32(s, dst, src);
508         } else {
509             tcg_out_mov(s, TCG_TYPE_I64, dst, src);
510         }
511         break;
512     default:
513         g_assert_not_reached();
514     }
515 }
516 
/* Minor variations on a theme, using a structure. */

/* As tcg_out_movext, taking operands from @i but overriding the source. */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}

/* As tcg_out_movext, with all operands taken from @i. */
static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}
528 
529 /**
530  * tcg_out_movext2 -- move and extend two pair
531  * @s: tcg context
532  * @i1: first move description
533  * @i2: second move description
534  * @scratch: temporary register, or -1 for none
535  *
536  * As tcg_out_movext, for both @i1 and @i2, caring for overlap
537  * between the sources and destinations.
538  */
539 
static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    if (i1->dst != src2) {
        /* No overlap: the moves can simply be performed in order. */
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    if (i2->dst == src1) {
        /* Full cycle: the two source registers must be exchanged. */
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            /* No host xchg: break the cycle via the scratch register. */
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    /* i1->dst overlaps src2, so write i2 first, then i1. */
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}
568 
569 /**
570  * tcg_out_movext3 -- move and extend three pair
571  * @s: tcg context
572  * @i1: first move description
573  * @i2: second move description
574  * @i3: third move description
575  * @scratch: temporary register, or -1 for none
576  *
577  * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
578  * between the sources and destinations.
579  */
580 
static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    /*
     * If one destination clobbers no remaining source, do that move
     * first and reduce to the two-move case.
     */
    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch or two xchg.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            /* No host xchg: park src1 in scratch and unwind the cycle. */
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            /* No host xchg: park src1 in scratch and unwind the cycle. */
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}
644 
645 /*
646  * Allocate a new TCGLabelQemuLdst entry.
647  */
648 
649 __attribute__((unused))
650 static TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
651 {
652     TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));
653 
654     memset(l, 0, sizeof(*l));
655     QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next);
656 
657     return l;
658 }
659 
660 /*
661  * Allocate new constant pool entries.
662  */
663 
664 typedef struct TCGLabelPoolData {
665     struct TCGLabelPoolData *next;
666     tcg_insn_unit *label;
667     intptr_t addend;
668     int rtype;
669     unsigned nlong;
670     tcg_target_ulong data[];
671 } TCGLabelPoolData;
672 
673 static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype,
674                                         tcg_insn_unit *label, intptr_t addend)
675 {
676     TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData)
677                                      + sizeof(tcg_target_ulong) * nlong);
678 
679     n->label = label;
680     n->addend = addend;
681     n->rtype = rtype;
682     n->nlong = nlong;
683     return n;
684 }
685 
static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n)
{
    TCGLabelPoolData *i, **pp;
    int nlong = n->nlong;

    /* Insertion sort on the pool.  */
    /*
     * Sorted by descending nlong, then by descending data contents;
     * equal entries thus end up adjacent, letting tcg_out_pool_finalize
     * emit each distinct value only once.
     */
    for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) {
        if (nlong > i->nlong) {
            break;
        }
        if (nlong < i->nlong) {
            continue;
        }
        if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) {
            break;
        }
    }
    n->next = *pp;
    *pp = n;
}
706 
707 /* The "usual" for generic integer code.  */
708 __attribute__((unused))
709 static void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype,
710                            tcg_insn_unit *label, intptr_t addend)
711 {
712     TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend);
713     n->data[0] = d;
714     new_pool_insert(s, n);
715 }
716 
717 /* For v64 or v128, depending on the host.  */
718 __attribute__((unused))
719 static void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label,
720                         intptr_t addend, tcg_target_ulong d0,
721                         tcg_target_ulong d1)
722 {
723     TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend);
724     n->data[0] = d0;
725     n->data[1] = d1;
726     new_pool_insert(s, n);
727 }
728 
729 /* For v128 or v256, depending on the host.  */
730 __attribute__((unused))
731 static void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label,
732                         intptr_t addend, tcg_target_ulong d0,
733                         tcg_target_ulong d1, tcg_target_ulong d2,
734                         tcg_target_ulong d3)
735 {
736     TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend);
737     n->data[0] = d0;
738     n->data[1] = d1;
739     n->data[2] = d2;
740     n->data[3] = d3;
741     new_pool_insert(s, n);
742 }
743 
744 /* For v256, for 32-bit host.  */
745 __attribute__((unused))
746 static void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label,
747                         intptr_t addend, tcg_target_ulong d0,
748                         tcg_target_ulong d1, tcg_target_ulong d2,
749                         tcg_target_ulong d3, tcg_target_ulong d4,
750                         tcg_target_ulong d5, tcg_target_ulong d6,
751                         tcg_target_ulong d7)
752 {
753     TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend);
754     n->data[0] = d0;
755     n->data[1] = d1;
756     n->data[2] = d2;
757     n->data[3] = d3;
758     n->data[4] = d4;
759     n->data[5] = d5;
760     n->data[6] = d6;
761     n->data[7] = d7;
762     new_pool_insert(s, n);
763 }
764 
765 /*
766  * Generate TB finalization at the end of block
767  */
768 
769 static int tcg_out_ldst_finalize(TCGContext *s)
770 {
771     TCGLabelQemuLdst *lb;
772 
773     /* qemu_ld/st slow paths */
774     QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) {
775         if (lb->is_ld
776             ? !tcg_out_qemu_ld_slow_path(s, lb)
777             : !tcg_out_qemu_st_slow_path(s, lb)) {
778             return -2;
779         }
780 
781         /*
782          * Test for (pending) buffer overflow.  The assumption is that any
783          * one operation beginning below the high water mark cannot overrun
784          * the buffer completely.  Thus we can test for overflow after
785          * generating code without having to check during generation.
786          */
787         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
788             return -1;
789         }
790     }
791     return 0;
792 }
793 
/*
 * Emit the constant pool after the generated code and patch every
 * queued reference to point at its entry.  Returns 0 on success,
 * -1 on (pending) buffer overflow, -2 if a relocation failed.
 */
static int tcg_out_pool_finalize(TCGContext *s)
{
    TCGLabelPoolData *p = s->pool_labels;
    TCGLabelPoolData *l = NULL;   /* previously emitted entry, for dedup */
    void *a;

    if (p == NULL) {
        return 0;
    }

    /*
     * ??? Round up to qemu_icache_linesize, but then do not round
     * again when allocating the next TranslationBlock structure.
     */
    /* Align to the largest entry (the list is sorted largest-first). */
    a = (void *)ROUND_UP((uintptr_t)s->code_ptr,
                         sizeof(tcg_target_ulong) * p->nlong);
    tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr);
    s->data_gen_ptr = a;

    for (; p != NULL; p = p->next) {
        size_t size = sizeof(tcg_target_ulong) * p->nlong;
        uintptr_t value;

        /*
         * new_pool_insert keeps duplicates adjacent: emit the data only
         * when it differs from the previous entry.
         */
        if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) {
            if (unlikely(a > s->code_gen_highwater)) {
                return -1;
            }
            memcpy(a, p->data, size);
            a += size;
            l = p;
        }

        /* Patch the referencing insn; use the read-exec address. */
        value = (uintptr_t)tcg_splitwx_to_rx(a) - size;
        if (!patch_reloc(p->label, p->rtype, value, p->addend)) {
            return -2;
        }
    }

    s->code_ptr = a;
    return 0;
}
835 
836 #define C_PFX1(P, A)                    P##A
837 #define C_PFX2(P, A, B)                 P##A##_##B
838 #define C_PFX3(P, A, B, C)              P##A##_##B##_##C
839 #define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
840 #define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
841 #define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
842 
843 /* Define an enumeration for the various combinations. */
844 
845 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
846 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
847 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
848 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),
849 
850 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
851 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
852 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
853 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),
854 
855 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
856 #define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1),
857 #define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),
858 
859 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
860 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
861 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
862 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
863 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),
864 
865 typedef enum {
866     C_Dynamic = -2,
867     C_NotImplemented = -1,
868 #include "tcg-target-con-set.h"
869 } TCGConstraintSetIndex;
870 
871 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode, TCGType, unsigned);
872 
873 #undef C_O0_I1
874 #undef C_O0_I2
875 #undef C_O0_I3
876 #undef C_O0_I4
877 #undef C_O1_I1
878 #undef C_O1_I2
879 #undef C_O1_I3
880 #undef C_O1_I4
881 #undef C_N1_I2
882 #undef C_N1O1_I1
883 #undef C_N2_I1
884 #undef C_O2_I1
885 #undef C_O2_I2
886 #undef C_O2_I3
887 #undef C_O2_I4
888 #undef C_N1_O1_I4
889 
890 /* Put all of the constraint sets into an array, indexed by the enum. */
891 
/* One operand-constraint set: argument counts plus per-argument strings. */
typedef struct TCGConstraintSet {
    uint8_t nb_oargs, nb_iargs;                 /* output / input arg counts */
    const char *args_ct_str[TCG_MAX_OP_ARGS];   /* constraint string per arg */
} TCGConstraintSet;
896 
897 #define C_O0_I1(I1)                     { 0, 1, { #I1 } },
898 #define C_O0_I2(I1, I2)                 { 0, 2, { #I1, #I2 } },
899 #define C_O0_I3(I1, I2, I3)             { 0, 3, { #I1, #I2, #I3 } },
900 #define C_O0_I4(I1, I2, I3, I4)         { 0, 4, { #I1, #I2, #I3, #I4 } },
901 
902 #define C_O1_I1(O1, I1)                 { 1, 1, { #O1, #I1 } },
903 #define C_O1_I2(O1, I1, I2)             { 1, 2, { #O1, #I1, #I2 } },
904 #define C_O1_I3(O1, I1, I2, I3)         { 1, 3, { #O1, #I1, #I2, #I3 } },
905 #define C_O1_I4(O1, I1, I2, I3, I4)     { 1, 4, { #O1, #I1, #I2, #I3, #I4 } },
906 
907 #define C_N1_I2(O1, I1, I2)             { 1, 2, { "&" #O1, #I1, #I2 } },
908 #define C_N1O1_I1(O1, O2, I1)           { 2, 1, { "&" #O1, #O2, #I1 } },
909 #define C_N2_I1(O1, O2, I1)             { 2, 1, { "&" #O1, "&" #O2, #I1 } },
910 
911 #define C_O2_I1(O1, O2, I1)             { 2, 1, { #O1, #O2, #I1 } },
912 #define C_O2_I2(O1, O2, I1, I2)         { 2, 2, { #O1, #O2, #I1, #I2 } },
913 #define C_O2_I3(O1, O2, I1, I2, I3)     { 2, 3, { #O1, #O2, #I1, #I2, #I3 } },
914 #define C_O2_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { #O1, #O2, #I1, #I2, #I3, #I4 } },
915 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },
916 
917 static const TCGConstraintSet constraint_sets[] = {
918 #include "tcg-target-con-set.h"
919 };
920 
921 #undef C_O0_I1
922 #undef C_O0_I2
923 #undef C_O0_I3
924 #undef C_O0_I4
925 #undef C_O1_I1
926 #undef C_O1_I2
927 #undef C_O1_I3
928 #undef C_O1_I4
929 #undef C_N1_I2
930 #undef C_N1O1_I1
931 #undef C_N2_I1
932 #undef C_O2_I1
933 #undef C_O2_I2
934 #undef C_O2_I3
935 #undef C_O2_I4
936 #undef C_N1_O1_I4
937 
/* Expand the enumerator to be returned from tcg_target_op_def(). */

/*
 * Third and final definition of the macros: identical to the first
 * expansion but without the trailing comma, so C_Ox_Ix(...) can be used
 * as an expression naming the enumerator (e.g. in return statements
 * and initializers throughout the backends).
 */
#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1)
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)
959 
960 /*
961  * TCGOutOp is the base class for a set of structures that describe how
962  * to generate code for a given TCGOpcode.
963  *
964  * @static_constraint:
965  *   C_NotImplemented: The TCGOpcode is not supported by the backend.
966  *   C_Dynamic:        Use @dynamic_constraint to select a constraint set
967  *                     based on any of @type, @flags, or host isa.
968  *   Otherwise:        The register allocation constrains for the TCGOpcode.
969  *
970  * Subclasses of TCGOutOp will define a set of output routines that may
971  * be used.  Such routines will often be selected by the set of registers
972  * and constants that come out of register allocation.  The set of
973  * routines that are provided will guide the set of constraints that are
974  * legal.  In particular, assume that tcg_optimize() has done its job in
975  * swapping commutative operands and folding operations for which all
976  * operands are constant.
977  */
978 typedef struct TCGOutOp {
979     TCGConstraintSetIndex static_constraint;
980     TCGConstraintSetIndex (*dynamic_constraint)(TCGType type, unsigned flags);
981 } TCGOutOp;
982 
/*
 * Naming convention for the emitters below: the suffix letters encode
 * the operand kinds in argument order -- 'r' a TCGReg, 'i' an immediate
 * (tcg_target_long), '0'/'1' a fixed operand position, and 'z'
 * presumably a zero operand (out_rzr omits the a1 register) -- TODO
 * confirm 'z' against the backends.
 */

/* Add/sub with carry-in and/or carry-out; all immediate forms covered. */
typedef struct TCGOutOpAddSubCarry {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rri)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, tcg_target_long a2);
    void (*out_rir)(TCGContext *s, TCGType type,
                    TCGReg a0, tcg_target_long a1, TCGReg a2);
    void (*out_rii)(TCGContext *s, TCGType type,
                    TCGReg a0, tcg_target_long a1, tcg_target_long a2);
} TCGOutOpAddSubCarry;

/* Generic two-operand arithmetic/logical op: a0 = a1 op a2. */
typedef struct TCGOutOpBinary {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rri)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, tcg_target_long a2);
} TCGOutOpBinary;

/* Conditional branch on (a1 cond a2) to @label. */
typedef struct TCGOutOpBrcond {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type, TCGCond cond,
                   TCGReg a1, TCGReg a2, TCGLabel *label);
    void (*out_ri)(TCGContext *s, TCGType type, TCGCond cond,
                   TCGReg a1, tcg_target_long a2, TCGLabel *label);
} TCGOutOpBrcond;

/* Double-word conditional branch; each b half may be a constant. */
typedef struct TCGOutOpBrcond2 {
    TCGOutOp base;
    void (*out)(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah,
                TCGArg bl, bool const_bl,
                TCGArg bh, bool const_bh, TCGLabel *l);
} TCGOutOpBrcond2;

/* Byte swap; @flags modifies the operation (e.g. extension of result). */
typedef struct TCGOutOpBswap {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type,
                   TCGReg a0, TCGReg a1, unsigned flags);
} TCGOutOpBswap;

/* Deposit a2 into a1 at bit offset @ofs, width @len. */
typedef struct TCGOutOpDeposit {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
                    TCGReg a2, unsigned ofs, unsigned len);
    void (*out_rri)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
                    tcg_target_long a2, unsigned ofs, unsigned len);
    void (*out_rzr)(TCGContext *s, TCGType type, TCGReg a0,
                    TCGReg a2, unsigned ofs, unsigned len);
} TCGOutOpDeposit;

/* Combined div/rem producing two results (hence the a4 operand). */
typedef struct TCGOutOpDivRem {
    TCGOutOp base;
    void (*out_rr01r)(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, TCGReg a4);
} TCGOutOpDivRem;

/* Unsigned/signed bitfield extract from a1 at @ofs, width @len. */
typedef struct TCGOutOpExtract {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
                   unsigned ofs, unsigned len);
} TCGOutOpExtract;

/* Extract from the a1:a2 register pair, shifted right by @shr. */
typedef struct TCGOutOpExtract2 {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
                    TCGReg a2, unsigned shr);
} TCGOutOpExtract2;

/* Load @dest from [base + offset]. */
typedef struct TCGOutOpLoad {
    TCGOutOp base;
    void (*out)(TCGContext *s, TCGType type, TCGReg dest,
                TCGReg base, intptr_t offset);
} TCGOutOpLoad;
1057 
1058 typedef struct TCGOutOpMovcond {
1059     TCGOutOp base;
1060     void (*out)(TCGContext *s, TCGType type, TCGCond cond,
1061                 TCGReg ret, TCGReg c1, TCGArg c2, bool const_c2,
1062                 TCGArg vt, bool const_vt, TCGArg vf, bool consf_vf);
1063 } TCGOutOpMovcond;
1064 
/* Double-word multiply: a1:a0 (hi:lo) = a2 * a3. */
typedef struct TCGOutOpMul2 {
    TCGOutOp base;
    void (*out_rrrr)(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2, TCGReg a3);
} TCGOutOpMul2;

/* Guest memory load/store of a single value, via MemOpIdx @oi. */
typedef struct TCGOutOpQemuLdSt {
    TCGOutOp base;
    void (*out)(TCGContext *s, TCGType type, TCGReg dest,
                TCGReg addr, MemOpIdx oi);
} TCGOutOpQemuLdSt;

/* Guest memory load/store of a two-register value (dlo/dhi pair). */
typedef struct TCGOutOpQemuLdSt2 {
    TCGOutOp base;
    void (*out)(TCGContext *s, TCGType type, TCGReg dlo, TCGReg dhi,
                TCGReg addr, MemOpIdx oi);
} TCGOutOpQemuLdSt2;

/* Generic one-operand op: a0 = op(a1). */
typedef struct TCGOutOpUnary {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1);
} TCGOutOpUnary;

/* Set @ret from comparison (a1 cond a2). */
typedef struct TCGOutOpSetcond {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type, TCGCond cond,
                    TCGReg ret, TCGReg a1, TCGReg a2);
    void (*out_rri)(TCGContext *s, TCGType type, TCGCond cond,
                    TCGReg ret, TCGReg a1, tcg_target_long a2);
} TCGOutOpSetcond;

/* Double-word setcond; each b half may be a constant. */
typedef struct TCGOutOpSetcond2 {
    TCGOutOp base;
    void (*out)(TCGContext *s, TCGCond cond, TCGReg ret, TCGReg al, TCGReg ah,
                TCGArg bl, bool const_bl, TCGArg bh, bool const_bh);
} TCGOutOpSetcond2;

/* Store @data (register or immediate) to [base + offset]. */
typedef struct TCGOutOpStore {
    TCGOutOp base;
    void (*out_r)(TCGContext *s, TCGType type, TCGReg data,
                  TCGReg base, intptr_t offset);
    void (*out_i)(TCGContext *s, TCGType type, tcg_target_long data,
                  TCGReg base, intptr_t offset);
} TCGOutOpStore;

/* Subtraction; note only the reversed-immediate (a1 const) form exists,
   since a0 = a1 - imm is canonicalized to an addition elsewhere. */
typedef struct TCGOutOpSubtract {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rir)(TCGContext *s, TCGType type,
                    TCGReg a0, tcg_target_long a1, TCGReg a2);
} TCGOutOpSubtract;
1117 
1118 #include "tcg-target.c.inc"
1119 
#ifndef CONFIG_TCG_INTERPRETER
/* Validate CPUTLBDescFast placement. */
/*
 * The fast-path TLB lookup addresses tlb.f[] at a negative offset from
 * the end of CPUNegativeOffsetState; verify at build time that this
 * offset does not underflow the backend's addressing range.
 */
QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
                        sizeof(CPUNegativeOffsetState))
                  < MIN_TLB_MASK_TABLE_OFS);
#endif
1126 
#if TCG_TARGET_REG_BITS == 64
/*
 * We require these functions for slow-path function calls.
 * Adapt them generically for opcode output.
 */

/* Sign-extend i32 -> i64; the TCGType argument is unused by the hook. */
static void tgen_exts_i32_i64(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1)
{
    tcg_out_exts_i32_i64(s, a0, a1);
}

static const TCGOutOpUnary outop_exts_i32_i64 = {
    .base.static_constraint = C_O1_I1(r, r),
    .out_rr = tgen_exts_i32_i64,
};

/* Zero-extend i32 -> i64; the TCGType argument is unused by the hook. */
static void tgen_extu_i32_i64(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1)
{
    tcg_out_extu_i32_i64(s, a0, a1);
}

static const TCGOutOpUnary outop_extu_i32_i64 = {
    .base.static_constraint = C_O1_I1(r, r),
    .out_rr = tgen_extu_i32_i64,
};

/* Extract low 32 bits of an i64. */
static void tgen_extrl_i64_i32(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1)
{
    tcg_out_extrl_i64_i32(s, a0, a1);
}

static const TCGOutOpUnary outop_extrl_i64_i32 = {
    .base.static_constraint = C_O1_I1(r, r),
    /* NULL when the target has no extr opcode for i64 -> i32. */
    .out_rr = TCG_TARGET_HAS_extr_i64_i32 ? tgen_extrl_i64_i32 : NULL,
};
#endif
1163 
/* goto_ptr takes one register input and emits no value: plain TCGOutOp. */
static const TCGOutOp outop_goto_ptr = {
    .static_constraint = C_O0_I1(r),
};

/* Full-register load, delegated directly to the backend's tcg_out_ld. */
static const TCGOutOpLoad outop_ld = {
    .base.static_constraint = C_O1_I1(r, r),
    .out = tcg_out_ld,
};
1172 
1173 /*
1174  * Register V as the TCGOutOp for O.
1175  * This verifies that V is of type T, otherwise give a nice compiler error.
1176  * This prevents trivial mistakes within each arch/tcg-target.c.inc.
1177  */
1178 #define OUTOP(O, T, V)  [O] = _Generic(V, T: &V.base)
1179 
1180 /* Register allocation descriptions for every TCGOpcode. */
1181 static const TCGOutOp * const all_outop[NB_OPS] = {
1182     OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add),
1183     OUTOP(INDEX_op_addci, TCGOutOpAddSubCarry, outop_addci),
1184     OUTOP(INDEX_op_addcio, TCGOutOpBinary, outop_addcio),
1185     OUTOP(INDEX_op_addco, TCGOutOpBinary, outop_addco),
1186     /* addc1o is implemented with set_carry + addcio */
1187     OUTOP(INDEX_op_addc1o, TCGOutOpBinary, outop_addcio),
1188     OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and),
1189     OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc),
1190     OUTOP(INDEX_op_brcond, TCGOutOpBrcond, outop_brcond),
1191     OUTOP(INDEX_op_bswap16, TCGOutOpBswap, outop_bswap16),
1192     OUTOP(INDEX_op_bswap32, TCGOutOpBswap, outop_bswap32),
1193     OUTOP(INDEX_op_clz, TCGOutOpBinary, outop_clz),
1194     OUTOP(INDEX_op_ctpop, TCGOutOpUnary, outop_ctpop),
1195     OUTOP(INDEX_op_ctz, TCGOutOpBinary, outop_ctz),
1196     OUTOP(INDEX_op_deposit, TCGOutOpDeposit, outop_deposit),
1197     OUTOP(INDEX_op_divs, TCGOutOpBinary, outop_divs),
1198     OUTOP(INDEX_op_divu, TCGOutOpBinary, outop_divu),
1199     OUTOP(INDEX_op_divs2, TCGOutOpDivRem, outop_divs2),
1200     OUTOP(INDEX_op_divu2, TCGOutOpDivRem, outop_divu2),
1201     OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv),
1202     OUTOP(INDEX_op_extract, TCGOutOpExtract, outop_extract),
1203     OUTOP(INDEX_op_extract2, TCGOutOpExtract2, outop_extract2),
1204     OUTOP(INDEX_op_ld8u, TCGOutOpLoad, outop_ld8u),
1205     OUTOP(INDEX_op_ld8s, TCGOutOpLoad, outop_ld8s),
1206     OUTOP(INDEX_op_ld16u, TCGOutOpLoad, outop_ld16u),
1207     OUTOP(INDEX_op_ld16s, TCGOutOpLoad, outop_ld16s),
1208     OUTOP(INDEX_op_ld, TCGOutOpLoad, outop_ld),
1209     OUTOP(INDEX_op_movcond, TCGOutOpMovcond, outop_movcond),
1210     OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul),
1211     OUTOP(INDEX_op_muls2, TCGOutOpMul2, outop_muls2),
1212     OUTOP(INDEX_op_mulsh, TCGOutOpBinary, outop_mulsh),
1213     OUTOP(INDEX_op_mulu2, TCGOutOpMul2, outop_mulu2),
1214     OUTOP(INDEX_op_muluh, TCGOutOpBinary, outop_muluh),
1215     OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand),
1216     OUTOP(INDEX_op_neg, TCGOutOpUnary, outop_neg),
1217     OUTOP(INDEX_op_negsetcond, TCGOutOpSetcond, outop_negsetcond),
1218     OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor),
1219     OUTOP(INDEX_op_not, TCGOutOpUnary, outop_not),
1220     OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or),
1221     OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc),
1222     OUTOP(INDEX_op_qemu_ld, TCGOutOpQemuLdSt, outop_qemu_ld),
1223     OUTOP(INDEX_op_qemu_ld2, TCGOutOpQemuLdSt2, outop_qemu_ld2),
1224     OUTOP(INDEX_op_qemu_st, TCGOutOpQemuLdSt, outop_qemu_st),
1225     OUTOP(INDEX_op_qemu_st2, TCGOutOpQemuLdSt2, outop_qemu_st2),
1226     OUTOP(INDEX_op_rems, TCGOutOpBinary, outop_rems),
1227     OUTOP(INDEX_op_remu, TCGOutOpBinary, outop_remu),
1228     OUTOP(INDEX_op_rotl, TCGOutOpBinary, outop_rotl),
1229     OUTOP(INDEX_op_rotr, TCGOutOpBinary, outop_rotr),
1230     OUTOP(INDEX_op_sar, TCGOutOpBinary, outop_sar),
1231     OUTOP(INDEX_op_setcond, TCGOutOpSetcond, outop_setcond),
1232     OUTOP(INDEX_op_sextract, TCGOutOpExtract, outop_sextract),
1233     OUTOP(INDEX_op_shl, TCGOutOpBinary, outop_shl),
1234     OUTOP(INDEX_op_shr, TCGOutOpBinary, outop_shr),
1235     OUTOP(INDEX_op_st, TCGOutOpStore, outop_st),
1236     OUTOP(INDEX_op_st8, TCGOutOpStore, outop_st8),
1237     OUTOP(INDEX_op_st16, TCGOutOpStore, outop_st16),
1238     OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub),
1239     OUTOP(INDEX_op_subbi, TCGOutOpAddSubCarry, outop_subbi),
1240     OUTOP(INDEX_op_subbio, TCGOutOpAddSubCarry, outop_subbio),
1241     OUTOP(INDEX_op_subbo, TCGOutOpAddSubCarry, outop_subbo),
1242     /* subb1o is implemented with set_borrow + subbio */
1243     OUTOP(INDEX_op_subb1o, TCGOutOpAddSubCarry, outop_subbio),
1244     OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor),
1245 
1246     [INDEX_op_goto_ptr] = &outop_goto_ptr,
1247 
1248 #if TCG_TARGET_REG_BITS == 32
1249     OUTOP(INDEX_op_brcond2_i32, TCGOutOpBrcond2, outop_brcond2),
1250     OUTOP(INDEX_op_setcond2_i32, TCGOutOpSetcond2, outop_setcond2),
1251 #else
1252     OUTOP(INDEX_op_bswap64, TCGOutOpUnary, outop_bswap64),
1253     OUTOP(INDEX_op_ext_i32_i64, TCGOutOpUnary, outop_exts_i32_i64),
1254     OUTOP(INDEX_op_extu_i32_i64, TCGOutOpUnary, outop_extu_i32_i64),
1255     OUTOP(INDEX_op_extrl_i64_i32, TCGOutOpUnary, outop_extrl_i64_i32),
1256     OUTOP(INDEX_op_extrh_i64_i32, TCGOutOpUnary, outop_extrh_i64_i32),
1257     OUTOP(INDEX_op_ld32u, TCGOutOpLoad, outop_ld32u),
1258     OUTOP(INDEX_op_ld32s, TCGOutOpLoad, outop_ld32s),
1259     OUTOP(INDEX_op_st32, TCGOutOpStore, outop_st),
1260 #endif
1261 };
1262 
1263 #undef OUTOP
1264 
1265 /*
1266  * All TCG threads except the parent (i.e. the one that called tcg_context_init
1267  * and registered the target's TCG globals) must register with this function
1268  * before initiating translation.
1269  *
1270  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
1271  * of tcg_region_init() for the reasoning behind this.
1272  *
1273  * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
1274  * system-mode tcg_ctxs[] does not track tcg_ctx_init, since the initial context
1275  * is not used anymore for translation once this function is called.
1276  *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
 * iterates over the array (e.g. tcg_code_size()) the same for both
 * system/user modes.
1280  */
1281 #ifdef CONFIG_USER_ONLY
1282 void tcg_register_thread(void)
1283 {
1284     tcg_ctx = &tcg_init_ctx;
1285 }
1286 #else
1287 void tcg_register_thread(void)
1288 {
1289     TCGContext *s = g_malloc(sizeof(*s));
1290     unsigned int i, n;
1291 
1292     *s = tcg_init_ctx;
1293 
1294     /* Relink mem_base.  */
1295     for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
1296         if (tcg_init_ctx.temps[i].mem_base) {
1297             ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
1298             tcg_debug_assert(b >= 0 && b < n);
1299             s->temps[i].mem_base = &s->temps[b];
1300         }
1301     }
1302 
1303     /* Claim an entry in tcg_ctxs */
1304     n = qatomic_fetch_inc(&tcg_cur_ctxs);
1305     g_assert(n < tcg_max_ctxs);
1306     qatomic_set(&tcg_ctxs[n], s);
1307 
1308     if (n > 0) {
1309         tcg_region_initial_alloc(s);
1310     }
1311 
1312     tcg_ctx = s;
1313 }
1314 #endif /* !CONFIG_USER_ONLY */
1315 
1316 /* pool based memory allocation */
1317 void *tcg_malloc_internal(TCGContext *s, int size)
1318 {
1319     TCGPool *p;
1320     int pool_size;
1321 
1322     if (size > TCG_POOL_CHUNK_SIZE) {
1323         /* big malloc: insert a new pool (XXX: could optimize) */
1324         p = g_malloc(sizeof(TCGPool) + size);
1325         p->size = size;
1326         p->next = s->pool_first_large;
1327         s->pool_first_large = p;
1328         return p->data;
1329     } else {
1330         p = s->pool_current;
1331         if (!p) {
1332             p = s->pool_first;
1333             if (!p)
1334                 goto new_pool;
1335         } else {
1336             if (!p->next) {
1337             new_pool:
1338                 pool_size = TCG_POOL_CHUNK_SIZE;
1339                 p = g_malloc(sizeof(TCGPool) + pool_size);
1340                 p->size = pool_size;
1341                 p->next = NULL;
1342                 if (s->pool_current) {
1343                     s->pool_current->next = p;
1344                 } else {
1345                     s->pool_first = p;
1346                 }
1347             } else {
1348                 p = p->next;
1349             }
1350         }
1351     }
1352     s->pool_current = p;
1353     s->pool_cur = p->data + size;
1354     s->pool_end = p->data + p->size;
1355     return p->data;
1356 }
1357 
1358 void tcg_pool_reset(TCGContext *s)
1359 {
1360     TCGPool *p, *t;
1361     for (p = s->pool_first_large; p; p = t) {
1362         t = p->next;
1363         g_free(p);
1364     }
1365     s->pool_first_large = NULL;
1366     s->pool_cur = s->pool_end = NULL;
1367     s->pool_current = NULL;
1368 }
1369 
1370 /*
1371  * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
1372  * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
1373  * We only use these for layout in tcg_out_ld_helper_ret and
1374  * tcg_out_st_helper_args, and share them between several of
1375  * the helpers, with the end result that it's easier to build manually.
1376  */
1377 
#if TCG_TARGET_REG_BITS == 32
/* "ttl" = tcg_target_ulong: i32 on 32-bit hosts, i64 on 64-bit hosts. */
# define dh_typecode_ttl  dh_typecode_i32
#else
# define dh_typecode_ttl  dh_typecode_i64
#endif
1383 
/*
 * Shared layout descriptors for the ld/st slow-path helpers.  Loads
 * return the value (widened to tcg_target_ulong for <= 32 bits); stores
 * take the data as an extra argument before oi/ra and return void.
 */
static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i128, 0) /* return Int128 */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* uint32_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i64, 3)  /* uint64_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i128, 3) /* Int128 data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};
1440 
1441 #ifdef CONFIG_TCG_INTERPRETER
1442 static ffi_type *typecode_to_ffi(int argmask)
1443 {
1444     /*
1445      * libffi does not support __int128_t, so we have forced Int128
1446      * to use the structure definition instead of the builtin type.
1447      */
1448     static ffi_type *ffi_type_i128_elements[3] = {
1449         &ffi_type_uint64,
1450         &ffi_type_uint64,
1451         NULL
1452     };
1453     static ffi_type ffi_type_i128 = {
1454         .size = 16,
1455         .alignment = __alignof__(Int128),
1456         .type = FFI_TYPE_STRUCT,
1457         .elements = ffi_type_i128_elements,
1458     };
1459 
1460     switch (argmask) {
1461     case dh_typecode_void:
1462         return &ffi_type_void;
1463     case dh_typecode_i32:
1464         return &ffi_type_uint32;
1465     case dh_typecode_s32:
1466         return &ffi_type_sint32;
1467     case dh_typecode_i64:
1468         return &ffi_type_uint64;
1469     case dh_typecode_s64:
1470         return &ffi_type_sint64;
1471     case dh_typecode_ptr:
1472         return &ffi_type_pointer;
1473     case dh_typecode_i128:
1474         return &ffi_type_i128;
1475     }
1476     g_assert_not_reached();
1477 }
1478 
/*
 * Build the libffi call descriptor for one helper.  The typemask packs
 * 3-bit typecodes: field 0 is the return type, fields 1..n the
 * arguments.  NOTE(review): the allocation is never freed -- presumably
 * it lives for the life of the process; confirm against callers.
 */
static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
{
    unsigned typemask = info->typemask;
    struct {
        ffi_cif cif;
        ffi_type *args[];   /* flexible array: one slot per argument */
    } *ca;
    ffi_status status;
    int nargs;

    /* Ignoring the return type, find the last non-zero field. */
    /* Highest set bit above the return field, rounded up to whole
       3-bit typecodes, gives the argument count. */
    nargs = 32 - clz32(typemask >> 3);
    nargs = DIV_ROUND_UP(nargs, 3);
    assert(nargs <= MAX_CALL_IARGS);

    ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
    ca->cif.rtype = typecode_to_ffi(typemask & 7);
    ca->cif.nargs = nargs;

    if (nargs != 0) {
        ca->cif.arg_types = ca->args;
        for (int j = 0; j < nargs; ++j) {
            int typecode = extract32(typemask, (j + 1) * 3, 3);
            ca->args[j] = typecode_to_ffi(typecode);
        }
    }

    status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                          ca->cif.rtype, ca->cif.arg_types);
    assert(status == FFI_OK);

    return &ca->cif;
}
1512 
/* Interpreter: the "initialized" marker is the lazily-built ffi_cif. */
#define HELPER_INFO_INIT(I)      (&(I)->cif)
#define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
#else
/* Native backends: a simple flag, no ffi descriptor needed. */
#define HELPER_INFO_INIT(I)      (&(I)->init)
#define HELPER_INFO_INIT_VAL(I)  1
#endif /* CONFIG_TCG_INTERPRETER */
1519 
1520 static inline bool arg_slot_reg_p(unsigned arg_slot)
1521 {
1522     /*
1523      * Split the sizeof away from the comparison to avoid Werror from
1524      * "unsigned < 0 is always false", when iarg_regs is empty.
1525      */
1526     unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
1527     return arg_slot < nreg;
1528 }
1529 
1530 static inline int arg_slot_stk_ofs(unsigned arg_slot)
1531 {
1532     unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1533     unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);
1534 
1535     tcg_debug_assert(stk_slot < max);
1536     return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
1537 }
1538 
/* Running state while laying out one helper call's arguments. */
typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;
1545 
1546 static void layout_arg_even(TCGCumulativeArgs *cum)
1547 {
1548     cum->arg_slot += cum->arg_slot & 1;
1549 }
1550 
1551 static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
1552                          TCGCallArgumentKind kind)
1553 {
1554     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1555 
1556     *loc = (TCGCallArgumentLoc){
1557         .kind = kind,
1558         .arg_idx = cum->arg_idx,
1559         .arg_slot = cum->arg_slot,
1560     };
1561     cum->info_in_idx++;
1562     cum->arg_slot++;
1563 }
1564 
1565 static void layout_arg_normal_n(TCGCumulativeArgs *cum,
1566                                 TCGHelperInfo *info, int n)
1567 {
1568     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1569 
1570     for (int i = 0; i < n; ++i) {
1571         /* Layout all using the same arg_idx, adjusting the subindex. */
1572         loc[i] = (TCGCallArgumentLoc){
1573             .kind = TCG_CALL_ARG_NORMAL,
1574             .arg_idx = cum->arg_idx,
1575             .tmp_subindex = i,
1576             .arg_slot = cum->arg_slot + i,
1577         };
1578     }
1579     cum->info_in_idx += n;
1580     cum->arg_slot += n;
1581 }
1582 
/*
 * Lay out one by-reference I128 argument: a single pointer in the
 * regular argument slots, plus a stack copy of the value itself in the
 * "ref_slot" area that the pointer will address.
 */
static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;   /* words needed for the copy */

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with
     * structure pass by-reference.  Therefore we must make copies.
     * Allocate space from "ref_slot", which will be adjusted to
     * follow the parameters on the stack.
     */
    loc[0].ref_slot = cum->ref_slot;

    /*
     * Subsequent words also go into the reference slot, but
     * do not accumulate into the regular arguments.
     */
    for (int i = 1; i < n; ++i) {
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_BY_REF_N,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .ref_slot = cum->ref_slot + i,
        };
    }
    cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
    cum->ref_slot += n;
}
1614 
1615 static void init_call_layout(TCGHelperInfo *info)
1616 {
1617     int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
1618     int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1619     unsigned typemask = info->typemask;
1620     unsigned typecode;
1621     TCGCumulativeArgs cum = { };
1622 
1623     /*
1624      * Parse and place any function return value.
1625      */
1626     typecode = typemask & 7;
1627     switch (typecode) {
1628     case dh_typecode_void:
1629         info->nr_out = 0;
1630         break;
1631     case dh_typecode_i32:
1632     case dh_typecode_s32:
1633     case dh_typecode_ptr:
1634         info->nr_out = 1;
1635         info->out_kind = TCG_CALL_RET_NORMAL;
1636         break;
1637     case dh_typecode_i64:
1638     case dh_typecode_s64:
1639         info->nr_out = 64 / TCG_TARGET_REG_BITS;
1640         info->out_kind = TCG_CALL_RET_NORMAL;
1641         /* Query the last register now to trigger any assert early. */
1642         tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1643         break;
1644     case dh_typecode_i128:
1645         info->nr_out = 128 / TCG_TARGET_REG_BITS;
1646         info->out_kind = TCG_TARGET_CALL_RET_I128;
1647         switch (TCG_TARGET_CALL_RET_I128) {
1648         case TCG_CALL_RET_NORMAL:
1649             /* Query the last register now to trigger any assert early. */
1650             tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1651             break;
1652         case TCG_CALL_RET_BY_VEC:
1653             /* Query the single register now to trigger any assert early. */
1654             tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
1655             break;
1656         case TCG_CALL_RET_BY_REF:
1657             /*
1658              * Allocate the first argument to the output.
1659              * We don't need to store this anywhere, just make it
1660              * unavailable for use in the input loop below.
1661              */
1662             cum.arg_slot = 1;
1663             break;
1664         default:
1665             qemu_build_not_reached();
1666         }
1667         break;
1668     default:
1669         g_assert_not_reached();
1670     }
1671 
1672     /*
1673      * Parse and place function arguments.
1674      */
1675     for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
1676         TCGCallArgumentKind kind;
1677         TCGType type;
1678 
1679         typecode = typemask & 7;
1680         switch (typecode) {
1681         case dh_typecode_i32:
1682         case dh_typecode_s32:
1683             type = TCG_TYPE_I32;
1684             break;
1685         case dh_typecode_i64:
1686         case dh_typecode_s64:
1687             type = TCG_TYPE_I64;
1688             break;
1689         case dh_typecode_ptr:
1690             type = TCG_TYPE_PTR;
1691             break;
1692         case dh_typecode_i128:
1693             type = TCG_TYPE_I128;
1694             break;
1695         default:
1696             g_assert_not_reached();
1697         }
1698 
1699         switch (type) {
1700         case TCG_TYPE_I32:
1701             switch (TCG_TARGET_CALL_ARG_I32) {
1702             case TCG_CALL_ARG_EVEN:
1703                 layout_arg_even(&cum);
1704                 /* fall through */
1705             case TCG_CALL_ARG_NORMAL:
1706                 layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1707                 break;
1708             case TCG_CALL_ARG_EXTEND:
1709                 kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
1710                 layout_arg_1(&cum, info, kind);
1711                 break;
1712             default:
1713                 qemu_build_not_reached();
1714             }
1715             break;
1716 
1717         case TCG_TYPE_I64:
1718             switch (TCG_TARGET_CALL_ARG_I64) {
1719             case TCG_CALL_ARG_EVEN:
1720                 layout_arg_even(&cum);
1721                 /* fall through */
1722             case TCG_CALL_ARG_NORMAL:
1723                 if (TCG_TARGET_REG_BITS == 32) {
1724                     layout_arg_normal_n(&cum, info, 2);
1725                 } else {
1726                     layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1727                 }
1728                 break;
1729             default:
1730                 qemu_build_not_reached();
1731             }
1732             break;
1733 
1734         case TCG_TYPE_I128:
1735             switch (TCG_TARGET_CALL_ARG_I128) {
1736             case TCG_CALL_ARG_EVEN:
1737                 layout_arg_even(&cum);
1738                 /* fall through */
1739             case TCG_CALL_ARG_NORMAL:
1740                 layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
1741                 break;
1742             case TCG_CALL_ARG_BY_REF:
1743                 layout_arg_by_ref(&cum, info);
1744                 break;
1745             default:
1746                 qemu_build_not_reached();
1747             }
1748             break;
1749 
1750         default:
1751             g_assert_not_reached();
1752         }
1753     }
1754     info->nr_in = cum.info_in_idx;
1755 
1756     /* Validate that we didn't overrun the input array. */
1757     assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
1758     /* Validate the backend has enough argument space. */
1759     assert(cum.arg_slot <= max_reg_slots + max_stk_slots);
1760 
1761     /*
1762      * Relocate the "ref_slot" area to the end of the parameters.
1763      * Minimizing this stack offset helps code size for x86,
1764      * which has a signed 8-bit offset encoding.
1765      */
1766     if (cum.ref_slot != 0) {
1767         int ref_base = 0;
1768 
1769         if (cum.arg_slot > max_reg_slots) {
1770             int align = __alignof(Int128) / sizeof(tcg_target_long);
1771 
1772             ref_base = cum.arg_slot - max_reg_slots;
1773             if (align > 1) {
1774                 ref_base = ROUND_UP(ref_base, align);
1775             }
1776         }
1777         assert(ref_base + cum.ref_slot <= max_stk_slots);
1778         ref_base += max_reg_slots;
1779 
1780         if (ref_base != 0) {
1781             for (int i = cum.info_in_idx - 1; i >= 0; --i) {
1782                 TCGCallArgumentLoc *loc = &info->in[i];
1783                 switch (loc->kind) {
1784                 case TCG_CALL_ARG_BY_REF:
1785                 case TCG_CALL_ARG_BY_REF_N:
1786                     loc->ref_slot += ref_base;
1787                     break;
1788                 default:
1789                     break;
1790                 }
1791             }
1792         }
1793     }
1794 }
1795 
/*
 * Register allocation order used when indirect globals are present;
 * built by tcg_context_init() from tcg_target_reg_alloc_order with the
 * leading call-saved registers reversed.
 */
static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
/* Forward declarations for use before their definitions below. */
static void process_constraint_sets(void);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);
1800 
/*
 * One-time initialization of the TCG subsystem: zero the initial
 * context, pre-compute helper call layouts, run the target backend's
 * setup, derive the indirect register allocation order, and create the
 * "env" fixed global.
 */
static void tcg_context_init(unsigned max_threads)
{
    TCGContext *s = &tcg_init_ctx;
    int n, i;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Pre-compute the call ABI layout of the fixed memory helpers. */
    init_call_layout(&info_helper_ld32_mmu);
    init_call_layout(&info_helper_ld64_mmu);
    init_call_layout(&info_helper_ld128_mmu);
    init_call_layout(&info_helper_st32_mmu);
    init_call_layout(&info_helper_st64_mmu);
    init_call_layout(&info_helper_st128_mmu);

    tcg_target_init(s);
    process_constraint_sets();

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    /* First N entries: the call-saved prefix, reversed. */
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    /* Remaining entries: the call-clobbered tail, unchanged. */
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation tcg_region_init() for the
     * reasoning behind this.
     * In system-mode we will have at most max_threads TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    tcg_cur_ctxs = 1;
    tcg_max_ctxs = 1;
#else
    tcg_max_ctxs = max_threads;
    tcg_ctxs = g_new0(TCGContext *, max_threads);
#endif

    /* Create the fixed "env" global, permanently bound to TCG_AREG0. */
    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    tcg_env = temp_tcgv_ptr(ts);
}
1855 
/*
 * Public entry point: initialize the TCG context state and then the
 * code generation region(s) for up to @max_threads translators.
 */
void tcg_init(size_t tb_size, int splitwx, unsigned max_threads)
{
    tcg_context_init(max_threads);
    tcg_region_init(tb_size, splitwx, max_threads);
}
1861 
1862 /*
1863  * Allocate TBs right before their corresponding translated code, making
1864  * sure that TBs and code are on different cache lines.
1865  */
1866 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1867 {
1868     uintptr_t align = qemu_icache_linesize;
1869     TranslationBlock *tb;
1870     void *next;
1871 
1872  retry:
1873     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1874     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1875 
1876     if (unlikely(next > s->code_gen_highwater)) {
1877         if (tcg_region_alloc(s)) {
1878             return NULL;
1879         }
1880         goto retry;
1881     }
1882     qatomic_set(&s->code_gen_ptr, next);
1883     return tb;
1884 }
1885 
/*
 * Generate the host prologue/epilogue at the start of the code buffer,
 * flush caches, and optionally log its disassembly.  Must run before
 * any TB is translated; sets tcg_qemu_tb_exec to the prologue entry.
 */
void tcg_prologue_init(void)
{
    TCGContext *s = tcg_ctx;
    size_t prologue_size;

    s->code_ptr = s->code_gen_ptr;
    s->code_buf = s->code_gen_ptr;
    s->data_gen_ptr = NULL;

#ifndef CONFIG_TCG_INTERPRETER
    /* The executable (RX) mapping of the buffer start is the entry point. */
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
#endif

    s->pool_labels = NULL;

    qemu_thread_jit_write();
    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }

    prologue_size = tcg_current_code_size(s);
    perf_report_prologue(s->code_gen_ptr, prologue_size);

#ifndef CONFIG_TCG_INTERPRETER
    /* Make the freshly written prologue visible to instruction fetch. */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf, prologue_size);
#endif

    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
            if (s->data_gen_ptr) {
                /* Code is followed by a constant-pool data section. */
                size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
                size_t data_size = prologue_size - code_size;
                size_t i;

                disas(logfile, s->code_gen_ptr, code_size);

                for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                    if (sizeof(tcg_target_ulong) == 8) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint64_t *)(s->data_gen_ptr + i));
                    } else {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .long  0x%08x\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint32_t *)(s->data_gen_ptr + i));
                    }
                }
            } else {
                disas(logfile, s->code_gen_ptr, prologue_size);
            }
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

#ifndef CONFIG_TCG_INTERPRETER
    /*
     * Assert that goto_ptr is implemented completely, setting an epilogue.
     * For tci, we use NULL as the signal to return from the interpreter,
     * so skip this check.
     */
    tcg_debug_assert(tcg_code_gen_epilogue != NULL);
#endif

    tcg_region_prologue_set(s);
}
1962 
/*
 * Reset per-translation state in @s, ready to begin generating
 * opcodes for a new translation block.
 */
void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    /* Discard all non-global temporaries from the previous TB. */
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.  */
    tcg_temp_ebb_reset_freed(s);

    /* No constant temps have been previously allocated. */
    for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
        if (s->const_table[i]) {
            g_hash_table_remove_all(s->const_table[i]);
        }
    }

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
    s->emit_before_op = NULL;
    QSIMPLEQ_INIT(&s->labels);

    tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
}
1993 
1994 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1995 {
1996     int n = s->nb_temps++;
1997 
1998     if (n >= TCG_MAX_TEMPS) {
1999         tcg_raise_tb_overflow(s);
2000     }
2001     return memset(&s->temps[n], 0, sizeof(TCGTemp));
2002 }
2003 
2004 static TCGTemp *tcg_global_alloc(TCGContext *s)
2005 {
2006     TCGTemp *ts;
2007 
2008     tcg_debug_assert(s->nb_globals == s->nb_temps);
2009     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
2010     s->nb_globals++;
2011     ts = tcg_temp_alloc(s);
2012     ts->kind = TEMP_GLOBAL;
2013 
2014     return ts;
2015 }
2016 
2017 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
2018                                             TCGReg reg, const char *name)
2019 {
2020     TCGTemp *ts;
2021 
2022     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
2023 
2024     ts = tcg_global_alloc(s);
2025     ts->base_type = type;
2026     ts->type = type;
2027     ts->kind = TEMP_FIXED;
2028     ts->reg = reg;
2029     ts->name = name;
2030     tcg_regset_set_reg(s->reserved_regs, reg);
2031 
2032     return ts;
2033 }
2034 
/*
 * Describe the stack frame available for temp spills: its extent
 * [@start, @start + @size) relative to fixed register @reg, which
 * becomes the "_frame" global.
 */
void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}
2042 
/*
 * Create a global temp backed by memory at @base + @offset.  If @base
 * is itself a (memory-backed) global, the new temp is "indirect" and
 * must be loaded via that register.  On 32-bit hosts a 64-bit global
 * is split into two consecutive 32-bit halves named NAME_0/NAME_1.
 */
static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset,
                                            const char *name, TCGType type)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0;

    switch (base_ts->kind) {
    case TEMP_FIXED:
        break;
    case TEMP_GLOBAL:
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        /* A 64-bit value on a 32-bit host counts as two indirects. */
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
        break;
    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        /* Low/high halves, each a 32-bit part of the 64-bit base type. */
        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        /* NOTE(review): strdup'd names are never freed; presumably fine
           since globals live for the life of the process. */
        ts->name = strdup(buf);

        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + 4;
        ts2->temp_subindex = 1;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}
2102 
/* Typed public constructors for memory-backed globals at @reg + @off. */
TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name)
{
    TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32);
    return temp_tcgv_i32(ts);
}

TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name)
{
    TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64);
    return temp_tcgv_i64(ts);
}

TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name)
{
    TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR);
    return temp_tcgv_ptr(ts);
}
2120 
/*
 * Allocate a temporary of @type with lifetime @kind (TEMP_EBB or
 * TEMP_TB).  EBB temps are recycled from a per-type free list when
 * possible.  Types wider than the host register are represented as
 * consecutive TCG_TYPE_REG parts sharing the same base_type.
 */
TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *ts;
    int n;

    if (kind == TEMP_EBB) {
        int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);

        if (idx < TCG_MAX_TEMPS) {
            /* There is already an available temp with the right type.  */
            clear_bit(idx, s->free_temps[type].l);

            ts = &s->temps[idx];
            ts->temp_allocated = 1;
            tcg_debug_assert(ts->base_type == type);
            tcg_debug_assert(ts->kind == kind);
            return ts;
        }
    } else {
        tcg_debug_assert(kind == TEMP_TB);
    }

    /* Number of host-register-sized parts needed for this type. */
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        n = 1;
        break;
    case TCG_TYPE_I64:
        n = 64 / TCG_TARGET_REG_BITS;
        break;
    case TCG_TYPE_I128:
        n = 128 / TCG_TARGET_REG_BITS;
        break;
    default:
        g_assert_not_reached();
    }

    ts = tcg_temp_alloc(s);
    ts->base_type = type;
    ts->temp_allocated = 1;
    ts->kind = kind;

    if (n == 1) {
        ts->type = type;
    } else {
        ts->type = TCG_TYPE_REG;

        /* Allocate the remaining parts contiguously after the first. */
        for (int i = 1; i < n; ++i) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + i);
            ts2->base_type = type;
            ts2->type = TCG_TYPE_REG;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = i;
            ts2->kind = kind;
        }
    }
    return ts;
}
2184 
/*
 * Typed constructors for TB-lifetime (TEMP_TB) and EBB-lifetime
 * (TEMP_EBB) temporaries of each scalar type.
 */
TCGv_i32 tcg_temp_new_i32(void)
{
    return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB));
}

TCGv_i32 tcg_temp_ebb_new_i32(void)
{
    return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB));
}

TCGv_i64 tcg_temp_new_i64(void)
{
    return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB));
}

TCGv_i64 tcg_temp_ebb_new_i64(void)
{
    return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB));
}

TCGv_ptr tcg_temp_new_ptr(void)
{
    return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB));
}

TCGv_ptr tcg_temp_ebb_new_ptr(void)
{
    return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB));
}

TCGv_i128 tcg_temp_new_i128(void)
{
    return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB));
}

TCGv_i128 tcg_temp_ebb_new_i128(void)
{
    return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB));
}
2224 
/*
 * Allocate an EBB-lifetime vector temporary of the given vector type.
 * In debug builds, verify the host actually supports that vector size.
 */
TCGv_vec tcg_temp_new_vec(TCGType type)
{
    TCGTemp *t;

#ifdef CONFIG_DEBUG_TCG
    switch (type) {
    case TCG_TYPE_V64:
        assert(TCG_TARGET_HAS_v64);
        break;
    case TCG_TYPE_V128:
        assert(TCG_TARGET_HAS_v128);
        break;
    case TCG_TYPE_V256:
        assert(TCG_TARGET_HAS_v256);
        break;
    default:
        g_assert_not_reached();
    }
#endif

    t = tcg_temp_new_internal(type, TEMP_EBB);
    return temp_tcgv_vec(t);
}
2248 
2249 /* Create a new temp of the same type as an existing temp.  */
2250 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
2251 {
2252     TCGTemp *t = tcgv_vec_temp(match);
2253 
2254     tcg_debug_assert(t->temp_allocated != 0);
2255 
2256     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
2257     return temp_tcgv_vec(t);
2258 }
2259 
/*
 * Release a temporary.  Only TEMP_EBB temps are actually returned to
 * the per-type free list; frees of TEMP_TB and TEMP_CONST are silently
 * ignored, and freeing a fixed or global temp is a programming error.
 */
void tcg_temp_free_internal(TCGTemp *ts)
{
    TCGContext *s = tcg_ctx;

    switch (ts->kind) {
    case TEMP_CONST:
    case TEMP_TB:
        /* Silently ignore free. */
        break;
    case TEMP_EBB:
        tcg_debug_assert(ts->temp_allocated != 0);
        ts->temp_allocated = 0;
        /* Make the slot reusable by tcg_temp_new_internal(). */
        set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
        break;
    default:
        /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
        g_assert_not_reached();
    }
}
2279 
/* Typed wrappers around tcg_temp_free_internal(). */
void tcg_temp_free_i32(TCGv_i32 arg)
{
    tcg_temp_free_internal(tcgv_i32_temp(arg));
}

void tcg_temp_free_i64(TCGv_i64 arg)
{
    tcg_temp_free_internal(tcgv_i64_temp(arg));
}

void tcg_temp_free_i128(TCGv_i128 arg)
{
    tcg_temp_free_internal(tcgv_i128_temp(arg));
}

void tcg_temp_free_ptr(TCGv_ptr arg)
{
    tcg_temp_free_internal(tcgv_ptr_temp(arg));
}

void tcg_temp_free_vec(TCGv_vec arg)
{
    tcg_temp_free_internal(tcgv_vec_temp(arg));
}
2304 
/*
 * Return the interned TEMP_CONST temp holding @val with type @type,
 * creating it on first use.  Constants are deduplicated through a
 * lazily-created per-type hash table keyed by the 64-bit value.
 */
TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    if (h == NULL) {
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        int64_t *val_ptr;

        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            /* 64-bit constant on a 32-bit host: two 32-bit halves. */
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + 1);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;

            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = 1;

            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
             */
            ts[HOST_BIG_ENDIAN].val = val;
            ts[!HOST_BIG_ENDIAN].val = val >> 32;
            val_ptr = &ts[HOST_BIG_ENDIAN].val;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
            val_ptr = &ts->val;
        }
        /* Key points at the value stored in the temp itself. */
        g_hash_table_insert(h, val_ptr, ts);
    }

    return ts;
}
2359 
/* Typed wrappers returning interned constant temps. */
TCGv_i32 tcg_constant_i32(int32_t val)
{
    return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val));
}

TCGv_i64 tcg_constant_i64(int64_t val)
{
    return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val));
}

TCGv_vaddr tcg_constant_vaddr(uintptr_t val)
{
    return temp_tcgv_vaddr(tcg_constant_internal(TCG_TYPE_PTR, val));
}

TCGv_ptr tcg_constant_ptr_int(intptr_t val)
{
    return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val));
}

/* Vector constant: @val is replicated across each @vece-sized element. */
TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
{
    val = dup_const(vece, val);
    return temp_tcgv_vec(tcg_constant_internal(type, val));
}

/* Vector constant with the same vector type as @match. */
TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);
    return tcg_constant_vec(t->base_type, vece, val);
}
2393 
#ifdef CONFIG_DEBUG_TCG
/* Debug-build out-of-line version: index of @ts within tcg_ctx->temps,
   with bounds checking. */
size_t temp_idx(TCGTemp *ts)
{
    ptrdiff_t n = ts - tcg_ctx->temps;
    assert(n >= 0 && n < tcg_ctx->nb_temps);
    return n;
}

/* Debug-build out-of-line version: recover the TCGTemp from a TCGv
   handle, which encodes the temp's byte offset from tcg_ctx.  Validate
   that the offset is in range and TCGTemp-aligned. */
TCGTemp *tcgv_i32_temp(TCGv_i32 v)
{
    uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);

    assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
    assert(o % sizeof(TCGTemp) == 0);

    return (void *)tcg_ctx + (uintptr_t)v;
}
#endif /* CONFIG_DEBUG_TCG */
2412 
2413 /*
2414  * Return true if OP may appear in the opcode stream with TYPE.
2415  * Test the runtime variable that controls each opcode.
2416  */
2417 bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
2418 {
2419     bool has_type;
2420 
2421     switch (type) {
2422     case TCG_TYPE_I32:
2423         has_type = true;
2424         break;
2425     case TCG_TYPE_I64:
2426         has_type = TCG_TARGET_REG_BITS == 64;
2427         break;
2428     case TCG_TYPE_V64:
2429         has_type = TCG_TARGET_HAS_v64;
2430         break;
2431     case TCG_TYPE_V128:
2432         has_type = TCG_TARGET_HAS_v128;
2433         break;
2434     case TCG_TYPE_V256:
2435         has_type = TCG_TARGET_HAS_v256;
2436         break;
2437     default:
2438         has_type = false;
2439         break;
2440     }
2441 
2442     switch (op) {
2443     case INDEX_op_discard:
2444     case INDEX_op_set_label:
2445     case INDEX_op_call:
2446     case INDEX_op_br:
2447     case INDEX_op_mb:
2448     case INDEX_op_insn_start:
2449     case INDEX_op_exit_tb:
2450     case INDEX_op_goto_tb:
2451     case INDEX_op_goto_ptr:
2452         return true;
2453 
2454     case INDEX_op_qemu_ld:
2455     case INDEX_op_qemu_st:
2456         tcg_debug_assert(type <= TCG_TYPE_REG);
2457         return true;
2458 
2459     case INDEX_op_qemu_ld2:
2460     case INDEX_op_qemu_st2:
2461         if (TCG_TARGET_REG_BITS == 32) {
2462             tcg_debug_assert(type == TCG_TYPE_I64);
2463             return true;
2464         }
2465         tcg_debug_assert(type == TCG_TYPE_I128);
2466         goto do_lookup;
2467 
2468     case INDEX_op_add:
2469     case INDEX_op_and:
2470     case INDEX_op_brcond:
2471     case INDEX_op_deposit:
2472     case INDEX_op_extract:
2473     case INDEX_op_ld8u:
2474     case INDEX_op_ld8s:
2475     case INDEX_op_ld16u:
2476     case INDEX_op_ld16s:
2477     case INDEX_op_ld:
2478     case INDEX_op_mov:
2479     case INDEX_op_movcond:
2480     case INDEX_op_negsetcond:
2481     case INDEX_op_or:
2482     case INDEX_op_setcond:
2483     case INDEX_op_sextract:
2484     case INDEX_op_st8:
2485     case INDEX_op_st16:
2486     case INDEX_op_st:
2487     case INDEX_op_xor:
2488         return has_type;
2489 
2490     case INDEX_op_brcond2_i32:
2491     case INDEX_op_setcond2_i32:
2492         return TCG_TARGET_REG_BITS == 32;
2493 
2494     case INDEX_op_ld32u:
2495     case INDEX_op_ld32s:
2496     case INDEX_op_st32:
2497     case INDEX_op_ext_i32_i64:
2498     case INDEX_op_extu_i32_i64:
2499     case INDEX_op_extrl_i64_i32:
2500     case INDEX_op_extrh_i64_i32:
2501         return TCG_TARGET_REG_BITS == 64;
2502 
2503     case INDEX_op_mov_vec:
2504     case INDEX_op_dup_vec:
2505     case INDEX_op_dupm_vec:
2506     case INDEX_op_ld_vec:
2507     case INDEX_op_st_vec:
2508     case INDEX_op_add_vec:
2509     case INDEX_op_sub_vec:
2510     case INDEX_op_and_vec:
2511     case INDEX_op_or_vec:
2512     case INDEX_op_xor_vec:
2513     case INDEX_op_cmp_vec:
2514         return has_type;
2515     case INDEX_op_dup2_vec:
2516         return has_type && TCG_TARGET_REG_BITS == 32;
2517     case INDEX_op_not_vec:
2518         return has_type && TCG_TARGET_HAS_not_vec;
2519     case INDEX_op_neg_vec:
2520         return has_type && TCG_TARGET_HAS_neg_vec;
2521     case INDEX_op_abs_vec:
2522         return has_type && TCG_TARGET_HAS_abs_vec;
2523     case INDEX_op_andc_vec:
2524         return has_type && TCG_TARGET_HAS_andc_vec;
2525     case INDEX_op_orc_vec:
2526         return has_type && TCG_TARGET_HAS_orc_vec;
2527     case INDEX_op_nand_vec:
2528         return has_type && TCG_TARGET_HAS_nand_vec;
2529     case INDEX_op_nor_vec:
2530         return has_type && TCG_TARGET_HAS_nor_vec;
2531     case INDEX_op_eqv_vec:
2532         return has_type && TCG_TARGET_HAS_eqv_vec;
2533     case INDEX_op_mul_vec:
2534         return has_type && TCG_TARGET_HAS_mul_vec;
2535     case INDEX_op_shli_vec:
2536     case INDEX_op_shri_vec:
2537     case INDEX_op_sari_vec:
2538         return has_type && TCG_TARGET_HAS_shi_vec;
2539     case INDEX_op_shls_vec:
2540     case INDEX_op_shrs_vec:
2541     case INDEX_op_sars_vec:
2542         return has_type && TCG_TARGET_HAS_shs_vec;
2543     case INDEX_op_shlv_vec:
2544     case INDEX_op_shrv_vec:
2545     case INDEX_op_sarv_vec:
2546         return has_type && TCG_TARGET_HAS_shv_vec;
2547     case INDEX_op_rotli_vec:
2548         return has_type && TCG_TARGET_HAS_roti_vec;
2549     case INDEX_op_rotls_vec:
2550         return has_type && TCG_TARGET_HAS_rots_vec;
2551     case INDEX_op_rotlv_vec:
2552     case INDEX_op_rotrv_vec:
2553         return has_type && TCG_TARGET_HAS_rotv_vec;
2554     case INDEX_op_ssadd_vec:
2555     case INDEX_op_usadd_vec:
2556     case INDEX_op_sssub_vec:
2557     case INDEX_op_ussub_vec:
2558         return has_type && TCG_TARGET_HAS_sat_vec;
2559     case INDEX_op_smin_vec:
2560     case INDEX_op_umin_vec:
2561     case INDEX_op_smax_vec:
2562     case INDEX_op_umax_vec:
2563         return has_type && TCG_TARGET_HAS_minmax_vec;
2564     case INDEX_op_bitsel_vec:
2565         return has_type && TCG_TARGET_HAS_bitsel_vec;
2566     case INDEX_op_cmpsel_vec:
2567         return has_type && TCG_TARGET_HAS_cmpsel_vec;
2568 
2569     default:
2570         if (op < INDEX_op_last_generic) {
2571             const TCGOutOp *outop;
2572             TCGConstraintSetIndex con_set;
2573 
2574             if (!has_type) {
2575                 return false;
2576             }
2577 
2578     do_lookup:
2579             outop = all_outop[op];
2580             tcg_debug_assert(outop != NULL);
2581 
2582             con_set = outop->static_constraint;
2583             if (con_set == C_Dynamic) {
2584                 con_set = outop->dynamic_constraint(type, flags);
2585             }
2586             if (con_set >= 0) {
2587                 return true;
2588             }
2589             tcg_debug_assert(con_set == C_NotImplemented);
2590             return false;
2591         }
2592         tcg_debug_assert(op < NB_OPS);
2593         return true;
2594 
2595     case INDEX_op_last_generic:
2596         g_assert_not_reached();
2597     }
2598 }
2599 
2600 bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len)
2601 {
2602     unsigned width;
2603 
2604     tcg_debug_assert(type == TCG_TYPE_I32 || type == TCG_TYPE_I64);
2605     width = (type == TCG_TYPE_I32 ? 32 : 64);
2606 
2607     tcg_debug_assert(ofs < width);
2608     tcg_debug_assert(len > 0);
2609     tcg_debug_assert(len <= width - ofs);
2610 
2611     return TCG_TARGET_deposit_valid(type, ofs, len);
2612 }
2613 
2614 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2615 
/*
 * Emit an INDEX_op_call to helper @func described by @info, writing
 * its result (if any) to @ret and reading inputs from @args (indexed
 * by the layout computed in init_call_layout).  Handles splitting
 * multi-register returns, passing by reference, and zero/sign
 * extension of 32-bit arguments where the ABI requires it.
 */
static void tcg_gen_callN(void *func, TCGHelperInfo *info,
                          TCGTemp *ret, TCGTemp **args)
{
    TCGv_i64 extend_free[MAX_CALL_IARGS];
    int n_extend = 0;
    TCGOp *op;
    int i, n, pi = 0, total_args;

    /* Lazily compute the call layout, exactly once per helper. */
    if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
        init_call_layout(info);
        g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
    }

    /* Outputs + inputs + the func and info words appended at the end. */
    total_args = info->nr_out + info->nr_in + 2;
    op = tcg_op_alloc(INDEX_op_call, total_args);

#ifdef CONFIG_PLUGIN
    /* Flag helpers that may affect guest state */
    if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
        tcg_ctx->plugin_insn->calls_helpers = true;
    }
#endif

    TCGOP_CALLO(op) = n = info->nr_out;
    switch (n) {
    case 0:
        tcg_debug_assert(ret == NULL);
        break;
    case 1:
        tcg_debug_assert(ret != NULL);
        op->args[pi++] = temp_arg(ret);
        break;
    case 2:
    case 4:
        /* Multi-part return: @ret is the first of n contiguous parts. */
        tcg_debug_assert(ret != NULL);
        tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
        tcg_debug_assert(ret->temp_subindex == 0);
        for (i = 0; i < n; ++i) {
            op->args[pi++] = temp_arg(ret + i);
        }
        break;
    default:
        g_assert_not_reached();
    }

    TCGOP_CALLI(op) = n = info->nr_in;
    for (i = 0; i < n; i++) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_BY_REF:
        case TCG_CALL_ARG_BY_REF_N:
            op->args[pi++] = temp_arg(ts);
            break;

        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            {
                /* ABI wants 32-bit args widened to 64 bits: extend into
                   a scratch i64, freed after the op is emitted. */
                TCGv_i64 temp = tcg_temp_ebb_new_i64();
                TCGv_i32 orig = temp_tcgv_i32(ts);

                if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
                    tcg_gen_ext_i32_i64(temp, orig);
                } else {
                    tcg_gen_extu_i32_i64(temp, orig);
                }
                op->args[pi++] = tcgv_i64_arg(temp);
                extend_free[n_extend++] = temp;
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = (uintptr_t)info;
    tcg_debug_assert(pi == total_args);

    if (tcg_ctx->emit_before_op) {
        QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
    } else {
        QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
    }

    tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
    for (i = 0; i < n_extend; ++i) {
        tcg_temp_free_i64(extend_free[i]);
    }
}
2708 
/* Emit a call op for helper @func (described by @info) with no arguments. */
void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret)
{
    tcg_gen_callN(func, info, ret, NULL);
}
2713 
/* Emit a call op for helper @func (described by @info) with one argument. */
void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
{
    tcg_gen_callN(func, info, ret, &t1);
}
2718 
2719 void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret,
2720                    TCGTemp *t1, TCGTemp *t2)
2721 {
2722     TCGTemp *args[2] = { t1, t2 };
2723     tcg_gen_callN(func, info, ret, args);
2724 }
2725 
2726 void tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret,
2727                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3)
2728 {
2729     TCGTemp *args[3] = { t1, t2, t3 };
2730     tcg_gen_callN(func, info, ret, args);
2731 }
2732 
2733 void tcg_gen_call4(void *func, TCGHelperInfo *info, TCGTemp *ret,
2734                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2735 {
2736     TCGTemp *args[4] = { t1, t2, t3, t4 };
2737     tcg_gen_callN(func, info, ret, args);
2738 }
2739 
2740 void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2741                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2742 {
2743     TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2744     tcg_gen_callN(func, info, ret, args);
2745 }
2746 
2747 void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret,
2748                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3,
2749                    TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2750 {
2751     TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2752     tcg_gen_callN(func, info, ret, args);
2753 }
2754 
2755 void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2756                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2757                    TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2758 {
2759     TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2760     tcg_gen_callN(func, info, ret, args);
2761 }
2762 
2763 static void tcg_reg_alloc_start(TCGContext *s)
2764 {
2765     int i, n;
2766 
2767     for (i = 0, n = s->nb_temps; i < n; i++) {
2768         TCGTemp *ts = &s->temps[i];
2769         TCGTempVal val = TEMP_VAL_MEM;
2770 
2771         switch (ts->kind) {
2772         case TEMP_CONST:
2773             val = TEMP_VAL_CONST;
2774             break;
2775         case TEMP_FIXED:
2776             val = TEMP_VAL_REG;
2777             break;
2778         case TEMP_GLOBAL:
2779             break;
2780         case TEMP_EBB:
2781             val = TEMP_VAL_DEAD;
2782             /* fall through */
2783         case TEMP_TB:
2784             ts->mem_allocated = 0;
2785             break;
2786         default:
2787             g_assert_not_reached();
2788         }
2789         ts->val_type = val;
2790     }
2791 
2792     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2793 }
2794 
2795 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2796                                  TCGTemp *ts)
2797 {
2798     int idx = temp_idx(ts);
2799 
2800     switch (ts->kind) {
2801     case TEMP_FIXED:
2802     case TEMP_GLOBAL:
2803         pstrcpy(buf, buf_size, ts->name);
2804         break;
2805     case TEMP_TB:
2806         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2807         break;
2808     case TEMP_EBB:
2809         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2810         break;
2811     case TEMP_CONST:
2812         switch (ts->type) {
2813         case TCG_TYPE_I32:
2814             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2815             break;
2816 #if TCG_TARGET_REG_BITS > 32
2817         case TCG_TYPE_I64:
2818             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2819             break;
2820 #endif
2821         case TCG_TYPE_V64:
2822         case TCG_TYPE_V128:
2823         case TCG_TYPE_V256:
2824             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2825                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2826             break;
2827         default:
2828             g_assert_not_reached();
2829         }
2830         break;
2831     }
2832     return buf;
2833 }
2834 
/* As tcg_get_arg_str_ptr, but starting from a TCGArg. */
static char *tcg_get_arg_str(TCGContext *s, char *buf,
                             int buf_size, TCGArg arg)
{
    return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
}
2840 
/* Printable names for TCGCond values, indexed by condition code. */
static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu",
    [TCG_COND_TSTEQ] = "tsteq",
    [TCG_COND_TSTNE] = "tstne",
};
2858 
/*
 * Printable names for MemOp size/sign/endianness combinations,
 * indexed by (memop & (MO_BSWAP | MO_SSIZE)).  Unnamed combinations
 * are NULL and force numeric printing in tcg_dump_ops.
 */
static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
{
    [MO_UB]   = "ub",
    [MO_SB]   = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEUQ] = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEUQ] = "beq",
    [MO_128 + MO_BE] = "beo",
    [MO_128 + MO_LE] = "leo",
};
2876 
/* Printable names for the MemOp alignment field (MO_AMASK). */
static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};
2887 
/* Printable names for the MemOp atomicity field (MO_ATOM_MASK). */
static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
    [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
    [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
    [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
    [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
    [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
    [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
};
2896 
/*
 * Printable names for valid TCG_BSWAP_* flag combinations.
 * Invalid combinations hold empty strings, which read as NULL-like
 * "no name" entries via the `name` check in tcg_dump_ops.
 */
static const char bswap_flag_name[][6] = {
    [TCG_BSWAP_IZ] = "iz",
    [TCG_BSWAP_OZ] = "oz",
    [TCG_BSWAP_OS] = "os",
    [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
    [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
};
2904 
#ifdef CONFIG_PLUGIN
/*
 * Printable names for the "from" argument of INDEX_op_plugin_cb.
 * NOTE(review): ordering assumed to match the plugin callback-site
 * enumeration -- verify against the plugin op emission code.
 */
static const char * const plugin_from_name[] = {
    "from-tb",
    "from-insn",
    "after-insn",
    "after-tb",
};
#endif
2913 
2914 static inline bool tcg_regset_single(TCGRegSet d)
2915 {
2916     return (d & (d - 1)) == 0;
2917 }
2918 
2919 static inline TCGReg tcg_regset_first(TCGRegSet d)
2920 {
2921     if (TCG_TARGET_NB_REGS <= 32) {
2922         return ctz32(d);
2923     } else {
2924         return ctz64(d);
2925     }
2926 }
2927 
/* Return only the number of characters output -- no error return. */
/* GNU statement expression; a negative fprintf result is clamped to 0. */
#define ne_fprintf(...) \
    ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2931 
/*
 * Print the current opcode stream of @s to @f, one op per line.
 * If @have_prefs is set, also print the output-register preferences
 * computed by liveness analysis; sync/dead argument information
 * (op->life) is printed whenever present.
 */
void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
{
    char buf[128];
    TCGOp *op;

    QTAILQ_FOREACH(op, &s->ops, link) {
        int i, k, nb_oargs, nb_iargs, nb_cargs;
        const TCGOpDef *def;
        TCGOpcode c;
        int col = 0;    /* characters printed so far on this line */

        c = op->opc;
        def = &tcg_op_defs[c];

        if (c == INDEX_op_insn_start) {
            /* Guest instruction boundary: print the start parameters. */
            nb_oargs = 0;
            col += ne_fprintf(f, "\n ----");

            for (i = 0, k = INSN_START_WORDS; i < k; ++i) {
                col += ne_fprintf(f, " %016" PRIx64,
                                  tcg_get_insn_start_param(op, i));
            }
        } else if (c == INDEX_op_call) {
            const TCGHelperInfo *info = tcg_call_info(op);
            void *func = tcg_call_func(op);

            /* variable number of arguments */
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            nb_cargs = def->nb_cargs;

            col += ne_fprintf(f, " %s ", def->name);

            /*
             * Print the function name from TCGHelperInfo, if available.
             * Note that plugins have a template function for the info,
             * but the actual function pointer comes from the plugin.
             */
            if (func == info->func) {
                col += ne_fprintf(f, "%s", info->name);
            } else {
                col += ne_fprintf(f, "plugin(%p)", func);
            }

            col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
            for (i = 0; i < nb_oargs; i++) {
                col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
                                                            op->args[i]));
            }
            for (i = 0; i < nb_iargs; i++) {
                TCGArg arg = op->args[nb_oargs + i];
                const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
                col += ne_fprintf(f, ",%s", t);
            }
        } else {
            /* Generic op: name decorated with integer width or vector shape. */
            if (def->flags & TCG_OPF_INT) {
                col += ne_fprintf(f, " %s_i%d ",
                                  def->name,
                                  8 * tcg_type_size(TCGOP_TYPE(op)));
            } else if (def->flags & TCG_OPF_VECTOR) {
                col += ne_fprintf(f, "%s v%d,e%d,",
                                  def->name,
                                  8 * tcg_type_size(TCGOP_TYPE(op)),
                                  8 << TCGOP_VECE(op));
            } else {
                col += ne_fprintf(f, " %s ", def->name);
            }

            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            nb_cargs = def->nb_cargs;

            /* k walks op->args; output args first, then input args. */
            k = 0;
            for (i = 0; i < nb_oargs; i++) {
                const char *sep =  k ? "," : "";
                col += ne_fprintf(f, "%s%s", sep,
                                  tcg_get_arg_str(s, buf, sizeof(buf),
                                                  op->args[k++]));
            }
            for (i = 0; i < nb_iargs; i++) {
                const char *sep =  k ? "," : "";
                col += ne_fprintf(f, "%s%s", sep,
                                  tcg_get_arg_str(s, buf, sizeof(buf),
                                                  op->args[k++]));
            }
            /*
             * Decode the first constant argument symbolically where the
             * opcode gives it a known meaning; `i` counts consumed cargs.
             */
            switch (c) {
            case INDEX_op_brcond:
            case INDEX_op_setcond:
            case INDEX_op_negsetcond:
            case INDEX_op_movcond:
            case INDEX_op_brcond2_i32:
            case INDEX_op_setcond2_i32:
            case INDEX_op_cmp_vec:
            case INDEX_op_cmpsel_vec:
                if (op->args[k] < ARRAY_SIZE(cond_name)
                    && cond_name[op->args[k]]) {
                    col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
                } else {
                    col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
                }
                i = 1;
                break;
            case INDEX_op_qemu_ld:
            case INDEX_op_qemu_st:
            case INDEX_op_qemu_ld2:
            case INDEX_op_qemu_st2:
                {
                    const char *s_al, *s_op, *s_at;
                    MemOpIdx oi = op->args[k++];
                    MemOp mop = get_memop(oi);
                    unsigned ix = get_mmuidx(oi);

                    s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
                    s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
                    s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
                    mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);

                    /* If all fields are accounted for, print symbolically. */
                    if (!mop && s_al && s_op && s_at) {
                        col += ne_fprintf(f, ",%s%s%s,%u",
                                          s_at, s_al, s_op, ix);
                    } else {
                        mop = get_memop(oi);
                        col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
                    }
                    i = 1;
                }
                break;
            case INDEX_op_bswap16:
            case INDEX_op_bswap32:
            case INDEX_op_bswap64:
                {
                    TCGArg flags = op->args[k];
                    const char *name = NULL;

                    if (flags < ARRAY_SIZE(bswap_flag_name)) {
                        name = bswap_flag_name[flags];
                    }
                    if (name) {
                        col += ne_fprintf(f, ",%s", name);
                    } else {
                        col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
                    }
                    i = k = 1;
                }
                break;
#ifdef CONFIG_PLUGIN
            case INDEX_op_plugin_cb:
                {
                    TCGArg from = op->args[k++];
                    const char *name = NULL;

                    if (from < ARRAY_SIZE(plugin_from_name)) {
                        name = plugin_from_name[from];
                    }
                    if (name) {
                        col += ne_fprintf(f, "%s", name);
                    } else {
                        col += ne_fprintf(f, "$0x%" TCG_PRIlx, from);
                    }
                    i = 1;
                }
                break;
#endif
            default:
                i = 0;
                break;
            }
            /* Opcodes whose trailing carg is a label or a memory barrier. */
            switch (c) {
            case INDEX_op_set_label:
            case INDEX_op_br:
            case INDEX_op_brcond:
            case INDEX_op_brcond2_i32:
                col += ne_fprintf(f, "%s$L%d", k ? "," : "",
                                  arg_label(op->args[k])->id);
                i++, k++;
                break;
            case INDEX_op_mb:
                {
                    TCGBar membar = op->args[k];
                    const char *b_op, *m_op;

                    /* Barrier strength. */
                    switch (membar & TCG_BAR_SC) {
                    case 0:
                        b_op = "none";
                        break;
                    case TCG_BAR_LDAQ:
                        b_op = "acq";
                        break;
                    case TCG_BAR_STRL:
                        b_op = "rel";
                        break;
                    case TCG_BAR_SC:
                        b_op = "seq";
                        break;
                    default:
                        g_assert_not_reached();
                    }

                    /* Which load/store orderings the barrier enforces. */
                    switch (membar & TCG_MO_ALL) {
                    case 0:
                        m_op = "none";
                        break;
                    case TCG_MO_LD_LD:
                        m_op = "rr";
                        break;
                    case TCG_MO_LD_ST:
                        m_op = "rw";
                        break;
                    case TCG_MO_ST_LD:
                        m_op = "wr";
                        break;
                    case TCG_MO_ST_ST:
                        m_op = "ww";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_LD_ST:
                        m_op = "rr+rw";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_ST_LD:
                        m_op = "rr+wr";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_ST_ST:
                        m_op = "rr+ww";
                        break;
                    case TCG_MO_LD_ST | TCG_MO_ST_LD:
                        m_op = "rw+wr";
                        break;
                    case TCG_MO_LD_ST | TCG_MO_ST_ST:
                        m_op = "rw+ww";
                        break;
                    case TCG_MO_ST_LD | TCG_MO_ST_ST:
                        m_op = "wr+ww";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
                        m_op = "rr+rw+wr";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
                        m_op = "rr+rw+ww";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
                        m_op = "rr+wr+ww";
                        break;
                    case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
                        m_op = "rw+wr+ww";
                        break;
                    case TCG_MO_ALL:
                        m_op = "all";
                        break;
                    default:
                        g_assert_not_reached();
                    }

                    col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
                    i++, k++;
                }
                break;
            default:
                break;
            }
            /* Remaining constant arguments print numerically. */
            for (; i < nb_cargs; i++, k++) {
                col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
                                  op->args[k]);
            }
        }

        /* Pad to a fixed column before printing liveness/pref info. */
        if (have_prefs || op->life) {
            for (; col < 40; ++col) {
                putc(' ', f);
            }
        }

        if (op->life) {
            unsigned life = op->life;

            /* Outputs that must be synced back to memory. */
            if (life & (SYNC_ARG * 3)) {
                ne_fprintf(f, "  sync:");
                for (i = 0; i < 2; ++i) {
                    if (life & (SYNC_ARG << i)) {
                        ne_fprintf(f, " %d", i);
                    }
                }
            }
            /* Arguments that are dead after this op. */
            life /= DEAD_ARG;
            if (life) {
                ne_fprintf(f, "  dead:");
                for (i = 0; life; ++i, life >>= 1) {
                    if (life & 1) {
                        ne_fprintf(f, " %d", i);
                    }
                }
            }
        }

        if (have_prefs) {
            for (i = 0; i < nb_oargs; ++i) {
                TCGRegSet set = output_pref(op, i);

                if (i == 0) {
                    ne_fprintf(f, "  pref=");
                } else {
                    ne_fprintf(f, ",");
                }
                if (set == 0) {
                    ne_fprintf(f, "none");
                } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
                    ne_fprintf(f, "all");
#ifdef CONFIG_DEBUG_TCG
                } else if (tcg_regset_single(set)) {
                    TCGReg reg = tcg_regset_first(set);
                    ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
#endif
                } else if (TCG_TARGET_NB_REGS <= 32) {
                    ne_fprintf(f, "0x%x", (uint32_t)set);
                } else {
                    ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
                }
            }
        }

        putc('\n', f);
    }
}
3254 
3255 /* we give more priority to constraints with less registers */
3256 static int get_constraint_priority(const TCGArgConstraint *arg_ct, int k)
3257 {
3258     int n;
3259 
3260     arg_ct += k;
3261     n = ctpop64(arg_ct->regs);
3262 
3263     /*
3264      * Sort constraints of a single register first, which includes output
3265      * aliases (which must exactly match the input already allocated).
3266      */
3267     if (n == 1 || arg_ct->oalias) {
3268         return INT_MAX;
3269     }
3270 
3271     /*
3272      * Sort register pairs next, first then second immediately after.
3273      * Arbitrarily sort multiple pairs by the index of the first reg;
3274      * there shouldn't be many pairs.
3275      */
3276     switch (arg_ct->pair) {
3277     case 1:
3278     case 3:
3279         return (k + 1) * 2;
3280     case 2:
3281         return (arg_ct->pair_index + 1) * 2 - 1;
3282     }
3283 
3284     /* Finally, sort by decreasing register count. */
3285     assert(n > 1);
3286     return -n;
3287 }
3288 
3289 /* sort from highest priority to lowest */
3290 static void sort_constraints(TCGArgConstraint *a, int start, int n)
3291 {
3292     int i, j;
3293 
3294     for (i = 0; i < n; i++) {
3295         a[start + i].sort_index = start + i;
3296     }
3297     if (n <= 1) {
3298         return;
3299     }
3300     for (i = 0; i < n - 1; i++) {
3301         for (j = i + 1; j < n; j++) {
3302             int p1 = get_constraint_priority(a, a[start + i].sort_index);
3303             int p2 = get_constraint_priority(a, a[start + j].sort_index);
3304             if (p1 < p2) {
3305                 int tmp = a[start + i].sort_index;
3306                 a[start + i].sort_index = a[start + j].sort_index;
3307                 a[start + j].sort_index = tmp;
3308             }
3309         }
3310     }
3311 }
3312 
/* Zero constraint set, returned for TCG_OPF_NOT_PRESENT opcodes. */
static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS];
/* Expanded form of constraint_sets[], filled by process_constraint_sets. */
static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS];
3315 
/*
 * Expand every TCGConstraintSet from its string form into all_cts[]:
 * decode alias digits ('0'-'9'), '&' (new register), 'p'/'m'
 * (register pairs) and the target's single-letter register/constant
 * constraint letters; then fix up pair/alias interactions and sort
 * each set's arguments by allocation priority.
 */
static void process_constraint_sets(void)
{
    for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) {
        const TCGConstraintSet *tdefs = &constraint_sets[c];
        TCGArgConstraint *args_ct = all_cts[c];
        int nb_oargs = tdefs->nb_oargs;
        int nb_iargs = tdefs->nb_iargs;
        int nb_args = nb_oargs + nb_iargs;
        bool saw_alias_pair = false;

        for (int i = 0; i < nb_args; i++) {
            const char *ct_str = tdefs->args_ct_str[i];
            /* Arguments are ordered outputs first, then inputs. */
            bool input_p = i >= nb_oargs;
            int o;

            switch (*ct_str) {
            case '0' ... '9':
                /* Input aliased to output 'o': copy its constraint. */
                o = *ct_str - '0';
                tcg_debug_assert(input_p);
                tcg_debug_assert(o < nb_oargs);
                tcg_debug_assert(args_ct[o].regs != 0);
                tcg_debug_assert(!args_ct[o].oalias);
                args_ct[i] = args_ct[o];
                /* The output sets oalias.  */
                args_ct[o].oalias = 1;
                args_ct[o].alias_index = i;
                /* The input sets ialias. */
                args_ct[i].ialias = 1;
                args_ct[i].alias_index = o;
                if (args_ct[i].pair) {
                    saw_alias_pair = true;
                }
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case '&':
                /* Output must be a register distinct from all inputs. */
                tcg_debug_assert(!input_p);
                args_ct[i].newreg = true;
                ct_str++;
                break;

            case 'p': /* plus */
                /* Allocate to the register after the previous. */
                tcg_debug_assert(i > (input_p ? nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!args_ct[o].pair);
                tcg_debug_assert(!args_ct[o].ct);
                args_ct[i] = (TCGArgConstraint){
                    .pair = 2,
                    .pair_index = o,
                    .regs = args_ct[o].regs << 1,
                    .newreg = args_ct[o].newreg,
                };
                args_ct[o].pair = 1;
                args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case 'm': /* minus */
                /* Allocate to the register before the previous. */
                tcg_debug_assert(i > (input_p ? nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!args_ct[o].pair);
                tcg_debug_assert(!args_ct[o].ct);
                args_ct[i] = (TCGArgConstraint){
                    .pair = 1,
                    .pair_index = o,
                    .regs = args_ct[o].regs >> 1,
                    .newreg = args_ct[o].newreg,
                };
                args_ct[o].pair = 2;
                args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;
            }

            /* Accumulate remaining single-letter constraints. */
            do {
                switch (*ct_str) {
                case 'i':
                    args_ct[i].ct |= TCG_CT_CONST;
                    break;
#ifdef TCG_REG_ZERO
                case 'z':
                    args_ct[i].ct |= TCG_CT_REG_ZERO;
                    break;
#endif

                /* Include all of the target-specific constraints. */

#undef CONST
#define CONST(CASE, MASK) \
    case CASE: args_ct[i].ct |= MASK; break;
#define REGS(CASE, MASK) \
    case CASE: args_ct[i].regs |= MASK; break;

#include "tcg-target-con-str.h"

#undef REGS
#undef CONST
                default:
                case '0' ... '9':
                case '&':
                case 'p':
                case 'm':
                    /* Typo in TCGConstraintSet constraint. */
                    g_assert_not_reached();
                }
            } while (*++ct_str != '\0');
        }

        /*
         * Fix up output pairs that are aliased with inputs.
         * When we created the alias, we copied pair from the output.
         * There are three cases:
         *    (1a) Pairs of inputs alias pairs of outputs.
         *    (1b) One input aliases the first of a pair of outputs.
         *    (2)  One input aliases the second of a pair of outputs.
         *
         * Case 1a is handled by making sure that the pair_index'es are
         * properly updated so that they appear the same as a pair of inputs.
         *
         * Case 1b is handled by setting the pair_index of the input to
         * itself, simply so it doesn't point to an unrelated argument.
         * Since we don't encounter the "second" during the input allocation
         * phase, nothing happens with the second half of the input pair.
         *
         * Case 2 is handled by setting the second input to pair=3, the
         * first output to pair=3, and the pair_index'es to match.
         */
        if (saw_alias_pair) {
            for (int i = nb_oargs; i < nb_args; i++) {
                int o, o2, i2;

                /*
                 * Since [0-9pm] must be alone in the constraint string,
                 * the only way they can both be set is if the pair comes
                 * from the output alias.
                 */
                if (!args_ct[i].ialias) {
                    continue;
                }
                switch (args_ct[i].pair) {
                case 0:
                    break;
                case 1:
                    o = args_ct[i].alias_index;
                    o2 = args_ct[o].pair_index;
                    tcg_debug_assert(args_ct[o].pair == 1);
                    tcg_debug_assert(args_ct[o2].pair == 2);
                    if (args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = args_ct[o2].alias_index;
                        tcg_debug_assert(args_ct[i2].pair == 2);
                        args_ct[i2].pair_index = i;
                        args_ct[i].pair_index = i2;
                    } else {
                        /* Case 1b */
                        args_ct[i].pair_index = i;
                    }
                    break;
                case 2:
                    o = args_ct[i].alias_index;
                    o2 = args_ct[o].pair_index;
                    tcg_debug_assert(args_ct[o].pair == 2);
                    tcg_debug_assert(args_ct[o2].pair == 1);
                    if (args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = args_ct[o2].alias_index;
                        tcg_debug_assert(args_ct[i2].pair == 1);
                        args_ct[i2].pair_index = i;
                        args_ct[i].pair_index = i2;
                    } else {
                        /* Case 2 */
                        args_ct[i].pair = 3;
                        args_ct[o2].pair = 3;
                        args_ct[i].pair_index = o2;
                        args_ct[o2].pair_index = i;
                    }
                    break;
                default:
                    g_assert_not_reached();
                }
            }
        }

        /* sort the constraints (XXX: this is just an heuristic) */
        sort_constraints(args_ct, 0, nb_oargs);
        sort_constraints(args_ct, nb_oargs, nb_iargs);
    }
}
3506 
3507 static const TCGArgConstraint *opcode_args_ct(const TCGOp *op)
3508 {
3509     TCGOpcode opc = op->opc;
3510     TCGType type = TCGOP_TYPE(op);
3511     unsigned flags = TCGOP_FLAGS(op);
3512     const TCGOpDef *def = &tcg_op_defs[opc];
3513     const TCGOutOp *outop = all_outop[opc];
3514     TCGConstraintSetIndex con_set;
3515 
3516     if (def->flags & TCG_OPF_NOT_PRESENT) {
3517         return empty_cts;
3518     }
3519 
3520     if (outop) {
3521         con_set = outop->static_constraint;
3522         if (con_set == C_Dynamic) {
3523             con_set = outop->dynamic_constraint(type, flags);
3524         }
3525     } else {
3526         con_set = tcg_target_op_def(opc, type, flags);
3527     }
3528     tcg_debug_assert(con_set >= 0);
3529     tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
3530 
3531     /* The constraint arguments must match TCGOpcode arguments. */
3532     tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs);
3533     tcg_debug_assert(constraint_sets[con_set].nb_iargs == def->nb_iargs);
3534 
3535     return all_cts[con_set];
3536 }
3537 
3538 static void remove_label_use(TCGOp *op, int idx)
3539 {
3540     TCGLabel *label = arg_label(op->args[idx]);
3541     TCGLabelUse *use;
3542 
3543     QSIMPLEQ_FOREACH(use, &label->branches, next) {
3544         if (use->op == op) {
3545             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3546             return;
3547         }
3548     }
3549     g_assert_not_reached();
3550 }
3551 
3552 void tcg_op_remove(TCGContext *s, TCGOp *op)
3553 {
3554     switch (op->opc) {
3555     case INDEX_op_br:
3556         remove_label_use(op, 0);
3557         break;
3558     case INDEX_op_brcond:
3559         remove_label_use(op, 3);
3560         break;
3561     case INDEX_op_brcond2_i32:
3562         remove_label_use(op, 5);
3563         break;
3564     default:
3565         break;
3566     }
3567 
3568     QTAILQ_REMOVE(&s->ops, op, link);
3569     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3570     s->nb_ops--;
3571 }
3572 
3573 void tcg_remove_ops_after(TCGOp *op)
3574 {
3575     TCGContext *s = tcg_ctx;
3576 
3577     while (true) {
3578         TCGOp *last = tcg_last_op();
3579         if (last == op) {
3580             return;
3581         }
3582         tcg_op_remove(s, last);
3583     }
3584 }
3585 
3586 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3587 {
3588     TCGContext *s = tcg_ctx;
3589     TCGOp *op = NULL;
3590 
3591     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3592         QTAILQ_FOREACH(op, &s->free_ops, link) {
3593             if (nargs <= op->nargs) {
3594                 QTAILQ_REMOVE(&s->free_ops, op, link);
3595                 nargs = op->nargs;
3596                 goto found;
3597             }
3598         }
3599     }
3600 
3601     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3602     nargs = MAX(4, nargs);
3603     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3604 
3605  found:
3606     memset(op, 0, offsetof(TCGOp, link));
3607     op->opc = opc;
3608     op->nargs = nargs;
3609 
3610     /* Check for bitfield overflow. */
3611     tcg_debug_assert(op->nargs == nargs);
3612 
3613     s->nb_ops++;
3614     return op;
3615 }
3616 
3617 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3618 {
3619     TCGOp *op = tcg_op_alloc(opc, nargs);
3620 
3621     if (tcg_ctx->emit_before_op) {
3622         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
3623     } else {
3624         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3625     }
3626     return op;
3627 }
3628 
3629 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3630                             TCGOpcode opc, TCGType type, unsigned nargs)
3631 {
3632     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3633 
3634     TCGOP_TYPE(new_op) = type;
3635     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3636     return new_op;
3637 }
3638 
3639 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3640                            TCGOpcode opc, TCGType type, unsigned nargs)
3641 {
3642     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3643 
3644     TCGOP_TYPE(new_op) = type;
3645     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3646     return new_op;
3647 }
3648 
3649 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3650 {
3651     TCGLabelUse *u;
3652 
3653     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3654         TCGOp *op = u->op;
3655         switch (op->opc) {
3656         case INDEX_op_br:
3657             op->args[0] = label_arg(to);
3658             break;
3659         case INDEX_op_brcond:
3660             op->args[3] = label_arg(to);
3661             break;
3662         case INDEX_op_brcond2_i32:
3663             op->args[5] = label_arg(to);
3664             break;
3665         default:
3666             g_assert_not_reached();
3667         }
3668     }
3669 
3670     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3671 }
3672 
/* Reachable analysis : remove unreachable code.  */
static void __attribute__((noinline))
reachable_code_pass(TCGContext *s)
{
    TCGOp *op, *op_next, *op_prev;
    /* True while scanning ops that follow an unconditional control
       transfer and therefore cannot be reached. */
    bool dead = false;

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        bool remove = dead;
        TCGLabel *label;

        switch (op->opc) {
        case INDEX_op_set_label:
            label = arg_label(op->args[0]);

            /*
             * Note that the first op in the TB is always a load,
             * so there is always something before a label.
             */
            op_prev = QTAILQ_PREV(op, link);

            /*
             * If we find two sequential labels, move all branches to
             * reference the second label and remove the first label.
             * Do this before branch to next optimization, so that the
             * middle label is out of the way.
             */
            if (op_prev->opc == INDEX_op_set_label) {
                move_label_uses(label, arg_label(op_prev->args[0]));
                tcg_op_remove(s, op_prev);
                op_prev = QTAILQ_PREV(op, link);
            }

            /*
             * Optimization can fold conditional branches to unconditional.
             * If we find a label which is preceded by an unconditional
             * branch to next, remove the branch.  We couldn't do this when
             * processing the branch because any dead code between the branch
             * and label had not yet been removed.
             */
            if (op_prev->opc == INDEX_op_br &&
                label == arg_label(op_prev->args[0])) {
                tcg_op_remove(s, op_prev);
                /* Fall through means insns become live again.  */
                dead = false;
            }

            if (QSIMPLEQ_EMPTY(&label->branches)) {
                /*
                 * While there is an occasional backward branch, virtually
                 * all branches generated by the translators are forward.
                 * Which means that generally we will have already removed
                 * all references to the label that will be, and there is
                 * little to be gained by iterating.
                 */
                remove = true;
            } else {
                /* Once we see a label, insns become live again.  */
                dead = false;
                remove = false;
            }
            break;

        case INDEX_op_br:
        case INDEX_op_exit_tb:
        case INDEX_op_goto_ptr:
            /* Unconditional branches; everything following is dead.  */
            dead = true;
            break;

        case INDEX_op_call:
            /* Notice noreturn helper calls, raising exceptions.  */
            if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
                dead = true;
            }
            break;

        case INDEX_op_insn_start:
            /* Never remove -- we need to keep these for unwind.  */
            remove = false;
            break;

        default:
            break;
        }

        if (remove) {
            tcg_op_remove(s, op);
        }
    }
}
3764 
3765 #define TS_DEAD  1
3766 #define TS_MEM   2
3767 
3768 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
3769 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
3770 
/* For liveness_pass_1, the register preferences for a given temp.  */
static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
{
    /* During pass 1, state_ptr addresses the temp's TCGRegSet slot. */
    return ts->state_ptr;
}
3776 
3777 /* For liveness_pass_1, reset the preferences for a given temp to the
3778  * maximal regset for its type.
3779  */
3780 static inline void la_reset_pref(TCGTemp *ts)
3781 {
3782     *la_temp_pref(ts)
3783         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3784 }
3785 
3786 /* liveness analysis: end of function: all temps are dead, and globals
3787    should be in memory. */
3788 static void la_func_end(TCGContext *s, int ng, int nt)
3789 {
3790     int i;
3791 
3792     for (i = 0; i < ng; ++i) {
3793         s->temps[i].state = TS_DEAD | TS_MEM;
3794         la_reset_pref(&s->temps[i]);
3795     }
3796     for (i = ng; i < nt; ++i) {
3797         s->temps[i].state = TS_DEAD;
3798         la_reset_pref(&s->temps[i]);
3799     }
3800 }
3801 
3802 /* liveness analysis: end of basic block: all temps are dead, globals
3803    and local temps should be in memory. */
3804 static void la_bb_end(TCGContext *s, int ng, int nt)
3805 {
3806     int i;
3807 
3808     for (i = 0; i < nt; ++i) {
3809         TCGTemp *ts = &s->temps[i];
3810         int state;
3811 
3812         switch (ts->kind) {
3813         case TEMP_FIXED:
3814         case TEMP_GLOBAL:
3815         case TEMP_TB:
3816             state = TS_DEAD | TS_MEM;
3817             break;
3818         case TEMP_EBB:
3819         case TEMP_CONST:
3820             state = TS_DEAD;
3821             break;
3822         default:
3823             g_assert_not_reached();
3824         }
3825         ts->state = state;
3826         la_reset_pref(ts);
3827     }
3828 }
3829 
3830 /* liveness analysis: sync globals back to memory.  */
3831 static void la_global_sync(TCGContext *s, int ng)
3832 {
3833     int i;
3834 
3835     for (i = 0; i < ng; ++i) {
3836         int state = s->temps[i].state;
3837         s->temps[i].state = state | TS_MEM;
3838         if (state == TS_DEAD) {
3839             /* If the global was previously dead, reset prefs.  */
3840             la_reset_pref(&s->temps[i]);
3841         }
3842     }
3843 }
3844 
3845 /*
3846  * liveness analysis: conditional branch: all temps are dead unless
3847  * explicitly live-across-conditional-branch, globals and local temps
3848  * should be synced.
3849  */
3850 static void la_bb_sync(TCGContext *s, int ng, int nt)
3851 {
3852     la_global_sync(s, ng);
3853 
3854     for (int i = ng; i < nt; ++i) {
3855         TCGTemp *ts = &s->temps[i];
3856         int state;
3857 
3858         switch (ts->kind) {
3859         case TEMP_TB:
3860             state = ts->state;
3861             ts->state = state | TS_MEM;
3862             if (state != TS_DEAD) {
3863                 continue;
3864             }
3865             break;
3866         case TEMP_EBB:
3867         case TEMP_CONST:
3868             continue;
3869         default:
3870             g_assert_not_reached();
3871         }
3872         la_reset_pref(&s->temps[i]);
3873     }
3874 }
3875 
3876 /* liveness analysis: sync globals back to memory and kill.  */
3877 static void la_global_kill(TCGContext *s, int ng)
3878 {
3879     int i;
3880 
3881     for (i = 0; i < ng; i++) {
3882         s->temps[i].state = TS_DEAD | TS_MEM;
3883         la_reset_pref(&s->temps[i]);
3884     }
3885 }
3886 
3887 /* liveness analysis: note live globals crossing calls.  */
3888 static void la_cross_call(TCGContext *s, int nt)
3889 {
3890     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3891     int i;
3892 
3893     for (i = 0; i < nt; i++) {
3894         TCGTemp *ts = &s->temps[i];
3895         if (!(ts->state & TS_DEAD)) {
3896             TCGRegSet *pset = la_temp_pref(ts);
3897             TCGRegSet set = *pset;
3898 
3899             set &= mask;
3900             /* If the combination is not possible, restart.  */
3901             if (set == 0) {
3902                 set = tcg_target_available_regs[ts->type] & mask;
3903             }
3904             *pset = set;
3905         }
3906     }
3907 }
3908 
3909 /*
3910  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3911  * to TEMP_EBB, if possible.
3912  */
3913 static void __attribute__((noinline))
3914 liveness_pass_0(TCGContext *s)
3915 {
3916     void * const multiple_ebb = (void *)(uintptr_t)-1;
3917     int nb_temps = s->nb_temps;
3918     TCGOp *op, *ebb;
3919 
3920     for (int i = s->nb_globals; i < nb_temps; ++i) {
3921         s->temps[i].state_ptr = NULL;
3922     }
3923 
3924     /*
3925      * Represent each EBB by the op at which it begins.  In the case of
3926      * the first EBB, this is the first op, otherwise it is a label.
3927      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3928      * within a single EBB, else MULTIPLE_EBB.
3929      */
3930     ebb = QTAILQ_FIRST(&s->ops);
3931     QTAILQ_FOREACH(op, &s->ops, link) {
3932         const TCGOpDef *def;
3933         int nb_oargs, nb_iargs;
3934 
3935         switch (op->opc) {
3936         case INDEX_op_set_label:
3937             ebb = op;
3938             continue;
3939         case INDEX_op_discard:
3940             continue;
3941         case INDEX_op_call:
3942             nb_oargs = TCGOP_CALLO(op);
3943             nb_iargs = TCGOP_CALLI(op);
3944             break;
3945         default:
3946             def = &tcg_op_defs[op->opc];
3947             nb_oargs = def->nb_oargs;
3948             nb_iargs = def->nb_iargs;
3949             break;
3950         }
3951 
3952         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3953             TCGTemp *ts = arg_temp(op->args[i]);
3954 
3955             if (ts->kind != TEMP_TB) {
3956                 continue;
3957             }
3958             if (ts->state_ptr == NULL) {
3959                 ts->state_ptr = ebb;
3960             } else if (ts->state_ptr != ebb) {
3961                 ts->state_ptr = multiple_ebb;
3962             }
3963         }
3964     }
3965 
3966     /*
3967      * For TEMP_TB that turned out not to be used beyond one EBB,
3968      * reduce the liveness to TEMP_EBB.
3969      */
3970     for (int i = s->nb_globals; i < nb_temps; ++i) {
3971         TCGTemp *ts = &s->temps[i];
3972         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3973             ts->kind = TEMP_EBB;
3974         }
3975     }
3976 }
3977 
/* Assert that no carry value is pending at a point where none may be. */
static void assert_carry_dead(TCGContext *s)
{
    /*
     * Carry operations can be separated by a few insns like mov,
     * load or store, but they should always be "close", and
     * carry-out operations should always be paired with carry-in.
     * At various boundaries, carry must have been consumed.
     */
    tcg_debug_assert(!s->carry_live);
}
3988 
/* Liveness analysis : update the opc_arg_life array to tell if a
   given input argument is dead.  Instructions updating dead
   temporaries are removed. */
static void __attribute__((noinline))
liveness_pass_1(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps = s->nb_temps;
    TCGOp *op, *op_prev;
    TCGRegSet *prefs;

    /* One register-preference set per temp, reached via ts->state_ptr. */
    prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
    for (int i = 0; i < nb_temps; ++i) {
        s->temps[i].state_ptr = prefs + i;
    }

    /* ??? Should be redundant with the exit_tb that ends the TB.  */
    la_func_end(s, nb_globals, nb_temps);

    /* Walk the ops in reverse, so each use is seen before its def. */
    s->carry_live = false;
    QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
        int nb_iargs, nb_oargs;
        TCGOpcode opc_new, opc_new2;
        TCGLifeData arg_life = 0;
        TCGTemp *ts;
        TCGOpcode opc = op->opc;
        const TCGOpDef *def;
        const TCGArgConstraint *args_ct;

        switch (opc) {
        case INDEX_op_call:
            assert_carry_dead(s);
            {
                const TCGHelperInfo *info = tcg_call_info(op);
                int call_flags = tcg_call_flags(op);

                nb_oargs = TCGOP_CALLO(op);
                nb_iargs = TCGOP_CALLI(op);

                /* pure functions can be removed if their result is unused */
                if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
                    for (int i = 0; i < nb_oargs; i++) {
                        ts = arg_temp(op->args[i]);
                        if (ts->state != TS_DEAD) {
                            goto do_not_remove_call;
                        }
                    }
                    goto do_remove;
                }
            do_not_remove_call:

                /* Output args are dead.  */
                for (int i = 0; i < nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                    if (ts->state & TS_MEM) {
                        arg_life |= SYNC_ARG << i;
                    }
                    ts->state = TS_DEAD;
                    la_reset_pref(ts);
                }

                /* Not used -- it will be tcg_target_call_oarg_reg().  */
                memset(op->output_pref, 0, sizeof(op->output_pref));

                if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
                                    TCG_CALL_NO_READ_GLOBALS))) {
                    la_global_kill(s, nb_globals);
                } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
                    la_global_sync(s, nb_globals);
                }

                /* Record arguments that die in this helper.  */
                for (int i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                }

                /* For all live registers, remove call-clobbered prefs.  */
                la_cross_call(s, nb_temps);

                /*
                 * Input arguments are live for preceding opcodes.
                 *
                 * For those arguments that die, and will be allocated in
                 * registers, clear the register set for that arg, to be
                 * filled in below.  For args that will be on the stack,
                 * reset to any available reg.  Process arguments in reverse
                 * order so that if a temp is used more than once, the stack
                 * reset to max happens before the register reset to 0.
                 */
                for (int i = nb_iargs - 1; i >= 0; i--) {
                    const TCGCallArgumentLoc *loc = &info->in[i];
                    ts = arg_temp(op->args[nb_oargs + i]);

                    if (ts->state & TS_DEAD) {
                        switch (loc->kind) {
                        case TCG_CALL_ARG_NORMAL:
                        case TCG_CALL_ARG_EXTEND_U:
                        case TCG_CALL_ARG_EXTEND_S:
                            if (arg_slot_reg_p(loc->arg_slot)) {
                                *la_temp_pref(ts) = 0;
                                break;
                            }
                            /* fall through */
                        default:
                            *la_temp_pref(ts) =
                                tcg_target_available_regs[ts->type];
                            break;
                        }
                        ts->state &= ~TS_DEAD;
                    }
                }

                /*
                 * For each input argument, add its input register to prefs.
                 * If a temp is used once, this produces a single set bit;
                 * if a temp is used multiple times, this produces a set.
                 */
                for (int i = 0; i < nb_iargs; i++) {
                    const TCGCallArgumentLoc *loc = &info->in[i];
                    ts = arg_temp(op->args[nb_oargs + i]);

                    switch (loc->kind) {
                    case TCG_CALL_ARG_NORMAL:
                    case TCG_CALL_ARG_EXTEND_U:
                    case TCG_CALL_ARG_EXTEND_S:
                        if (arg_slot_reg_p(loc->arg_slot)) {
                            tcg_regset_set_reg(*la_temp_pref(ts),
                                tcg_target_call_iarg_regs[loc->arg_slot]);
                        }
                        break;
                    default:
                        break;
                    }
                }
            }
            break;
        case INDEX_op_insn_start:
            assert_carry_dead(s);
            break;
        case INDEX_op_discard:
            /* mark the temporary as dead */
            ts = arg_temp(op->args[0]);
            ts->state = TS_DEAD;
            la_reset_pref(ts);
            break;

        /* Double-word multiplies may be narrowed if one output is dead. */
        case INDEX_op_muls2:
            opc_new = INDEX_op_mul;
            opc_new2 = INDEX_op_mulsh;
            goto do_mul2;
        case INDEX_op_mulu2:
            opc_new = INDEX_op_mul;
            opc_new2 = INDEX_op_muluh;
        do_mul2:
            assert_carry_dead(s);
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    /* Both parts of the operation are dead.  */
                    goto do_remove;
                }
                /* The high part of the operation is dead; generate the low. */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else if (arg_temp(op->args[0])->state == TS_DEAD &&
                       tcg_op_supported(opc_new2, TCGOP_TYPE(op), 0)) {
                /* The low part of the operation is dead; generate the high. */
                op->opc = opc = opc_new2;
                op->args[0] = op->args[1];
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else {
                goto do_not_remove;
            }
            /* Mark the single-word operation live.  */
            goto do_not_remove;

        case INDEX_op_addco:
            /* Carry-out unused: a plain add suffices. */
            if (s->carry_live) {
                goto do_not_remove;
            }
            op->opc = opc = INDEX_op_add;
            goto do_default;

        case INDEX_op_addcio:
            /* Carry-out unused: keep only the carry-in form. */
            if (s->carry_live) {
                goto do_not_remove;
            }
            op->opc = opc = INDEX_op_addci;
            goto do_default;

        case INDEX_op_subbo:
            if (s->carry_live) {
                goto do_not_remove;
            }
            /* Lower to sub, but this may also require canonicalization. */
            op->opc = opc = INDEX_op_sub;
            ts = arg_temp(op->args[2]);
            if (ts->kind == TEMP_CONST) {
                /* Canonicalize sub-of-constant to add-of-negative.  The new
                   constant may not yet have liveness state; create it. */
                ts = tcg_constant_internal(ts->type, -ts->val);
                if (ts->state_ptr == NULL) {
                    tcg_debug_assert(temp_idx(ts) == nb_temps);
                    nb_temps++;
                    ts->state_ptr = tcg_malloc(sizeof(TCGRegSet));
                    ts->state = TS_DEAD;
                    la_reset_pref(ts);
                }
                op->args[2] = temp_arg(ts);
                op->opc = opc = INDEX_op_add;
            }
            goto do_default;

        case INDEX_op_subbio:
            /* Borrow-out unused: keep only the borrow-in form. */
            if (s->carry_live) {
                goto do_not_remove;
            }
            op->opc = opc = INDEX_op_subbi;
            goto do_default;

        case INDEX_op_addc1o:
            if (s->carry_live) {
                goto do_not_remove;
            }
            /* Lower to add, add +1. */
            op_prev = tcg_op_insert_before(s, op, INDEX_op_add,
                                           TCGOP_TYPE(op), 3);
            op_prev->args[0] = op->args[0];
            op_prev->args[1] = op->args[1];
            op_prev->args[2] = op->args[2];
            op->opc = opc = INDEX_op_add;
            op->args[1] = op->args[0];
            ts = arg_temp(op->args[0]);
            ts = tcg_constant_internal(ts->type, 1);
            op->args[2] = temp_arg(ts);
            goto do_default;

        case INDEX_op_subb1o:
            if (s->carry_live) {
                goto do_not_remove;
            }
            /* Lower to sub, add -1. */
            op_prev = tcg_op_insert_before(s, op, INDEX_op_sub,
                                           TCGOP_TYPE(op), 3);
            op_prev->args[0] = op->args[0];
            op_prev->args[1] = op->args[1];
            op_prev->args[2] = op->args[2];
            op->opc = opc = INDEX_op_add;
            op->args[1] = op->args[0];
            ts = arg_temp(op->args[0]);
            ts = tcg_constant_internal(ts->type, -1);
            op->args[2] = temp_arg(ts);
            goto do_default;

        default:
        do_default:
            /*
             * Test if the operation can be removed because all
             * its outputs are dead. We assume that nb_oargs == 0
             * implies side effects.
             */
            def = &tcg_op_defs[opc];
            if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && def->nb_oargs != 0) {
                for (int i = def->nb_oargs - 1; i >= 0; i--) {
                    if (arg_temp(op->args[i])->state != TS_DEAD) {
                        goto do_not_remove;
                    }
                }
                goto do_remove;
            }
            goto do_not_remove;

        do_remove:
            tcg_op_remove(s, op);
            break;

        do_not_remove:
            def = &tcg_op_defs[opc];
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            for (int i = 0; i < nb_oargs; i++) {
                ts = arg_temp(op->args[i]);

                /* Remember the preference of the uses that followed.  */
                if (i < ARRAY_SIZE(op->output_pref)) {
                    op->output_pref[i] = *la_temp_pref(ts);
                }

                /* Output args are dead.  */
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
                if (ts->state & TS_MEM) {
                    arg_life |= SYNC_ARG << i;
                }
                ts->state = TS_DEAD;
                la_reset_pref(ts);
            }

            /* If end of basic block, update.  */
            if (def->flags & TCG_OPF_BB_EXIT) {
                assert_carry_dead(s);
                la_func_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_COND_BRANCH) {
                assert_carry_dead(s);
                la_bb_sync(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_BB_END) {
                assert_carry_dead(s);
                la_bb_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                assert_carry_dead(s);
                la_global_sync(s, nb_globals);
                if (def->flags & TCG_OPF_CALL_CLOBBER) {
                    la_cross_call(s, nb_temps);
                }
            }

            /* Record arguments that die in this opcode.  */
            for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
            }
            /* An op producing carry consumes it, viewed in reverse. */
            if (def->flags & TCG_OPF_CARRY_OUT) {
                s->carry_live = false;
            }

            /* Input arguments are live for preceding opcodes.  */
            for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    /* For operands that were dead, initially allow
                       all regs for the type.  */
                    *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
                    ts->state &= ~TS_DEAD;
                }
            }
            /* An op consuming carry makes it live for preceding opcodes. */
            if (def->flags & TCG_OPF_CARRY_IN) {
                s->carry_live = true;
            }

            /* Incorporate constraints for this operand.  */
            switch (opc) {
            case INDEX_op_mov:
                /* Note that these are TCG_OPF_NOT_PRESENT and do not
                   have proper constraints.  That said, special case
                   moves to propagate preferences backward.  */
                if (IS_DEAD_ARG(1)) {
                    *la_temp_pref(arg_temp(op->args[0]))
                        = *la_temp_pref(arg_temp(op->args[1]));
                }
                break;

            default:
                args_ct = opcode_args_ct(op);
                for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                    const TCGArgConstraint *ct = &args_ct[i];
                    TCGRegSet set, *pset;

                    ts = arg_temp(op->args[i]);
                    pset = la_temp_pref(ts);
                    set = *pset;

                    set &= ct->regs;
                    if (ct->ialias) {
                        set &= output_pref(op, ct->alias_index);
                    }
                    /* If the combination is not possible, restart.  */
                    if (set == 0) {
                        set = ct->regs;
                    }
                    *pset = set;
                }
                break;
            }
            break;
        }
        op->life = arg_life;
    }
    assert_carry_dead(s);
}
4377 
/* Liveness analysis: Convert indirect regs to direct temporaries.  */
/*
 * Each indirect global gets a shadow TEMP_EBB temporary (state_ptr).
 * Per-temp 'state' tracks the shadow: TS_DEAD means it must be reloaded
 * from memory before use; TS_MEM means it is synced with its memory slot.
 * Returns true if any op arguments were rewritten (so later passes know
 * the op stream changed).
 */
static bool __attribute__((noinline))
liveness_pass_2(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps, i;
    bool changes = false;
    TCGOp *op, *op_next;

    /* Create a temporary for each indirect global.  */
    for (i = 0; i < nb_globals; ++i) {
        TCGTemp *its = &s->temps[i];
        if (its->indirect_reg) {
            TCGTemp *dts = tcg_temp_alloc(s);
            dts->type = its->type;
            dts->base_type = its->base_type;
            dts->temp_subindex = its->temp_subindex;
            dts->kind = TEMP_EBB;
            its->state_ptr = dts;
        } else {
            its->state_ptr = NULL;
        }
        /* All globals begin dead.  */
        its->state = TS_DEAD;
    }
    /* Non-global temps have no shadow and also begin dead.  */
    for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
        TCGTemp *its = &s->temps[i];
        its->state_ptr = NULL;
        its->state = TS_DEAD;
    }

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];
        TCGLifeData arg_life = op->life;
        int nb_iargs, nb_oargs, call_flags;
        TCGTemp *arg_ts, *dir_ts;

        if (opc == INDEX_op_call) {
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            call_flags = tcg_call_flags(op);
        } else {
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Set flags similar to how calls require.  */
            if (def->flags & TCG_OPF_COND_BRANCH) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else if (def->flags & TCG_OPF_BB_END) {
                /* Like writing globals: save_globals */
                call_flags = 0;
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else {
                /* No effect on globals.  */
                call_flags = (TCG_CALL_NO_READ_GLOBALS |
                              TCG_CALL_NO_WRITE_GLOBALS);
            }
        }

        /* Make sure that input arguments are available.  */
        /* Insert a load from the memory slot for any shadowed input
           whose shadow is currently dead.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts && arg_ts->state == TS_DEAD) {
                TCGOp *lop = tcg_op_insert_before(s, op, INDEX_op_ld,
                                                  arg_ts->type, 3);

                lop->args[0] = temp_arg(dir_ts);
                lop->args[1] = temp_arg(arg_ts->mem_base);
                lop->args[2] = arg_ts->mem_offset;

                /* Loaded, but synced with memory.  */
                arg_ts->state = TS_MEM;
            }
        }

        /* Perform input replacement, and mark inputs that became dead.
           No action is required except keeping temp_state up to date
           so that we reload when needed.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[i] = temp_arg(dir_ts);
                changes = true;
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }

        /* Liveness analysis should ensure that the following are
           all correct, for call sites and basic block end points.  */
        if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
            /* Nothing to do */
        } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are synced back,
                   that is, either TS_DEAD or TS_MEM.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state != 0);
            }
        } else {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are saved back,
                   that is, TS_DEAD, waiting to be reloaded.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state == TS_DEAD);
            }
        }

        /* Outputs become available.  */
        if (opc == INDEX_op_mov) {
            arg_ts = arg_temp(op->args[0]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[0] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                if (NEED_SYNC_ARG(0)) {
                    TCGOp *sop = tcg_op_insert_after(s, op, INDEX_op_st,
                                                     arg_ts->type, 3);
                    TCGTemp *out_ts = dir_ts;

                    if (IS_DEAD_ARG(0)) {
                        /* The mov is dead after the sync: store the mov
                           source directly and drop the mov itself.  */
                        out_ts = arg_temp(op->args[1]);
                        arg_ts->state = TS_DEAD;
                        tcg_op_remove(s, op);
                    } else {
                        arg_ts->state = TS_MEM;
                    }

                    sop->args[0] = temp_arg(out_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;
                } else {
                    tcg_debug_assert(!IS_DEAD_ARG(0));
                }
            }
        } else {
            for (i = 0; i < nb_oargs; i++) {
                arg_ts = arg_temp(op->args[i]);
                dir_ts = arg_ts->state_ptr;
                if (!dir_ts) {
                    continue;
                }
                op->args[i] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                /* Sync outputs upon their last write.  */
                if (NEED_SYNC_ARG(i)) {
                    TCGOp *sop = tcg_op_insert_after(s, op, INDEX_op_st,
                                                     arg_ts->type, 3);

                    sop->args[0] = temp_arg(dir_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;

                    arg_ts->state = TS_MEM;
                }
                /* Drop outputs that are dead.  */
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }
    }

    return changes;
}
4560 
/* Allocate a slot in the TB's stack frame for @ts (and, if the temp was
   subdivided, for all of its sibling parts).  Raises a TB overflow if the
   frame is exhausted.  */
static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
{
    intptr_t off;
    int size, align;

    /* When allocating an object, look at the full type. */
    size = tcg_type_size(ts->base_type);
    switch (ts->base_type) {
    case TCG_TYPE_I32:
        align = 4;
        break;
    case TCG_TYPE_I64:
    case TCG_TYPE_V64:
        align = 8;
        break;
    case TCG_TYPE_I128:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        /*
         * Note that we do not require aligned storage for V256,
         * and that we provide alignment for I128 to match V128,
         * even if that's above what the host ABI requires.
         */
        align = 16;
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Assume the stack is sufficiently aligned.
     * This affects e.g. ARM NEON, where we have 8 byte stack alignment
     * and do not require 16 byte vector alignment.  This seems slightly
     * easier than fully parameterizing the above switch statement.
     */
    align = MIN(TCG_TARGET_STACK_ALIGN, align);
    off = ROUND_UP(s->current_frame_offset, align);

    /* If we've exhausted the stack frame, restart with a smaller TB. */
    if (off + size > s->frame_end) {
        tcg_raise_tb_overflow(s);
    }
    s->current_frame_offset = off + size;
#if defined(__sparc__)
    /* SPARC addresses the frame through a biased pointer. */
    off += TCG_TARGET_STACK_BIAS;
#endif

    /* If the object was subdivided, assign memory to all the parts. */
    if (ts->base_type != ts->type) {
        int part_size = tcg_type_size(ts->type);
        int part_count = size / part_size;

        /*
         * Each part is allocated sequentially in tcg_temp_new_internal.
         * Jump back to the first part by subtracting the current index.
         */
        ts -= ts->temp_subindex;
        for (int i = 0; i < part_count; ++i) {
            ts[i].mem_offset = off + i * part_size;
            ts[i].mem_base = s->frame_temp;
            ts[i].mem_allocated = 1;
        }
    } else {
        ts->mem_offset = off;
        ts->mem_base = s->frame_temp;
        ts->mem_allocated = 1;
    }
}
4629 
4630 /* Assign @reg to @ts, and update reg_to_temp[]. */
4631 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4632 {
4633     if (ts->val_type == TEMP_VAL_REG) {
4634         TCGReg old = ts->reg;
4635         tcg_debug_assert(s->reg_to_temp[old] == ts);
4636         if (old == reg) {
4637             return;
4638         }
4639         s->reg_to_temp[old] = NULL;
4640     }
4641     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4642     s->reg_to_temp[reg] = ts;
4643     ts->val_type = TEMP_VAL_REG;
4644     ts->reg = reg;
4645 }
4646 
4647 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4648 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4649 {
4650     tcg_debug_assert(type != TEMP_VAL_REG);
4651     if (ts->val_type == TEMP_VAL_REG) {
4652         TCGReg reg = ts->reg;
4653         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4654         s->reg_to_temp[reg] = NULL;
4655     }
4656     ts->val_type = type;
4657 }
4658 
4659 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4660 
4661 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4662    mark it free; otherwise mark it dead.  */
4663 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4664 {
4665     TCGTempVal new_type;
4666 
4667     switch (ts->kind) {
4668     case TEMP_FIXED:
4669         return;
4670     case TEMP_GLOBAL:
4671     case TEMP_TB:
4672         new_type = TEMP_VAL_MEM;
4673         break;
4674     case TEMP_EBB:
4675         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4676         break;
4677     case TEMP_CONST:
4678         new_type = TEMP_VAL_CONST;
4679         break;
4680     default:
4681         g_assert_not_reached();
4682     }
4683     set_temp_val_nonreg(s, ts, new_type);
4684 }
4685 
/* Mark a temporary as dead.  */
static inline void temp_dead(TCGContext *s, TCGTemp *ts)
{
    /* Positive free_or_dead: dead (value discarded), not merely free. */
    temp_free_or_dead(s, ts, 1);
}
4691 
/* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
   registers needs to be allocated to store a constant.  If 'free_or_dead'
   is non-zero, subsequently release the temporary; if it is positive, the
   temp is dead; if it is negative, the temp is free.  */
static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
                      TCGRegSet preferred_regs, int free_or_dead)
{
    /* Read-only temps never need a store; already-coherent temps are done. */
    if (!temp_readonly(ts) && !ts->mem_coherent) {
        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        switch (ts->val_type) {
        case TEMP_VAL_CONST:
            /* If we're going to free the temp immediately, then we won't
               require it later in a register, so attempt to store the
               constant to memory directly.  */
            if (free_or_dead
                && tcg_out_sti(s, ts->type, ts->val,
                               ts->mem_base->reg, ts->mem_offset)) {
                break;
            }
            /* Direct store not possible: materialize into a register
               and fall through to the register store.  */
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      allocated_regs, preferred_regs);
            /* fallthrough */

        case TEMP_VAL_REG:
            tcg_out_st(s, ts->type, ts->reg,
                       ts->mem_base->reg, ts->mem_offset);
            break;

        case TEMP_VAL_MEM:
            /* Already in memory; nothing to emit.  */
            break;

        case TEMP_VAL_DEAD:
        default:
            g_assert_not_reached();
        }
        ts->mem_coherent = 1;
    }
    if (free_or_dead) {
        temp_free_or_dead(s, ts, free_or_dead);
    }
}
4735 
4736 /* free register 'reg' by spilling the corresponding temporary if necessary */
4737 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4738 {
4739     TCGTemp *ts = s->reg_to_temp[reg];
4740     if (ts != NULL) {
4741         temp_sync(s, ts, allocated_regs, 0, -1);
4742     }
4743 }
4744 
4745 /**
4746  * tcg_reg_alloc:
4747  * @required_regs: Set of registers in which we must allocate.
4748  * @allocated_regs: Set of registers which must be avoided.
4749  * @preferred_regs: Set of registers we should prefer.
4750  * @rev: True if we search the registers in "indirect" order.
4751  *
4752  * The allocated register must be in @required_regs & ~@allocated_regs,
4753  * but if we can put it in @preferred_regs we may save a move later.
4754  */
4755 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4756                             TCGRegSet allocated_regs,
4757                             TCGRegSet preferred_regs, bool rev)
4758 {
4759     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4760     TCGRegSet reg_ct[2];
4761     const int *order;
4762 
4763     reg_ct[1] = required_regs & ~allocated_regs;
4764     tcg_debug_assert(reg_ct[1] != 0);
4765     reg_ct[0] = reg_ct[1] & preferred_regs;
4766 
4767     /* Skip the preferred_regs option if it cannot be satisfied,
4768        or if the preference made no difference.  */
4769     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4770 
4771     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4772 
4773     /* Try free registers, preferences first.  */
4774     for (j = f; j < 2; j++) {
4775         TCGRegSet set = reg_ct[j];
4776 
4777         if (tcg_regset_single(set)) {
4778             /* One register in the set.  */
4779             TCGReg reg = tcg_regset_first(set);
4780             if (s->reg_to_temp[reg] == NULL) {
4781                 return reg;
4782             }
4783         } else {
4784             for (i = 0; i < n; i++) {
4785                 TCGReg reg = order[i];
4786                 if (s->reg_to_temp[reg] == NULL &&
4787                     tcg_regset_test_reg(set, reg)) {
4788                     return reg;
4789                 }
4790             }
4791         }
4792     }
4793 
4794     /* We must spill something.  */
4795     for (j = f; j < 2; j++) {
4796         TCGRegSet set = reg_ct[j];
4797 
4798         if (tcg_regset_single(set)) {
4799             /* One register in the set.  */
4800             TCGReg reg = tcg_regset_first(set);
4801             tcg_reg_free(s, reg, allocated_regs);
4802             return reg;
4803         } else {
4804             for (i = 0; i < n; i++) {
4805                 TCGReg reg = order[i];
4806                 if (tcg_regset_test_reg(set, reg)) {
4807                     tcg_reg_free(s, reg, allocated_regs);
4808                     return reg;
4809                 }
4810             }
4811         }
4812     }
4813 
4814     g_assert_not_reached();
4815 }
4816 
/* As tcg_reg_alloc, but allocate two consecutive registers (reg, reg + 1),
   returning the lower one.  */
static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
                                 TCGRegSet allocated_regs,
                                 TCGRegSet preferred_regs, bool rev)
{
    int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
    TCGRegSet reg_ct[2];
    const int *order;

    /* Ensure that if I is not in allocated_regs, I+1 is not either. */
    reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
    tcg_debug_assert(reg_ct[1] != 0);
    reg_ct[0] = reg_ct[1] & preferred_regs;

    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;

    /*
     * Skip the preferred_regs option if it cannot be satisfied,
     * or if the preference made no difference.
     */
    k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];

    /*
     * Minimize the number of flushes by looking for 2 free registers first,
     * then a single flush, then two flushes.
     */
    for (fmin = 2; fmin >= 0; fmin--) {
        for (j = k; j < 2; j++) {
            TCGRegSet set = reg_ct[j];

            for (i = 0; i < n; i++) {
                TCGReg reg = order[i];

                if (tcg_regset_test_reg(set, reg)) {
                    /* f counts how many of the pair are already free. */
                    int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
                    if (f >= fmin) {
                        /* tcg_reg_free is a no-op for a free register. */
                        tcg_reg_free(s, reg, allocated_regs);
                        tcg_reg_free(s, reg + 1, allocated_regs);
                        return reg;
                    }
                }
            }
        }
    }
    g_assert_not_reached();
}
4862 
/* Make sure the temporary is in a register.  If needed, allocate the register
   from DESIRED while avoiding ALLOCATED.  */
static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
                      TCGRegSet allocated_regs, TCGRegSet preferred_regs)
{
    TCGReg reg;

    switch (ts->val_type) {
    case TEMP_VAL_REG:
        /* Already resident; nothing to do.  */
        return;
    case TEMP_VAL_CONST:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        if (ts->type <= TCG_TYPE_I64) {
            tcg_out_movi(s, ts->type, reg, ts->val);
        } else {
            uint64_t val = ts->val;
            MemOp vece = MO_64;

            /*
             * Find the minimal vector element that matches the constant.
             * The targets will, in general, have to do this search anyway,
             * do this generically.
             */
            if (val == dup_const(MO_8, val)) {
                vece = MO_8;
            } else if (val == dup_const(MO_16, val)) {
                vece = MO_16;
            } else if (val == dup_const(MO_32, val)) {
                vece = MO_32;
            }

            tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
        }
        /* The register copy is newer than any memory copy.  */
        ts->mem_coherent = 0;
        break;
    case TEMP_VAL_MEM:
        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
        /* Freshly loaded, so register and memory agree.  */
        ts->mem_coherent = 1;
        break;
    case TEMP_VAL_DEAD:
    default:
        g_assert_not_reached();
    }
    set_temp_val_reg(s, ts, reg);
}
4914 
/* Save a temporary to memory. 'allocated_regs' is retained for interface
   symmetry with temp_sync; it is unused here.  */
static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
{
    /* The liveness analysis already ensures that globals are back
       in memory. Keep a tcg_debug_assert for safety. */
    tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
}
4923 
4924 /* save globals to their canonical location and assume they can be
4925    modified be the following code. 'allocated_regs' is used in case a
4926    temporary registers needs to be allocated to store a constant. */
4927 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4928 {
4929     int i, n;
4930 
4931     for (i = 0, n = s->nb_globals; i < n; i++) {
4932         temp_save(s, &s->temps[i], allocated_regs);
4933     }
4934 }
4935 
4936 /* sync globals to their canonical location and assume they can be
4937    read by the following code. 'allocated_regs' is used in case a
4938    temporary registers needs to be allocated to store a constant. */
4939 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4940 {
4941     int i, n;
4942 
4943     for (i = 0, n = s->nb_globals; i < n; i++) {
4944         TCGTemp *ts = &s->temps[i];
4945         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4946                          || ts->kind == TEMP_FIXED
4947                          || ts->mem_coherent);
4948     }
4949 }
4950 
4951 /* at the end of a basic block, we assume all temporaries are dead and
4952    all globals are stored at their canonical location. */
4953 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4954 {
4955     assert_carry_dead(s);
4956     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4957         TCGTemp *ts = &s->temps[i];
4958 
4959         switch (ts->kind) {
4960         case TEMP_TB:
4961             temp_save(s, ts, allocated_regs);
4962             break;
4963         case TEMP_EBB:
4964             /* The liveness analysis already ensures that temps are dead.
4965                Keep an tcg_debug_assert for safety. */
4966             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4967             break;
4968         case TEMP_CONST:
4969             /* Similarly, we should have freed any allocated register. */
4970             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4971             break;
4972         default:
4973             g_assert_not_reached();
4974         }
4975     }
4976 
4977     save_globals(s, allocated_regs);
4978 }
4979 
4980 /*
4981  * At a conditional branch, we assume all temporaries are dead unless
4982  * explicitly live-across-conditional-branch; all globals and local
4983  * temps are synced to their location.
4984  */
4985 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4986 {
4987     assert_carry_dead(s);
4988     sync_globals(s, allocated_regs);
4989 
4990     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4991         TCGTemp *ts = &s->temps[i];
4992         /*
4993          * The liveness analysis already ensures that temps are dead.
4994          * Keep tcg_debug_asserts for safety.
4995          */
4996         switch (ts->kind) {
4997         case TEMP_TB:
4998             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4999             break;
5000         case TEMP_EBB:
5001         case TEMP_CONST:
5002             break;
5003         default:
5004             g_assert_not_reached();
5005         }
5006     }
5007 }
5008 
5009 /*
5010  * Specialized code generation for INDEX_op_mov_* with a constant.
5011  */
5012 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
5013                                   tcg_target_ulong val, TCGLifeData arg_life,
5014                                   TCGRegSet preferred_regs)
5015 {
5016     /* ENV should not be modified.  */
5017     tcg_debug_assert(!temp_readonly(ots));
5018 
5019     /* The movi is not explicitly generated here.  */
5020     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
5021     ots->val = val;
5022     ots->mem_coherent = 0;
5023     if (NEED_SYNC_ARG(0)) {
5024         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
5025     } else if (IS_DEAD_ARG(0)) {
5026         temp_dead(s, ots);
5027     }
5028 }
5029 
5030 /*
5031  * Specialized code generation for INDEX_op_mov_*.
5032  */
5033 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
5034 {
5035     const TCGLifeData arg_life = op->life;
5036     TCGRegSet allocated_regs, preferred_regs;
5037     TCGTemp *ts, *ots;
5038     TCGType otype, itype;
5039     TCGReg oreg, ireg;
5040 
5041     allocated_regs = s->reserved_regs;
5042     preferred_regs = output_pref(op, 0);
5043     ots = arg_temp(op->args[0]);
5044     ts = arg_temp(op->args[1]);
5045 
5046     /* ENV should not be modified.  */
5047     tcg_debug_assert(!temp_readonly(ots));
5048 
5049     /* Note that otype != itype for no-op truncation.  */
5050     otype = ots->type;
5051     itype = ts->type;
5052 
5053     if (ts->val_type == TEMP_VAL_CONST) {
5054         /* propagate constant or generate sti */
5055         tcg_target_ulong val = ts->val;
5056         if (IS_DEAD_ARG(1)) {
5057             temp_dead(s, ts);
5058         }
5059         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
5060         return;
5061     }
5062 
5063     /* If the source value is in memory we're going to be forced
5064        to have it in a register in order to perform the copy.  Copy
5065        the SOURCE value into its own register first, that way we
5066        don't have to reload SOURCE the next time it is used. */
5067     if (ts->val_type == TEMP_VAL_MEM) {
5068         temp_load(s, ts, tcg_target_available_regs[itype],
5069                   allocated_regs, preferred_regs);
5070     }
5071     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
5072     ireg = ts->reg;
5073 
5074     if (IS_DEAD_ARG(0)) {
5075         /* mov to a non-saved dead register makes no sense (even with
5076            liveness analysis disabled). */
5077         tcg_debug_assert(NEED_SYNC_ARG(0));
5078         if (!ots->mem_allocated) {
5079             temp_allocate_frame(s, ots);
5080         }
5081         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
5082         if (IS_DEAD_ARG(1)) {
5083             temp_dead(s, ts);
5084         }
5085         temp_dead(s, ots);
5086         return;
5087     }
5088 
5089     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
5090         /*
5091          * The mov can be suppressed.  Kill input first, so that it
5092          * is unlinked from reg_to_temp, then set the output to the
5093          * reg that we saved from the input.
5094          */
5095         temp_dead(s, ts);
5096         oreg = ireg;
5097     } else {
5098         if (ots->val_type == TEMP_VAL_REG) {
5099             oreg = ots->reg;
5100         } else {
5101             /* Make sure to not spill the input register during allocation. */
5102             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
5103                                  allocated_regs | ((TCGRegSet)1 << ireg),
5104                                  preferred_regs, ots->indirect_base);
5105         }
5106         if (!tcg_out_mov(s, otype, oreg, ireg)) {
5107             /*
5108              * Cross register class move not supported.
5109              * Store the source register into the destination slot
5110              * and leave the destination temp as TEMP_VAL_MEM.
5111              */
5112             assert(!temp_readonly(ots));
5113             if (!ts->mem_allocated) {
5114                 temp_allocate_frame(s, ots);
5115             }
5116             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
5117             set_temp_val_nonreg(s, ts, TEMP_VAL_MEM);
5118             ots->mem_coherent = 1;
5119             return;
5120         }
5121     }
5122     set_temp_val_reg(s, ots, oreg);
5123     ots->mem_coherent = 0;
5124 
5125     if (NEED_SYNC_ARG(0)) {
5126         temp_sync(s, ots, allocated_regs, 0, 0);
5127     }
5128 }
5129 
5130 /*
5131  * Specialized code generation for INDEX_op_dup_vec.
5132  */
5133 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
5134 {
5135     const TCGLifeData arg_life = op->life;
5136     TCGRegSet dup_out_regs, dup_in_regs;
5137     const TCGArgConstraint *dup_args_ct;
5138     TCGTemp *its, *ots;
5139     TCGType itype, vtype;
5140     unsigned vece;
5141     int lowpart_ofs;
5142     bool ok;
5143 
5144     ots = arg_temp(op->args[0]);
5145     its = arg_temp(op->args[1]);
5146 
5147     /* ENV should not be modified.  */
5148     tcg_debug_assert(!temp_readonly(ots));
5149 
5150     itype = its->type;
5151     vece = TCGOP_VECE(op);
5152     vtype = TCGOP_TYPE(op);
5153 
5154     if (its->val_type == TEMP_VAL_CONST) {
5155         /* Propagate constant via movi -> dupi.  */
5156         tcg_target_ulong val = its->val;
5157         if (IS_DEAD_ARG(1)) {
5158             temp_dead(s, its);
5159         }
5160         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
5161         return;
5162     }
5163 
5164     dup_args_ct = opcode_args_ct(op);
5165     dup_out_regs = dup_args_ct[0].regs;
5166     dup_in_regs = dup_args_ct[1].regs;
5167 
5168     /* Allocate the output register now.  */
5169     if (ots->val_type != TEMP_VAL_REG) {
5170         TCGRegSet allocated_regs = s->reserved_regs;
5171         TCGReg oreg;
5172 
5173         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
5174             /* Make sure to not spill the input register. */
5175             tcg_regset_set_reg(allocated_regs, its->reg);
5176         }
5177         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5178                              output_pref(op, 0), ots->indirect_base);
5179         set_temp_val_reg(s, ots, oreg);
5180     }
5181 
5182     switch (its->val_type) {
5183     case TEMP_VAL_REG:
5184         /*
5185          * The dup constriaints must be broad, covering all possible VECE.
5186          * However, tcg_op_dup_vec() gets to see the VECE and we allow it
5187          * to fail, indicating that extra moves are required for that case.
5188          */
5189         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
5190             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
5191                 goto done;
5192             }
5193             /* Try again from memory or a vector input register.  */
5194         }
5195         if (!its->mem_coherent) {
5196             /*
5197              * The input register is not synced, and so an extra store
5198              * would be required to use memory.  Attempt an integer-vector
5199              * register move first.  We do not have a TCGRegSet for this.
5200              */
5201             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
5202                 break;
5203             }
5204             /* Sync the temp back to its slot and load from there.  */
5205             temp_sync(s, its, s->reserved_regs, 0, 0);
5206         }
5207         /* fall through */
5208 
5209     case TEMP_VAL_MEM:
5210         lowpart_ofs = 0;
5211         if (HOST_BIG_ENDIAN) {
5212             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
5213         }
5214         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
5215                              its->mem_offset + lowpart_ofs)) {
5216             goto done;
5217         }
5218         /* Load the input into the destination vector register. */
5219         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
5220         break;
5221 
5222     default:
5223         g_assert_not_reached();
5224     }
5225 
5226     /* We now have a vector input register, so dup must succeed. */
5227     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
5228     tcg_debug_assert(ok);
5229 
5230  done:
5231     ots->mem_coherent = 0;
5232     if (IS_DEAD_ARG(1)) {
5233         temp_dead(s, its);
5234     }
5235     if (NEED_SYNC_ARG(0)) {
5236         temp_sync(s, ots, s->reserved_regs, 0, 0);
5237     }
5238     if (IS_DEAD_ARG(0)) {
5239         temp_dead(s, ots);
5240     }
5241 }
5242 
/*
 * Perform register allocation and emit host code for a single op,
 * honoring the operand constraints registered for op->opc: load
 * inputs into acceptable registers (or pass them as immediates),
 * allocate outputs, then dispatch to the backend emitter.
 */
static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    const TCGOpDef * const def = &tcg_op_defs[op->opc];
    TCGRegSet i_allocated_regs;
    TCGRegSet o_allocated_regs;
    int i, k, nb_iargs, nb_oargs;
    TCGReg reg;
    TCGArg arg;
    const TCGArgConstraint *args_ct;
    const TCGArgConstraint *arg_ct;
    TCGTemp *ts;
    TCGArg new_args[TCG_MAX_OP_ARGS];   /* allocated register or immediate */
    int const_args[TCG_MAX_OP_ARGS];    /* nonzero: new_args[i] is immediate */
    TCGCond op_cond;

    if (def->flags & TCG_OPF_CARRY_IN) {
        tcg_debug_assert(s->carry_live);
    }

    nb_oargs = def->nb_oargs;
    nb_iargs = def->nb_iargs;

    /* copy constants */
    memcpy(new_args + nb_oargs + nb_iargs,
           op->args + nb_oargs + nb_iargs,
           sizeof(TCGArg) * def->nb_cargs);

    i_allocated_regs = s->reserved_regs;
    o_allocated_regs = s->reserved_regs;

    /*
     * Extract the comparison condition, if this opcode carries one;
     * it is passed to tcg_target_const_match() below when deciding
     * whether an input constant may be used as an immediate.
     */
    switch (op->opc) {
    case INDEX_op_brcond:
        op_cond = op->args[2];
        break;
    case INDEX_op_setcond:
    case INDEX_op_negsetcond:
    case INDEX_op_cmp_vec:
        op_cond = op->args[3];
        break;
    case INDEX_op_brcond2_i32:
        op_cond = op->args[4];
        break;
    case INDEX_op_movcond:
    case INDEX_op_setcond2_i32:
    case INDEX_op_cmpsel_vec:
        op_cond = op->args[5];
        break;
    default:
        /* No condition within opcode. */
        op_cond = TCG_COND_ALWAYS;
        break;
    }

    args_ct = opcode_args_ct(op);

    /* satisfy input constraints */
    for (k = 0; k < nb_iargs; k++) {
        TCGRegSet i_preferred_regs, i_required_regs;
        bool allocate_new_reg, copyto_new_reg;
        TCGTemp *ts2;
        int i1, i2;

        /* Inputs are processed in constraint-sorted order. */
        i = args_ct[nb_oargs + k].sort_index;
        arg = op->args[i];
        arg_ct = &args_ct[i];
        ts = arg_temp(arg);

        if (ts->val_type == TEMP_VAL_CONST) {
#ifdef TCG_REG_ZERO
            if (ts->val == 0 && (arg_ct->ct & TCG_CT_REG_ZERO)) {
                /* Hardware zero register: indicate register via non-const. */
                const_args[i] = 0;
                new_args[i] = TCG_REG_ZERO;
                continue;
            }
#endif

            if (tcg_target_const_match(ts->val, arg_ct->ct, ts->type,
                                       op_cond, TCGOP_VECE(op))) {
                /* constant is OK for instruction */
                const_args[i] = 1;
                new_args[i] = ts->val;
                continue;
            }
        }

        reg = ts->reg;
        i_preferred_regs = 0;
        i_required_regs = arg_ct->regs;
        allocate_new_reg = false;
        copyto_new_reg = false;

        switch (arg_ct->pair) {
        case 0: /* not paired */
            if (arg_ct->ialias) {
                i_preferred_regs = output_pref(op, arg_ct->alias_index);

                /*
                 * If the input is readonly, then it cannot also be an
                 * output and aliased to itself.  If the input is not
                 * dead after the instruction, we must allocate a new
                 * register and move it.
                 */
                if (temp_readonly(ts) || !IS_DEAD_ARG(i)
                    || args_ct[arg_ct->alias_index].newreg) {
                    allocate_new_reg = true;
                } else if (ts->val_type == TEMP_VAL_REG) {
                    /*
                     * Check if the current register has already been
                     * allocated for another input.
                     */
                    allocate_new_reg =
                        tcg_regset_test_reg(i_allocated_regs, reg);
                }
            }
            if (!allocate_new_reg) {
                temp_load(s, ts, i_required_regs, i_allocated_regs,
                          i_preferred_regs);
                reg = ts->reg;
                allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
            }
            if (allocate_new_reg) {
                /*
                 * Allocate a new register matching the constraint
                 * and move the temporary register into it.
                 */
                temp_load(s, ts, tcg_target_available_regs[ts->type],
                          i_allocated_regs, 0);
                reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
                                    i_preferred_regs, ts->indirect_base);
                copyto_new_reg = true;
            }
            break;

        case 1:
            /* First of an input pair; if i1 == i2, the second is an output. */
            i1 = i;
            i2 = arg_ct->pair_index;
            ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;

            /*
             * It is easier to default to allocating a new pair
             * and to identify a few cases where it's not required.
             */
            if (arg_ct->ialias) {
                i_preferred_regs = output_pref(op, arg_ct->alias_index);
                if (IS_DEAD_ARG(i1) &&
                    IS_DEAD_ARG(i2) &&
                    !temp_readonly(ts) &&
                    ts->val_type == TEMP_VAL_REG &&
                    ts->reg < TCG_TARGET_NB_REGS - 1 &&
                    tcg_regset_test_reg(i_required_regs, reg) &&
                    !tcg_regset_test_reg(i_allocated_regs, reg) &&
                    !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
                    (ts2
                     ? ts2->val_type == TEMP_VAL_REG &&
                       ts2->reg == reg + 1 &&
                       !temp_readonly(ts2)
                     : s->reg_to_temp[reg + 1] == NULL)) {
                    break;
                }
            } else {
                /* Without aliasing, the pair must also be an input. */
                tcg_debug_assert(ts2);
                if (ts->val_type == TEMP_VAL_REG &&
                    ts2->val_type == TEMP_VAL_REG &&
                    ts2->reg == reg + 1 &&
                    tcg_regset_test_reg(i_required_regs, reg)) {
                    break;
                }
            }
            reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
                                     0, ts->indirect_base);
            goto do_pair;

        case 2: /* pair second */
            reg = new_args[arg_ct->pair_index] + 1;
            goto do_pair;

        case 3: /* ialias with second output, no first input */
            tcg_debug_assert(arg_ct->ialias);
            i_preferred_regs = output_pref(op, arg_ct->alias_index);

            if (IS_DEAD_ARG(i) &&
                !temp_readonly(ts) &&
                ts->val_type == TEMP_VAL_REG &&
                reg > 0 &&
                s->reg_to_temp[reg - 1] == NULL &&
                tcg_regset_test_reg(i_required_regs, reg) &&
                !tcg_regset_test_reg(i_allocated_regs, reg) &&
                !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
                tcg_regset_set_reg(i_allocated_regs, reg - 1);
                break;
            }
            /*
             * The constraint names the second (high) half of the pair,
             * so shift the required set down one and allocate; the
             * first half is only reserved, not an operand here.
             */
            reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
                                     i_allocated_regs, 0,
                                     ts->indirect_base);
            tcg_regset_set_reg(i_allocated_regs, reg);
            reg += 1;
            goto do_pair;

        do_pair:
            /*
             * If an aliased input is not dead after the instruction,
             * we must allocate a new register and move it.
             */
            if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
                TCGRegSet t_allocated_regs = i_allocated_regs;

                /*
                 * Because of the alias, and the continued life, make sure
                 * that the temp is somewhere *other* than the reg pair,
                 * and we get a copy in reg.
                 */
                tcg_regset_set_reg(t_allocated_regs, reg);
                tcg_regset_set_reg(t_allocated_regs, reg + 1);
                if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
                    /* If ts was already in reg, copy it somewhere else. */
                    TCGReg nr;
                    bool ok;

                    tcg_debug_assert(ts->kind != TEMP_FIXED);
                    nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
                                       t_allocated_regs, 0, ts->indirect_base);
                    ok = tcg_out_mov(s, ts->type, nr, reg);
                    tcg_debug_assert(ok);

                    set_temp_val_reg(s, ts, nr);
                } else {
                    temp_load(s, ts, tcg_target_available_regs[ts->type],
                              t_allocated_regs, 0);
                    copyto_new_reg = true;
                }
            } else {
                /* Preferably allocate to reg, otherwise copy. */
                i_required_regs = (TCGRegSet)1 << reg;
                temp_load(s, ts, i_required_regs, i_allocated_regs,
                          i_preferred_regs);
                copyto_new_reg = ts->reg != reg;
            }
            break;

        default:
            g_assert_not_reached();
        }

        if (copyto_new_reg) {
            if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                /*
                 * Cross register class move not supported.  Sync the
                 * temp back to its slot and load from there.
                 */
                temp_sync(s, ts, i_allocated_regs, 0, 0);
                tcg_out_ld(s, ts->type, reg,
                           ts->mem_base->reg, ts->mem_offset);
            }
        }
        new_args[i] = reg;
        const_args[i] = 0;
        tcg_regset_set_reg(i_allocated_regs, reg);
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    if (def->flags & TCG_OPF_COND_BRANCH) {
        tcg_reg_alloc_cbranch(s, i_allocated_regs);
    } else if (def->flags & TCG_OPF_BB_END) {
        tcg_reg_alloc_bb_end(s, i_allocated_regs);
    } else {
        if (def->flags & TCG_OPF_CALL_CLOBBER) {
            assert_carry_dead(s);
            /* XXX: permit generic clobber register list ? */
            for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
                    tcg_reg_free(s, i, i_allocated_regs);
                }
            }
        }
        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
            /* sync globals if the op has side effects and might trigger
               an exception. */
            sync_globals(s, i_allocated_regs);
        }

        /* satisfy the output constraints */
        for (k = 0; k < nb_oargs; k++) {
            i = args_ct[k].sort_index;
            arg = op->args[i];
            arg_ct = &args_ct[i];
            ts = arg_temp(arg);

            /* ENV should not be modified.  */
            tcg_debug_assert(!temp_readonly(ts));

            switch (arg_ct->pair) {
            case 0: /* not paired */
                if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
                    /* Output aliases a register input: reuse it. */
                    reg = new_args[arg_ct->alias_index];
                } else if (arg_ct->newreg) {
                    /* Must not overlap any input register. */
                    reg = tcg_reg_alloc(s, arg_ct->regs,
                                        i_allocated_regs | o_allocated_regs,
                                        output_pref(op, k), ts->indirect_base);
                } else {
                    reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
                                        output_pref(op, k), ts->indirect_base);
                }
                break;

            case 1: /* first of pair */
                if (arg_ct->oalias) {
                    reg = new_args[arg_ct->alias_index];
                } else if (arg_ct->newreg) {
                    reg = tcg_reg_alloc_pair(s, arg_ct->regs,
                                             i_allocated_regs | o_allocated_regs,
                                             output_pref(op, k),
                                             ts->indirect_base);
                } else {
                    reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
                                             output_pref(op, k),
                                             ts->indirect_base);
                }
                break;

            case 2: /* second of pair */
                if (arg_ct->oalias) {
                    reg = new_args[arg_ct->alias_index];
                } else {
                    reg = new_args[arg_ct->pair_index] + 1;
                }
                break;

            case 3: /* first of pair, aliasing with a second input */
                tcg_debug_assert(!arg_ct->newreg);
                reg = new_args[arg_ct->pair_index] - 1;
                break;

            default:
                g_assert_not_reached();
            }
            tcg_regset_set_reg(o_allocated_regs, reg);
            set_temp_val_reg(s, ts, reg);
            ts->mem_coherent = 0;
            new_args[i] = reg;
        }
    }

    /* emit instruction */
    TCGType type = TCGOP_TYPE(op);
    switch (op->opc) {
    case INDEX_op_addc1o:
        tcg_out_set_carry(s);
        /* fall through */
    case INDEX_op_add:
    case INDEX_op_addcio:
    case INDEX_op_addco:
    case INDEX_op_and:
    case INDEX_op_andc:
    case INDEX_op_clz:
    case INDEX_op_ctz:
    case INDEX_op_divs:
    case INDEX_op_divu:
    case INDEX_op_eqv:
    case INDEX_op_mul:
    case INDEX_op_mulsh:
    case INDEX_op_muluh:
    case INDEX_op_nand:
    case INDEX_op_nor:
    case INDEX_op_or:
    case INDEX_op_orc:
    case INDEX_op_rems:
    case INDEX_op_remu:
    case INDEX_op_rotl:
    case INDEX_op_rotr:
    case INDEX_op_sar:
    case INDEX_op_shl:
    case INDEX_op_shr:
    case INDEX_op_xor:
        {
            const TCGOutOpBinary *out =
                container_of(all_outop[op->opc], TCGOutOpBinary, base);

            /* Constants should never appear in the first source operand. */
            tcg_debug_assert(!const_args[1]);
            if (const_args[2]) {
                out->out_rri(s, type, new_args[0], new_args[1], new_args[2]);
            } else {
                out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
            }
        }
        break;

    case INDEX_op_sub:
        {
            const TCGOutOpSubtract *out = &outop_sub;

            /*
             * Constants should never appear in the second source operand.
             * These are folded to add with negative constant.
             */
            tcg_debug_assert(!const_args[2]);
            if (const_args[1]) {
                out->out_rir(s, type, new_args[0], new_args[1], new_args[2]);
            } else {
                out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
            }
        }
        break;

    case INDEX_op_subb1o:
        tcg_out_set_borrow(s);
        /* fall through */
    case INDEX_op_addci:
    case INDEX_op_subbi:
    case INDEX_op_subbio:
    case INDEX_op_subbo:
        {
            const TCGOutOpAddSubCarry *out =
                container_of(all_outop[op->opc], TCGOutOpAddSubCarry, base);

            /* Dispatch on which operands were matched as immediates. */
            if (const_args[2]) {
                if (const_args[1]) {
                    out->out_rii(s, type, new_args[0],
                                 new_args[1], new_args[2]);
                } else {
                    out->out_rri(s, type, new_args[0],
                                 new_args[1], new_args[2]);
                }
            } else if (const_args[1]) {
                out->out_rir(s, type, new_args[0], new_args[1], new_args[2]);
            } else {
                out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
            }
        }
        break;

    case INDEX_op_bswap64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
    case INDEX_op_extrl_i64_i32:
    case INDEX_op_extrh_i64_i32:
        assert(TCG_TARGET_REG_BITS == 64);
        /* fall through */
    case INDEX_op_ctpop:
    case INDEX_op_neg:
    case INDEX_op_not:
        {
            const TCGOutOpUnary *out =
                container_of(all_outop[op->opc], TCGOutOpUnary, base);

            /* Constants should have been folded. */
            tcg_debug_assert(!const_args[1]);
            out->out_rr(s, type, new_args[0], new_args[1]);
        }
        break;

    case INDEX_op_bswap16:
    case INDEX_op_bswap32:
        {
            const TCGOutOpBswap *out =
                container_of(all_outop[op->opc], TCGOutOpBswap, base);

            tcg_debug_assert(!const_args[1]);
            out->out_rr(s, type, new_args[0], new_args[1], new_args[2]);
        }
        break;

    case INDEX_op_deposit:
        {
            const TCGOutOpDeposit *out = &outop_deposit;

            if (const_args[2]) {
                tcg_debug_assert(!const_args[1]);
                out->out_rri(s, type, new_args[0], new_args[1],
                             new_args[2], new_args[3], new_args[4]);
            } else if (const_args[1]) {
                /* Only a zero base is accepted as constant here. */
                tcg_debug_assert(new_args[1] == 0);
                tcg_debug_assert(!const_args[2]);
                out->out_rzr(s, type, new_args[0], new_args[2],
                             new_args[3], new_args[4]);
            } else {
                out->out_rrr(s, type, new_args[0], new_args[1],
                             new_args[2], new_args[3], new_args[4]);
            }
        }
        break;

    case INDEX_op_divs2:
    case INDEX_op_divu2:
        {
            const TCGOutOpDivRem *out =
                container_of(all_outop[op->opc], TCGOutOpDivRem, base);

            /* Only used by x86 and s390x, which use matching constraints. */
            tcg_debug_assert(new_args[0] == new_args[2]);
            tcg_debug_assert(new_args[1] == new_args[3]);
            tcg_debug_assert(!const_args[4]);
            out->out_rr01r(s, type, new_args[0], new_args[1], new_args[4]);
        }
        break;

    case INDEX_op_extract:
    case INDEX_op_sextract:
        {
            const TCGOutOpExtract *out =
                container_of(all_outop[op->opc], TCGOutOpExtract, base);

            tcg_debug_assert(!const_args[1]);
            out->out_rr(s, type, new_args[0], new_args[1],
                        new_args[2], new_args[3]);
        }
        break;

    case INDEX_op_extract2:
        {
            const TCGOutOpExtract2 *out = &outop_extract2;

            tcg_debug_assert(!const_args[1]);
            tcg_debug_assert(!const_args[2]);
            out->out_rrr(s, type, new_args[0], new_args[1],
                         new_args[2], new_args[3]);
        }
        break;

    case INDEX_op_ld8u:
    case INDEX_op_ld8s:
    case INDEX_op_ld16u:
    case INDEX_op_ld16s:
    case INDEX_op_ld32u:
    case INDEX_op_ld32s:
    case INDEX_op_ld:
        {
            const TCGOutOpLoad *out =
                container_of(all_outop[op->opc], TCGOutOpLoad, base);

            tcg_debug_assert(!const_args[1]);
            out->out(s, type, new_args[0], new_args[1], new_args[2]);
        }
        break;

    case INDEX_op_muls2:
    case INDEX_op_mulu2:
        {
            const TCGOutOpMul2 *out =
                container_of(all_outop[op->opc], TCGOutOpMul2, base);

            tcg_debug_assert(!const_args[2]);
            tcg_debug_assert(!const_args[3]);
            out->out_rrrr(s, type, new_args[0], new_args[1],
                          new_args[2], new_args[3]);
        }
        break;

    case INDEX_op_st32:
        /* Use tcg_op_st w/ I32. */
        type = TCG_TYPE_I32;
        /* fall through */
    case INDEX_op_st:
    case INDEX_op_st8:
    case INDEX_op_st16:
        {
            const TCGOutOpStore *out =
                container_of(all_outop[op->opc], TCGOutOpStore, base);

            if (const_args[0]) {
                out->out_i(s, type, new_args[0], new_args[1], new_args[2]);
            } else {
                out->out_r(s, type, new_args[0], new_args[1], new_args[2]);
            }
        }
        break;

    case INDEX_op_qemu_ld:
    case INDEX_op_qemu_st:
        {
            const TCGOutOpQemuLdSt *out =
                container_of(all_outop[op->opc], TCGOutOpQemuLdSt, base);

            out->out(s, type, new_args[0], new_args[1], new_args[2]);
        }
        break;

    case INDEX_op_qemu_ld2:
    case INDEX_op_qemu_st2:
        {
            const TCGOutOpQemuLdSt2 *out =
                container_of(all_outop[op->opc], TCGOutOpQemuLdSt2, base);

            out->out(s, type, new_args[0], new_args[1],
                     new_args[2], new_args[3]);
        }
        break;

    case INDEX_op_brcond:
        {
            const TCGOutOpBrcond *out = &outop_brcond;
            TCGCond cond = new_args[2];
            TCGLabel *label = arg_label(new_args[3]);

            tcg_debug_assert(!const_args[0]);
            if (const_args[1]) {
                out->out_ri(s, type, cond, new_args[0], new_args[1], label);
            } else {
                out->out_rr(s, type, cond, new_args[0], new_args[1], label);
            }
        }
        break;

    case INDEX_op_movcond:
        {
            const TCGOutOpMovcond *out = &outop_movcond;
            TCGCond cond = new_args[5];

            tcg_debug_assert(!const_args[1]);
            out->out(s, type, cond, new_args[0],
                     new_args[1], new_args[2], const_args[2],
                     new_args[3], const_args[3],
                     new_args[4], const_args[4]);
        }
        break;

    case INDEX_op_setcond:
    case INDEX_op_negsetcond:
        {
            const TCGOutOpSetcond *out =
                container_of(all_outop[op->opc], TCGOutOpSetcond, base);
            TCGCond cond = new_args[3];

            tcg_debug_assert(!const_args[1]);
            if (const_args[2]) {
                out->out_rri(s, type, cond,
                             new_args[0], new_args[1], new_args[2]);
            } else {
                out->out_rrr(s, type, cond,
                             new_args[0], new_args[1], new_args[2]);
            }
        }
        break;

#if TCG_TARGET_REG_BITS == 32
    case INDEX_op_brcond2_i32:
        {
            const TCGOutOpBrcond2 *out = &outop_brcond2;
            TCGCond cond = new_args[4];
            TCGLabel *label = arg_label(new_args[5]);

            tcg_debug_assert(!const_args[0]);
            tcg_debug_assert(!const_args[1]);
            out->out(s, cond, new_args[0], new_args[1],
                     new_args[2], const_args[2],
                     new_args[3], const_args[3], label);
        }
        break;
    case INDEX_op_setcond2_i32:
        {
            const TCGOutOpSetcond2 *out = &outop_setcond2;
            TCGCond cond = new_args[5];

            tcg_debug_assert(!const_args[1]);
            tcg_debug_assert(!const_args[2]);
            out->out(s, cond, new_args[0], new_args[1], new_args[2],
                     new_args[3], const_args[3], new_args[4], const_args[4]);
        }
        break;
#else
    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        g_assert_not_reached();
#endif

    case INDEX_op_goto_ptr:
        tcg_debug_assert(!const_args[0]);
        tcg_out_goto_ptr(s, new_args[0]);
        break;

    default:
        tcg_debug_assert(def->flags & TCG_OPF_VECTOR);
        tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64,
                       TCGOP_VECE(op), new_args, const_args);
        break;
    }

    /* Track the live carry flag across ops consuming/producing it. */
    if (def->flags & TCG_OPF_CARRY_IN) {
        s->carry_live = false;
    }
    if (def->flags & TCG_OPF_CARRY_OUT) {
        s->carry_live = true;
    }

    /* move the outputs in the correct register if needed */
    for(i = 0; i < nb_oargs; i++) {
        ts = arg_temp(op->args[i]);

        /* ENV should not be modified.  */
        tcg_debug_assert(!temp_readonly(ts));

        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
5951 
/*
 * Register allocation and code generation for dup2_vec: replicate a
 * 64-bit element assembled from two 32-bit halves on a 32-bit host.
 * Returns true if the dup was emitted here, false if the caller must
 * fall back to generic expansion.
 */
static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGTemp *ots, *itsl, *itsh;
    TCGType vtype = TCGOP_TYPE(op);

    /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
    tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
    tcg_debug_assert(TCGOP_VECE(op) == MO_64);

    ots = arg_temp(op->args[0]);    /* output vector */
    itsl = arg_temp(op->args[1]);   /* low 32-bit half */
    itsh = arg_temp(op->args[2]);   /* high 32-bit half */

    /* ENV should not be modified.  */
    tcg_debug_assert(!temp_readonly(ots));

    /* Allocate the output register now.  */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;
        TCGRegSet dup_out_regs = opcode_args_ct(op)[0].regs;
        TCGReg oreg;

        /* Make sure to not spill the input registers. */
        if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsl->reg);
        }
        if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsh->reg);
        }

        oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                             output_pref(op, 0), ots->indirect_base);
        set_temp_val_reg(s, ots, oreg);
    }

    /* Promote dup2 of immediates to dupi_vec. */
    if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
        uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
        MemOp vece = MO_64;

        /* Use the narrowest element size that still replicates to val. */
        if (val == dup_const(MO_8, val)) {
            vece = MO_8;
        } else if (val == dup_const(MO_16, val)) {
            vece = MO_16;
        } else if (val == dup_const(MO_32, val)) {
            vece = MO_32;
        }

        tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
        goto done;
    }

    /* If the two inputs form one 64-bit value, try dupm_vec. */
    if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
        itsh->temp_subindex == !HOST_BIG_ENDIAN &&
        itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
        /* its points at the first (memory-order) half of the pair. */
        TCGTemp *its = itsl - HOST_BIG_ENDIAN;

        temp_sync(s, its + 0, s->reserved_regs, 0, 0);
        temp_sync(s, its + 1, s->reserved_regs, 0, 0);

        if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
                             its->mem_base->reg, its->mem_offset)) {
            goto done;
        }
    }

    /* Fall back to generic expansion. */
    return false;

 done:
    ots->mem_coherent = 0;
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, itsl);
    }
    if (IS_DEAD_ARG(2)) {
        temp_dead(s, itsh);
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
    } else if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
    return true;
}
6038 
6039 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
6040                          TCGRegSet allocated_regs)
6041 {
6042     if (ts->val_type == TEMP_VAL_REG) {
6043         if (ts->reg != reg) {
6044             tcg_reg_free(s, reg, allocated_regs);
6045             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
6046                 /*
6047                  * Cross register class move not supported.  Sync the
6048                  * temp back to its slot and load from there.
6049                  */
6050                 temp_sync(s, ts, allocated_regs, 0, 0);
6051                 tcg_out_ld(s, ts->type, reg,
6052                            ts->mem_base->reg, ts->mem_offset);
6053             }
6054         }
6055     } else {
6056         TCGRegSet arg_set = 0;
6057 
6058         tcg_reg_free(s, reg, allocated_regs);
6059         tcg_regset_set_reg(arg_set, reg);
6060         temp_load(s, ts, arg_set, allocated_regs, 0);
6061     }
6062 }
6063 
6064 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
6065                          TCGRegSet allocated_regs)
6066 {
6067     /*
6068      * When the destination is on the stack, load up the temp and store.
6069      * If there are many call-saved registers, the temp might live to
6070      * see another use; otherwise it'll be discarded.
6071      */
6072     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
6073     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
6074                arg_slot_stk_ofs(arg_slot));
6075 }
6076 
6077 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
6078                             TCGTemp *ts, TCGRegSet *allocated_regs)
6079 {
6080     if (arg_slot_reg_p(l->arg_slot)) {
6081         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
6082         load_arg_reg(s, reg, ts, *allocated_regs);
6083         tcg_regset_set_reg(*allocated_regs, reg);
6084     } else {
6085         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
6086     }
6087 }
6088 
6089 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
6090                          intptr_t ref_off, TCGRegSet *allocated_regs)
6091 {
6092     TCGReg reg;
6093 
6094     if (arg_slot_reg_p(arg_slot)) {
6095         reg = tcg_target_call_iarg_regs[arg_slot];
6096         tcg_reg_free(s, reg, *allocated_regs);
6097         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
6098         tcg_regset_set_reg(*allocated_regs, reg);
6099     } else {
6100         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
6101                             *allocated_regs, 0, false);
6102         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
6103         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
6104                    arg_slot_stk_ofs(arg_slot));
6105     }
6106 }
6107 
/*
 * Emit a call to a helper function for the call op @op: place the input
 * arguments per the ABI layout recorded in the op's TCGHelperInfo,
 * clobber caller-saved state, emit the call, and bind the outputs.
 */
static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
{
    const int nb_oargs = TCGOP_CALLO(op);
    const int nb_iargs = TCGOP_CALLI(op);
    const TCGLifeData arg_life = op->life;
    const TCGHelperInfo *info = tcg_call_info(op);
    TCGRegSet allocated_regs = s->reserved_regs;
    int i;

    /*
     * Move inputs into place in reverse order,
     * so that we place stacked arguments first.
     */
    for (i = nb_iargs - 1; i >= 0; --i) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            load_arg_normal(s, loc, ts, &allocated_regs);
            break;
        case TCG_CALL_ARG_BY_REF:
            /* Store the value to its stack home, then pass its address. */
            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
            load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
                         arg_slot_stk_ofs(loc->ref_slot),
                         &allocated_regs);
            break;
        case TCG_CALL_ARG_BY_REF_N:
            /* Subsequent part of a by-reference argument: store only. */
            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
            break;
        default:
            g_assert_not_reached();
        }
    }

    /* Mark dead temporaries and free the associated registers.  */
    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    /* Clobber call registers.  */
    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
            tcg_reg_free(s, i, allocated_regs);
        }
    }

    /*
     * Save globals if they might be written by the helper,
     * sync them if they might be read.
     */
    if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
        /* Nothing to do */
    } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
        sync_globals(s, allocated_regs);
    } else {
        save_globals(s, allocated_regs);
    }

    /*
     * If the ABI passes a pointer to the returned struct as the first
     * argument, load that now.  Pass a pointer to the output home slot.
     */
    if (info->out_kind == TCG_CALL_RET_BY_REF) {
        TCGTemp *ts = arg_temp(op->args[0]);

        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
    }

    tcg_out_call(s, tcg_call_func(op), info);

    /* Assign output registers and emit moves if needed.  */
    switch (info->out_kind) {
    case TCG_CALL_RET_NORMAL:
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);

            /* ENV should not be modified.  */
            tcg_debug_assert(!temp_readonly(ts));

            set_temp_val_reg(s, ts, reg);
            ts->mem_coherent = 0;
        }
        break;

    case TCG_CALL_RET_BY_VEC:
        /* Returned in a vector register; spill to the output home slot. */
        {
            TCGTemp *ts = arg_temp(op->args[0]);

            tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
            tcg_debug_assert(ts->temp_subindex == 0);
            if (!ts->mem_allocated) {
                temp_allocate_frame(s, ts);
            }
            tcg_out_st(s, TCG_TYPE_V128,
                       tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
                       ts->mem_base->reg, ts->mem_offset);
        }
        /* fall through to mark all parts in memory */

    case TCG_CALL_RET_BY_REF:
        /* The callee has performed a write through the reference. */
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            ts->val_type = TEMP_VAL_MEM;
        }
        break;

    default:
        g_assert_not_reached();
    }

    /* Flush or discard output registers as needed. */
    for (i = 0; i < nb_oargs; i++) {
        TCGTemp *ts = arg_temp(op->args[i]);
        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
6238 
6239 /**
6240  * atom_and_align_for_opc:
6241  * @s: tcg context
6242  * @opc: memory operation code
6243  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
6244  * @allow_two_ops: true if we are prepared to issue two operations
6245  *
6246  * Return the alignment and atomicity to use for the inline fast path
6247  * for the given memory operation.  The alignment may be larger than
6248  * that specified in @opc, and the correct alignment will be diagnosed
6249  * by the slow path helper.
6250  *
6251  * If @allow_two_ops, the host is prepared to test for 2x alignment,
6252  * and issue two loads or stores for subalignment.
6253  */
6254 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
6255                                            MemOp host_atom, bool allow_two_ops)
6256 {
6257     MemOp align = memop_alignment_bits(opc);
6258     MemOp size = opc & MO_SIZE;
6259     MemOp half = size ? size - 1 : 0;
6260     MemOp atom = opc & MO_ATOM_MASK;
6261     MemOp atmax;
6262 
6263     switch (atom) {
6264     case MO_ATOM_NONE:
6265         /* The operation requires no specific atomicity. */
6266         atmax = MO_8;
6267         break;
6268 
6269     case MO_ATOM_IFALIGN:
6270         atmax = size;
6271         break;
6272 
6273     case MO_ATOM_IFALIGN_PAIR:
6274         atmax = half;
6275         break;
6276 
6277     case MO_ATOM_WITHIN16:
6278         atmax = size;
6279         if (size == MO_128) {
6280             /* Misalignment implies !within16, and therefore no atomicity. */
6281         } else if (host_atom != MO_ATOM_WITHIN16) {
6282             /* The host does not implement within16, so require alignment. */
6283             align = MAX(align, size);
6284         }
6285         break;
6286 
6287     case MO_ATOM_WITHIN16_PAIR:
6288         atmax = size;
6289         /*
6290          * Misalignment implies !within16, and therefore half atomicity.
6291          * Any host prepared for two operations can implement this with
6292          * half alignment.
6293          */
6294         if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
6295             align = MAX(align, half);
6296         }
6297         break;
6298 
6299     case MO_ATOM_SUBALIGN:
6300         atmax = size;
6301         if (host_atom != MO_ATOM_SUBALIGN) {
6302             /* If unaligned but not odd, there are subobjects up to half. */
6303             if (allow_two_ops) {
6304                 align = MAX(align, half);
6305             } else {
6306                 align = MAX(align, size);
6307             }
6308         }
6309         break;
6310 
6311     default:
6312         g_assert_not_reached();
6313     }
6314 
6315     return (TCGAtomAlign){ .atom = atmax, .align = align };
6316 }
6317 
6318 /*
6319  * Similarly for qemu_ld/st slow path helpers.
6320  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
6321  * using only the provided backend tcg_out_* functions.
6322  */
6323 
6324 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
6325 {
6326     int ofs = arg_slot_stk_ofs(slot);
6327 
6328     /*
6329      * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
6330      * require extension to uint64_t, adjust the address for uint32_t.
6331      */
6332     if (HOST_BIG_ENDIAN &&
6333         TCG_TARGET_REG_BITS == 64 &&
6334         type == TCG_TYPE_I32) {
6335         ofs += 4;
6336     }
6337     return ofs;
6338 }
6339 
6340 static void tcg_out_helper_load_slots(TCGContext *s,
6341                                       unsigned nmov, TCGMovExtend *mov,
6342                                       const TCGLdstHelperParam *parm)
6343 {
6344     unsigned i;
6345     TCGReg dst3;
6346 
6347     /*
6348      * Start from the end, storing to the stack first.
6349      * This frees those registers, so we need not consider overlap.
6350      */
6351     for (i = nmov; i-- > 0; ) {
6352         unsigned slot = mov[i].dst;
6353 
6354         if (arg_slot_reg_p(slot)) {
6355             goto found_reg;
6356         }
6357 
6358         TCGReg src = mov[i].src;
6359         TCGType dst_type = mov[i].dst_type;
6360         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
6361 
6362         /* The argument is going onto the stack; extend into scratch. */
6363         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
6364             tcg_debug_assert(parm->ntmp != 0);
6365             mov[i].dst = src = parm->tmp[0];
6366             tcg_out_movext1(s, &mov[i]);
6367         }
6368 
6369         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
6370                    tcg_out_helper_stk_ofs(dst_type, slot));
6371     }
6372     return;
6373 
6374  found_reg:
6375     /*
6376      * The remaining arguments are in registers.
6377      * Convert slot numbers to argument registers.
6378      */
6379     nmov = i + 1;
6380     for (i = 0; i < nmov; ++i) {
6381         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
6382     }
6383 
6384     switch (nmov) {
6385     case 4:
6386         /* The backend must have provided enough temps for the worst case. */
6387         tcg_debug_assert(parm->ntmp >= 2);
6388 
6389         dst3 = mov[3].dst;
6390         for (unsigned j = 0; j < 3; ++j) {
6391             if (dst3 == mov[j].src) {
6392                 /*
6393                  * Conflict. Copy the source to a temporary, perform the
6394                  * remaining moves, then the extension from our scratch
6395                  * on the way out.
6396                  */
6397                 TCGReg scratch = parm->tmp[1];
6398 
6399                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
6400                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
6401                 tcg_out_movext1_new_src(s, &mov[3], scratch);
6402                 break;
6403             }
6404         }
6405 
6406         /* No conflicts: perform this move and continue. */
6407         tcg_out_movext1(s, &mov[3]);
6408         /* fall through */
6409 
6410     case 3:
6411         tcg_out_movext3(s, mov, mov + 1, mov + 2,
6412                         parm->ntmp ? parm->tmp[0] : -1);
6413         break;
6414     case 2:
6415         tcg_out_movext2(s, mov, mov + 1,
6416                         parm->ntmp ? parm->tmp[0] : -1);
6417         break;
6418     case 1:
6419         tcg_out_movext1(s, mov);
6420         break;
6421     default:
6422         g_assert_not_reached();
6423     }
6424 }
6425 
6426 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
6427                                     TCGType type, tcg_target_long imm,
6428                                     const TCGLdstHelperParam *parm)
6429 {
6430     if (arg_slot_reg_p(slot)) {
6431         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
6432     } else {
6433         int ofs = tcg_out_helper_stk_ofs(type, slot);
6434         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
6435             tcg_debug_assert(parm->ntmp != 0);
6436             tcg_out_movi(s, type, parm->tmp[0], imm);
6437             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
6438         }
6439     }
6440 }
6441 
/*
 * Load the arguments common to all qemu_ld/st helpers: env (always the
 * first argument), the MemOpIdx @ldst->oi, and the return address.
 * @next_arg indexes the first of the oi/ra pair within @info->in[].
 */
static void tcg_out_helper_load_common_args(TCGContext *s,
                                            const TCGLabelQemuLdst *ldst,
                                            const TCGLdstHelperParam *parm,
                                            const TCGHelperInfo *info,
                                            unsigned next_arg)
{
    TCGMovExtend ptr_mov = {
        .dst_type = TCG_TYPE_PTR,
        .src_type = TCG_TYPE_PTR,
        .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
    };
    const TCGCallArgumentLoc *loc = &info->in[0];
    TCGType type;
    unsigned slot;
    tcg_target_ulong imm;

    /*
     * Handle env, which is always first.
     */
    ptr_mov.dst = loc->arg_slot;
    ptr_mov.src = TCG_AREG0;
    tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);

    /*
     * Handle oi.
     */
    imm = ldst->oi;
    loc = &info->in[next_arg];
    type = TCG_TYPE_I32;
    switch (loc->kind) {
    case TCG_CALL_ARG_NORMAL:
        break;
    case TCG_CALL_ARG_EXTEND_U:
    case TCG_CALL_ARG_EXTEND_S:
        /* No extension required for MemOpIdx. */
        tcg_debug_assert(imm <= INT32_MAX);
        type = TCG_TYPE_REG;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
    next_arg++;

    /*
     * Handle ra.
     */
    loc = &info->in[next_arg];
    slot = loc->arg_slot;
    if (parm->ra_gen) {
        /* The backend computes the return address at runtime. */
        int arg_reg = -1;
        TCGReg ra_reg;

        if (arg_slot_reg_p(slot)) {
            arg_reg = tcg_target_call_iarg_regs[slot];
        }
        ra_reg = parm->ra_gen(s, ldst, arg_reg);

        ptr_mov.dst = slot;
        ptr_mov.src = ra_reg;
        tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
    } else {
        /* Otherwise pass the recorded raddr as an immediate. */
        imm = (uintptr_t)ldst->raddr;
        tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
    }
}
6508 
/*
 * Append to @mov[] the move(s) required to place the value held in
 * (@lo, @hi) into the argument location(s) at @loc, of type @dst_type
 * from source type @src_type.  Returns the number of TCGMovExtend
 * entries written: 1 for a single-register value, 2 when the value
 * spans a register pair (I64 on 32-bit hosts, I128 on 64-bit hosts).
 */
static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
                                       const TCGCallArgumentLoc *loc,
                                       TCGType dst_type, TCGType src_type,
                                       TCGReg lo, TCGReg hi)
{
    MemOp reg_mo;

    if (dst_type <= TCG_TYPE_REG) {
        /* The value fits in a single host register. */
        MemOp src_ext;

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
            src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
            break;
        case TCG_CALL_ARG_EXTEND_U:
            /* ABI requires zero-extension to the full register. */
            dst_type = TCG_TYPE_REG;
            src_ext = MO_UL;
            break;
        case TCG_CALL_ARG_EXTEND_S:
            /* ABI requires sign-extension to the full register. */
            dst_type = TCG_TYPE_REG;
            src_ext = MO_SL;
            break;
        default:
            g_assert_not_reached();
        }

        mov[0].dst = loc->arg_slot;
        mov[0].dst_type = dst_type;
        mov[0].src = lo;
        mov[0].src_type = src_type;
        mov[0].src_ext = src_ext;
        return 1;
    }

    if (TCG_TARGET_REG_BITS == 32) {
        assert(dst_type == TCG_TYPE_I64);
        reg_mo = MO_32;
    } else {
        assert(dst_type == TCG_TYPE_I128);
        reg_mo = MO_64;
    }

    /* Two-register value: assign halves to slots in host memory order. */
    mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
    mov[0].src = lo;
    mov[0].dst_type = TCG_TYPE_REG;
    mov[0].src_type = TCG_TYPE_REG;
    mov[0].src_ext = reg_mo;

    mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
    mov[1].src = hi;
    mov[1].dst_type = TCG_TYPE_REG;
    mov[1].src_type = TCG_TYPE_REG;
    mov[1].src_ext = reg_mo;

    return 2;
}
6565 
/*
 * Load all arguments for a qemu_ld slow-path helper call described by
 * @ldst: the guest address, any by-reference return pointer, and the
 * common env/oi/ra arguments.
 */
static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
                                   const TCGLdstHelperParam *parm)
{
    const TCGHelperInfo *info;
    const TCGCallArgumentLoc *loc;
    TCGMovExtend mov[2];
    unsigned next_arg, nmov;
    MemOp mop = get_memop(ldst->oi);

    /* Select the helper by access size. */
    switch (mop & MO_SIZE) {
    case MO_8:
    case MO_16:
    case MO_32:
        info = &info_helper_ld32_mmu;
        break;
    case MO_64:
        info = &info_helper_ld64_mmu;
        break;
    case MO_128:
        info = &info_helper_ld128_mmu;
        break;
    default:
        g_assert_not_reached();
    }

    /* Defer env argument. */
    next_arg = 1;

    loc = &info->in[next_arg];
    if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
        /*
         * 32-bit host with 32-bit guest: zero-extend the guest address
         * to 64-bits for the helper by storing the low part, then
         * load a zero for the high part.
         */
        tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
                               TCG_TYPE_I32, TCG_TYPE_I32,
                               ldst->addr_reg, -1);
        tcg_out_helper_load_slots(s, 1, mov, parm);

        tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
                                TCG_TYPE_I32, 0, parm);
        next_arg += 2;
    } else {
        nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
                                      ldst->addr_reg, -1);
        tcg_out_helper_load_slots(s, nmov, mov, parm);
        next_arg += nmov;
    }

    switch (info->out_kind) {
    case TCG_CALL_RET_NORMAL:
    case TCG_CALL_RET_BY_VEC:
        break;
    case TCG_CALL_RET_BY_REF:
        /*
         * The return reference is in the first argument slot.
         * We need memory in which to return: re-use the top of stack.
         */
        {
            int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;

            if (arg_slot_reg_p(0)) {
                tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
                                 TCG_REG_CALL_STACK, ofs_slot0);
            } else {
                /* Build the pointer in a scratch, store to the slot. */
                tcg_debug_assert(parm->ntmp != 0);
                tcg_out_addi_ptr(s, parm->tmp[0],
                                 TCG_REG_CALL_STACK, ofs_slot0);
                tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
                           TCG_REG_CALL_STACK, ofs_slot0);
            }
        }
        break;
    default:
        g_assert_not_reached();
    }

    tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
}
6646 
/*
 * Move the qemu_ld helper's return value into the destination
 * register(s) recorded in @ldst, applying any required extension.
 * @load_sign is true if the helper already sign-extended the result.
 */
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
                                  bool load_sign,
                                  const TCGLdstHelperParam *parm)
{
    MemOp mop = get_memop(ldst->oi);
    TCGMovExtend mov[2];
    int ofs_slot0;

    switch (ldst->type) {
    case TCG_TYPE_I64:
        /* On a 32-bit host, I64 needs the two-register path below. */
        if (TCG_TARGET_REG_BITS == 32) {
            break;
        }
        /* fall through */

    case TCG_TYPE_I32:
        mov[0].dst = ldst->datalo_reg;
        mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
        mov[0].dst_type = ldst->type;
        mov[0].src_type = TCG_TYPE_REG;

        /*
         * If load_sign, then we allowed the helper to perform the
         * appropriate sign extension to tcg_target_ulong, and all
         * we need now is a plain move.
         *
         * If they do not, then we expect the relevant extension
         * instruction to be no more expensive than a move, and
         * we thus save the icache etc by only using one of two
         * helper functions.
         */
        if (load_sign || !(mop & MO_SIGN)) {
            if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
                mov[0].src_ext = MO_32;
            } else {
                mov[0].src_ext = MO_64;
            }
        } else {
            mov[0].src_ext = mop & MO_SSIZE;
        }
        tcg_out_movext1(s, mov);
        return;

    case TCG_TYPE_I128:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Returned in a register pair: use the two-register path. */
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Spill the vector return to the stack, then reload halves. */
            tcg_out_st(s, TCG_TYPE_V128,
                       tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
                       TCG_REG_CALL_STACK, ofs_slot0);
            /* fall through */
        case TCG_CALL_RET_BY_REF:
            /* The value was written to the stack; load both halves. */
            tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
                       TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
            tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
                       TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
            return;
        default:
            g_assert_not_reached();
        }
        break;

    default:
        g_assert_not_reached();
    }

    /* Two-register return: move both halves, resolving any overlap. */
    mov[0].dst = ldst->datalo_reg;
    mov[0].src =
        tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
    mov[0].dst_type = TCG_TYPE_REG;
    mov[0].src_type = TCG_TYPE_REG;
    mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;

    mov[1].dst = ldst->datahi_reg;
    mov[1].src =
        tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
    mov[1].dst_type = TCG_TYPE_REG;
    mov[1].src_type = TCG_TYPE_REG;
    mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;

    tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
}
6732 
/*
 * Load all arguments for a qemu_st slow-path helper call described by
 * @ldst: the guest address, the data to store (by value or reference),
 * and the common env/oi/ra arguments.
 */
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
                                   const TCGLdstHelperParam *parm)
{
    const TCGHelperInfo *info;
    const TCGCallArgumentLoc *loc;
    TCGMovExtend mov[4];
    TCGType data_type;
    unsigned next_arg, nmov, n;
    MemOp mop = get_memop(ldst->oi);

    /* Select the helper by access size. */
    switch (mop & MO_SIZE) {
    case MO_8:
    case MO_16:
    case MO_32:
        info = &info_helper_st32_mmu;
        data_type = TCG_TYPE_I32;
        break;
    case MO_64:
        info = &info_helper_st64_mmu;
        data_type = TCG_TYPE_I64;
        break;
    case MO_128:
        info = &info_helper_st128_mmu;
        data_type = TCG_TYPE_I128;
        break;
    default:
        g_assert_not_reached();
    }

    /* Defer env argument. */
    next_arg = 1;
    nmov = 0;

    /* Handle addr argument. */
    loc = &info->in[next_arg];
    tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
    if (TCG_TARGET_REG_BITS == 32) {
        /*
         * 32-bit host (and thus 32-bit guest): zero-extend the guest address
         * to 64-bits for the helper by storing the low part.  Later,
         * after we have processed the register inputs, we will load a
         * zero for the high part.
         */
        tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
                               TCG_TYPE_I32, TCG_TYPE_I32,
                               ldst->addr_reg, -1);
        next_arg += 2;
        nmov += 1;
    } else {
        n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
                                   ldst->addr_reg, -1);
        next_arg += n;
        nmov += n;
    }

    /* Handle data argument. */
    loc = &info->in[next_arg];
    switch (loc->kind) {
    case TCG_CALL_ARG_NORMAL:
    case TCG_CALL_ARG_EXTEND_U:
    case TCG_CALL_ARG_EXTEND_S:
        /* Data passed by value: queue its move(s) and perform them all. */
        n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
                                   ldst->datalo_reg, ldst->datahi_reg);
        next_arg += n;
        nmov += n;
        tcg_out_helper_load_slots(s, nmov, mov, parm);
        break;

    case TCG_CALL_ARG_BY_REF:
        /* I128 passed by reference: store halves in host memory order. */
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        tcg_debug_assert(data_type == TCG_TYPE_I128);
        tcg_out_st(s, TCG_TYPE_I64,
                   HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
                   TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
        tcg_out_st(s, TCG_TYPE_I64,
                   HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
                   TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));

        tcg_out_helper_load_slots(s, nmov, mov, parm);

        /* Then pass a pointer to the stored value. */
        if (arg_slot_reg_p(loc->arg_slot)) {
            tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
                             TCG_REG_CALL_STACK,
                             arg_slot_stk_ofs(loc->ref_slot));
        } else {
            tcg_debug_assert(parm->ntmp != 0);
            tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
                             arg_slot_stk_ofs(loc->ref_slot));
            tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
                       TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
        }
        next_arg += 2;
        break;

    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32) {
        /* Zero extend the address by loading a zero for the high part. */
        loc = &info->in[1 + !HOST_BIG_ENDIAN];
        tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
    }

    tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
}
6839 
6840 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
6841 {
6842     int i, num_insns;
6843     TCGOp *op;
6844 
6845     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
6846                  && qemu_log_in_addr_range(pc_start))) {
6847         FILE *logfile = qemu_log_trylock();
6848         if (logfile) {
6849             fprintf(logfile, "OP:\n");
6850             tcg_dump_ops(s, logfile, false);
6851             fprintf(logfile, "\n");
6852             qemu_log_unlock(logfile);
6853         }
6854     }
6855 
6856 #ifdef CONFIG_DEBUG_TCG
6857     /* Ensure all labels referenced have been emitted.  */
6858     {
6859         TCGLabel *l;
6860         bool error = false;
6861 
6862         QSIMPLEQ_FOREACH(l, &s->labels, next) {
6863             if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
6864                 qemu_log_mask(CPU_LOG_TB_OP,
6865                               "$L%d referenced but not present.\n", l->id);
6866                 error = true;
6867             }
6868         }
6869         assert(!error);
6870     }
6871 #endif
6872 
6873     /* Do not reuse any EBB that may be allocated within the TB. */
6874     tcg_temp_ebb_reset_freed(s);
6875 
6876     tcg_optimize(s);
6877 
6878     reachable_code_pass(s);
6879     liveness_pass_0(s);
6880     liveness_pass_1(s);
6881 
6882     if (s->nb_indirects > 0) {
6883         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
6884                      && qemu_log_in_addr_range(pc_start))) {
6885             FILE *logfile = qemu_log_trylock();
6886             if (logfile) {
6887                 fprintf(logfile, "OP before indirect lowering:\n");
6888                 tcg_dump_ops(s, logfile, false);
6889                 fprintf(logfile, "\n");
6890                 qemu_log_unlock(logfile);
6891             }
6892         }
6893 
6894         /* Replace indirect temps with direct temps.  */
6895         if (liveness_pass_2(s)) {
6896             /* If changes were made, re-run liveness.  */
6897             liveness_pass_1(s);
6898         }
6899     }
6900 
6901     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
6902                  && qemu_log_in_addr_range(pc_start))) {
6903         FILE *logfile = qemu_log_trylock();
6904         if (logfile) {
6905             fprintf(logfile, "OP after optimization and liveness analysis:\n");
6906             tcg_dump_ops(s, logfile, true);
6907             fprintf(logfile, "\n");
6908             qemu_log_unlock(logfile);
6909         }
6910     }
6911 
6912     /* Initialize goto_tb jump offsets. */
6913     tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
6914     tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
6915     tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
6916     tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
6917 
6918     tcg_reg_alloc_start(s);
6919 
6920     /*
6921      * Reset the buffer pointers when restarting after overflow.
6922      * TODO: Move this into translate-all.c with the rest of the
6923      * buffer management.  Having only this done here is confusing.
6924      */
6925     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
6926     s->code_ptr = s->code_buf;
6927     s->data_gen_ptr = NULL;
6928 
6929     QSIMPLEQ_INIT(&s->ldst_labels);
6930     s->pool_labels = NULL;
6931 
6932     s->gen_insn_data =
6933         tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * INSN_START_WORDS);
6934 
6935     tcg_out_tb_start(s);
6936 
6937     num_insns = -1;
6938     s->carry_live = false;
6939     QTAILQ_FOREACH(op, &s->ops, link) {
6940         TCGOpcode opc = op->opc;
6941 
6942         switch (opc) {
6943         case INDEX_op_extrl_i64_i32:
6944             assert(TCG_TARGET_REG_BITS == 64);
6945             /*
6946              * If TCG_TYPE_I32 is represented in some canonical form,
6947              * e.g. zero or sign-extended, then emit as a unary op.
6948              * Otherwise we can treat this as a plain move.
6949              * If the output dies, treat this as a plain move, because
6950              * this will be implemented with a store.
6951              */
6952             if (TCG_TARGET_HAS_extr_i64_i32) {
6953                 TCGLifeData arg_life = op->life;
6954                 if (!IS_DEAD_ARG(0)) {
6955                     goto do_default;
6956                 }
6957             }
6958             /* fall through */
6959         case INDEX_op_mov:
6960         case INDEX_op_mov_vec:
6961             tcg_reg_alloc_mov(s, op);
6962             break;
6963         case INDEX_op_dup_vec:
6964             tcg_reg_alloc_dup(s, op);
6965             break;
6966         case INDEX_op_insn_start:
6967             assert_carry_dead(s);
6968             if (num_insns >= 0) {
6969                 size_t off = tcg_current_code_size(s);
6970                 s->gen_insn_end_off[num_insns] = off;
6971                 /* Assert that we do not overflow our stored offset.  */
6972                 assert(s->gen_insn_end_off[num_insns] == off);
6973             }
6974             num_insns++;
6975             for (i = 0; i < INSN_START_WORDS; ++i) {
6976                 s->gen_insn_data[num_insns * INSN_START_WORDS + i] =
6977                     tcg_get_insn_start_param(op, i);
6978             }
6979             break;
6980         case INDEX_op_discard:
6981             temp_dead(s, arg_temp(op->args[0]));
6982             break;
6983         case INDEX_op_set_label:
6984             tcg_reg_alloc_bb_end(s, s->reserved_regs);
6985             tcg_out_label(s, arg_label(op->args[0]));
6986             break;
6987         case INDEX_op_call:
6988             assert_carry_dead(s);
6989             tcg_reg_alloc_call(s, op);
6990             break;
6991         case INDEX_op_exit_tb:
6992             tcg_out_exit_tb(s, op->args[0]);
6993             break;
6994         case INDEX_op_goto_tb:
6995             tcg_out_goto_tb(s, op->args[0]);
6996             break;
6997         case INDEX_op_br:
6998             tcg_out_br(s, arg_label(op->args[0]));
6999             break;
7000         case INDEX_op_mb:
7001             tcg_out_mb(s, op->args[0]);
7002             break;
7003         case INDEX_op_dup2_vec:
7004             if (tcg_reg_alloc_dup2(s, op)) {
7005                 break;
7006             }
7007             /* fall through */
7008         default:
7009         do_default:
7010             /* Sanity check that we've not introduced any unhandled opcodes. */
7011             tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op),
7012                                               TCGOP_FLAGS(op)));
7013             /* Note: in order to speed up the code, it would be much
7014                faster to have specialized register allocator functions for
7015                some common argument patterns */
7016             tcg_reg_alloc_op(s, op);
7017             break;
7018         }
7019         /* Test for (pending) buffer overflow.  The assumption is that any
7020            one operation beginning below the high water mark cannot overrun
7021            the buffer completely.  Thus we can test for overflow after
7022            generating code without having to check during generation.  */
7023         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
7024             return -1;
7025         }
7026         /* Test for TB overflow, as seen by gen_insn_end_off.  */
7027         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
7028             return -2;
7029         }
7030     }
7031     assert_carry_dead(s);
7032 
7033     tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
7034     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
7035 
7036     /* Generate TB finalization at the end of block */
7037     i = tcg_out_ldst_finalize(s);
7038     if (i < 0) {
7039         return i;
7040     }
7041     i = tcg_out_pool_finalize(s);
7042     if (i < 0) {
7043         return i;
7044     }
7045     if (!tcg_resolve_relocs(s)) {
7046         return -2;
7047     }
7048 
7049 #ifndef CONFIG_TCG_INTERPRETER
7050     /* flush instruction cache */
7051     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
7052                         (uintptr_t)s->code_buf,
7053                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
7054 #endif
7055 
7056     return tcg_current_code_size(s);
7057 }
7058 
7059 #ifdef ELF_HOST_MACHINE
7060 /* In order to use this feature, the backend needs to do three things:
7061 
7062    (1) Define ELF_HOST_MACHINE to indicate both what value to
7063        put into the ELF image and to indicate support for the feature.
7064 
7065    (2) Define tcg_register_jit.  This should create a buffer containing
7066        the contents of a .debug_frame section that describes the post-
7067        prologue unwind info for the tcg machine.
7068 
7069    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
7070 */
7071 
7072 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
/* Values for jit_descriptor.action_flag; fixed by the GDB JIT ABI. */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,    /* relevant_entry has just been added */
    JIT_UNREGISTER_FN   /* relevant_entry is about to be removed */
} jit_actions_t;
7078 
/* One node in the doubly-linked list of JIT-ed symbol files read by GDB. */
struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;   /* in-memory ELF image */
    uint64_t symfile_size;      /* size of that image in bytes */
};
7085 
/* Root descriptor that GDB reads to discover registered JIT code. */
struct jit_descriptor {
    uint32_t version;                        /* interface version; must be 1 */
    uint32_t action_flag;                    /* a jit_actions_t value */
    struct jit_code_entry *relevant_entry;   /* entry being (un)registered */
    struct jit_code_entry *first_entry;      /* head of the entry list */
};
7092 
/*
 * GDB sets a breakpoint on this function; calling it after updating
 * __jit_debug_descriptor notifies the debugger that JIT code has been
 * registered or unregistered.  The noinline attribute and the empty
 * asm statement prevent the compiler from eliding the "empty" call.
 */
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}

/* Must statically initialize the version, because GDB may check
   the version before we can set it.  */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
7102 
7103 /* End GDB interface.  */
7104 
/*
 * Return the offset of @str within the NUL-separated string table
 * @strtab.  The caller guarantees that @str is present in the table;
 * there is deliberately no failure path.
 */
static int find_string(const char *strtab, const char *str)
{
    /* Entry 0 is the mandatory empty string; start at the first name. */
    for (const char *p = strtab + 1; ; p += strlen(p) + 1) {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
    }
}
7116 
7117 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
7118                                  const void *debug_frame,
7119                                  size_t debug_frame_size)
7120 {
7121     struct __attribute__((packed)) DebugInfo {
7122         uint32_t  len;
7123         uint16_t  version;
7124         uint32_t  abbrev;
7125         uint8_t   ptr_size;
7126         uint8_t   cu_die;
7127         uint16_t  cu_lang;
7128         uintptr_t cu_low_pc;
7129         uintptr_t cu_high_pc;
7130         uint8_t   fn_die;
7131         char      fn_name[16];
7132         uintptr_t fn_low_pc;
7133         uintptr_t fn_high_pc;
7134         uint8_t   cu_eoc;
7135     };
7136 
7137     struct ElfImage {
7138         ElfW(Ehdr) ehdr;
7139         ElfW(Phdr) phdr;
7140         ElfW(Shdr) shdr[7];
7141         ElfW(Sym)  sym[2];
7142         struct DebugInfo di;
7143         uint8_t    da[24];
7144         char       str[80];
7145     };
7146 
7147     struct ElfImage *img;
7148 
7149     static const struct ElfImage img_template = {
7150         .ehdr = {
7151             .e_ident[EI_MAG0] = ELFMAG0,
7152             .e_ident[EI_MAG1] = ELFMAG1,
7153             .e_ident[EI_MAG2] = ELFMAG2,
7154             .e_ident[EI_MAG3] = ELFMAG3,
7155             .e_ident[EI_CLASS] = ELF_CLASS,
7156             .e_ident[EI_DATA] = ELF_DATA,
7157             .e_ident[EI_VERSION] = EV_CURRENT,
7158             .e_type = ET_EXEC,
7159             .e_machine = ELF_HOST_MACHINE,
7160             .e_version = EV_CURRENT,
7161             .e_phoff = offsetof(struct ElfImage, phdr),
7162             .e_shoff = offsetof(struct ElfImage, shdr),
7163             .e_ehsize = sizeof(ElfW(Shdr)),
7164             .e_phentsize = sizeof(ElfW(Phdr)),
7165             .e_phnum = 1,
7166             .e_shentsize = sizeof(ElfW(Shdr)),
7167             .e_shnum = ARRAY_SIZE(img->shdr),
7168             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
7169 #ifdef ELF_HOST_FLAGS
7170             .e_flags = ELF_HOST_FLAGS,
7171 #endif
7172 #ifdef ELF_OSABI
7173             .e_ident[EI_OSABI] = ELF_OSABI,
7174 #endif
7175         },
7176         .phdr = {
7177             .p_type = PT_LOAD,
7178             .p_flags = PF_X,
7179         },
7180         .shdr = {
7181             [0] = { .sh_type = SHT_NULL },
7182             /* Trick: The contents of code_gen_buffer are not present in
7183                this fake ELF file; that got allocated elsewhere.  Therefore
7184                we mark .text as SHT_NOBITS (similar to .bss) so that readers
7185                will not look for contents.  We can record any address.  */
7186             [1] = { /* .text */
7187                 .sh_type = SHT_NOBITS,
7188                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
7189             },
7190             [2] = { /* .debug_info */
7191                 .sh_type = SHT_PROGBITS,
7192                 .sh_offset = offsetof(struct ElfImage, di),
7193                 .sh_size = sizeof(struct DebugInfo),
7194             },
7195             [3] = { /* .debug_abbrev */
7196                 .sh_type = SHT_PROGBITS,
7197                 .sh_offset = offsetof(struct ElfImage, da),
7198                 .sh_size = sizeof(img->da),
7199             },
7200             [4] = { /* .debug_frame */
7201                 .sh_type = SHT_PROGBITS,
7202                 .sh_offset = sizeof(struct ElfImage),
7203             },
7204             [5] = { /* .symtab */
7205                 .sh_type = SHT_SYMTAB,
7206                 .sh_offset = offsetof(struct ElfImage, sym),
7207                 .sh_size = sizeof(img->sym),
7208                 .sh_info = 1,
7209                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
7210                 .sh_entsize = sizeof(ElfW(Sym)),
7211             },
7212             [6] = { /* .strtab */
7213                 .sh_type = SHT_STRTAB,
7214                 .sh_offset = offsetof(struct ElfImage, str),
7215                 .sh_size = sizeof(img->str),
7216             }
7217         },
7218         .sym = {
7219             [1] = { /* code_gen_buffer */
7220                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
7221                 .st_shndx = 1,
7222             }
7223         },
7224         .di = {
7225             .len = sizeof(struct DebugInfo) - 4,
7226             .version = 2,
7227             .ptr_size = sizeof(void *),
7228             .cu_die = 1,
7229             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
7230             .fn_die = 2,
7231             .fn_name = "code_gen_buffer"
7232         },
7233         .da = {
7234             1,          /* abbrev number (the cu) */
7235             0x11, 1,    /* DW_TAG_compile_unit, has children */
7236             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
7237             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
7238             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
7239             0, 0,       /* end of abbrev */
7240             2,          /* abbrev number (the fn) */
7241             0x2e, 0,    /* DW_TAG_subprogram, no children */
7242             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
7243             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
7244             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
7245             0, 0,       /* end of abbrev */
7246             0           /* no more abbrev */
7247         },
7248         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
7249                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
7250     };
7251 
7252     /* We only need a single jit entry; statically allocate it.  */
7253     static struct jit_code_entry one_entry;
7254 
7255     uintptr_t buf = (uintptr_t)buf_ptr;
7256     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
7257     DebugFrameHeader *dfh;
7258 
7259     img = g_malloc(img_size);
7260     *img = img_template;
7261 
7262     img->phdr.p_vaddr = buf;
7263     img->phdr.p_paddr = buf;
7264     img->phdr.p_memsz = buf_size;
7265 
7266     img->shdr[1].sh_name = find_string(img->str, ".text");
7267     img->shdr[1].sh_addr = buf;
7268     img->shdr[1].sh_size = buf_size;
7269 
7270     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
7271     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
7272 
7273     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
7274     img->shdr[4].sh_size = debug_frame_size;
7275 
7276     img->shdr[5].sh_name = find_string(img->str, ".symtab");
7277     img->shdr[6].sh_name = find_string(img->str, ".strtab");
7278 
7279     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
7280     img->sym[1].st_value = buf;
7281     img->sym[1].st_size = buf_size;
7282 
7283     img->di.cu_low_pc = buf;
7284     img->di.cu_high_pc = buf + buf_size;
7285     img->di.fn_low_pc = buf;
7286     img->di.fn_high_pc = buf + buf_size;
7287 
7288     dfh = (DebugFrameHeader *)(img + 1);
7289     memcpy(dfh, debug_frame, debug_frame_size);
7290     dfh->fde.func_start = buf;
7291     dfh->fde.func_len = buf_size;
7292 
7293 #ifdef DEBUG_JIT
7294     /* Enable this block to be able to debug the ELF image file creation.
7295        One can use readelf, objdump, or other inspection utilities.  */
7296     {
7297         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
7298         FILE *f = fopen(jit, "w+b");
7299         if (f) {
7300             if (fwrite(img, img_size, 1, f) != img_size) {
7301                 /* Avoid stupid unused return value warning for fwrite.  */
7302             }
7303             fclose(f);
7304         }
7305     }
7306 #endif
7307 
7308     one_entry.symfile_addr = img;
7309     one_entry.symfile_size = img_size;
7310 
7311     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
7312     __jit_debug_descriptor.relevant_entry = &one_entry;
7313     __jit_debug_descriptor.first_entry = &one_entry;
7314     __jit_debug_register_code();
7315 }
7316 #else
7317 /* No support for the feature.  Provide the entry point expected by exec.c,
7318    and implement the internal function we declared earlier.  */
7319 
/* Stub: without ELF_HOST_MACHINE there is no GDB JIT registration.  */
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}
7325 
/* Stub entry point expected by the rest of QEMU when the host backend
   provides no unwind/debug info to register.  */
void tcg_register_jit(const void *buf, size_t buf_size)
{
}
7329 #endif /* ELF_HOST_MACHINE */
7330 
7331 #if !TCG_TARGET_MAYBE_vec
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    /* The backend declared no vector support (TCG_TARGET_MAYBE_vec == 0),
       so no vector op should ever reach expansion.  */
    g_assert_not_reached();
}
7336 #endif
7337