xref: /qemu/tcg/tcg.c (revision f26137893b98c6e1fd6819d5f13cb74fafcdcff9)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 
27 /* Define to jump the ELF file used to communicate with GDB.  */
28 #undef DEBUG_JIT
29 
30 #include "qemu/error-report.h"
31 #include "qemu/cutils.h"
32 #include "qemu/host-utils.h"
33 #include "qemu/qemu-print.h"
34 #include "qemu/cacheflush.h"
35 #include "qemu/cacheinfo.h"
36 #include "qemu/timer.h"
37 #include "exec/translation-block.h"
38 #include "exec/tlb-common.h"
39 #include "tcg/startup.h"
40 #include "tcg/tcg-op-common.h"
41 
42 #if UINTPTR_MAX == UINT32_MAX
43 # define ELF_CLASS  ELFCLASS32
44 #else
45 # define ELF_CLASS  ELFCLASS64
46 #endif
47 #if HOST_BIG_ENDIAN
48 # define ELF_DATA   ELFDATA2MSB
49 #else
50 # define ELF_DATA   ELFDATA2LSB
51 #endif
52 
53 #include "elf.h"
54 #include "exec/log.h"
55 #include "tcg/tcg-ldst.h"
56 #include "tcg/tcg-temp-internal.h"
57 #include "tcg-internal.h"
58 #include "tcg/perf.h"
59 #include "tcg-has.h"
60 #ifdef CONFIG_USER_ONLY
61 #include "user/guest-base.h"
62 #endif
63 
64 /* Forward declarations for functions declared in tcg-target.c.inc and
65    used here. */
66 static void tcg_target_init(TCGContext *s);
67 static void tcg_target_qemu_prologue(TCGContext *s);
68 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
69                         intptr_t value, intptr_t addend);
70 static void tcg_out_nop_fill(tcg_insn_unit *p, int count);
71 
72 typedef struct TCGLabelQemuLdst TCGLabelQemuLdst;
73 static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
74 static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
75 
76 /* The CIE and FDE header definitions will be common to all hosts.  */
77 typedef struct {
78     uint32_t len __attribute__((aligned((sizeof(void *)))));
79     uint32_t id;
80     uint8_t version;
81     char augmentation[1];
82     uint8_t code_align;
83     uint8_t data_align;
84     uint8_t return_column;
85 } DebugFrameCIE;
86 
87 typedef struct QEMU_PACKED {
88     uint32_t len __attribute__((aligned((sizeof(void *)))));
89     uint32_t cie_offset;
90     uintptr_t func_start;
91     uintptr_t func_len;
92 } DebugFrameFDEHeader;
93 
94 typedef struct QEMU_PACKED {
95     DebugFrameCIE cie;
96     DebugFrameFDEHeader fde;
97 } DebugFrameHeader;
98 
99 struct TCGLabelQemuLdst {
100     bool is_ld;             /* qemu_ld: true, qemu_st: false */
101     MemOpIdx oi;
102     TCGType type;           /* result type of a load */
103     TCGReg addrlo_reg;      /* reg index for low word of guest virtual addr */
104     TCGReg addrhi_reg;      /* reg index for high word of guest virtual addr */
105     TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
106     TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
107     const tcg_insn_unit *raddr;   /* addr of the next IR of qemu_ld/st IR */
108     tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
109     QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
110 };
111 
112 static void tcg_register_jit_int(const void *buf, size_t size,
113                                  const void *debug_frame,
114                                  size_t debug_frame_size)
115     __attribute__((unused));
116 
117 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
118 static void tcg_out_tb_start(TCGContext *s);
119 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
120                        intptr_t arg2);
121 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
122 static void tcg_out_movi(TCGContext *s, TCGType type,
123                          TCGReg ret, tcg_target_long arg);
124 static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
125 static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
126 static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
127 static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
128 static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
129 static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
130 static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
131 static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
132 static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
133 static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
134 static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
135 static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
136 static void tcg_out_goto_tb(TCGContext *s, int which);
137 static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
138                        const TCGArg args[TCG_MAX_OP_ARGS],
139                        const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
/* Duplicate a scalar register into each element of a vector register. */
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
/* Duplicate a value loaded from base+offset into a vector register. */
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
/* Duplicate an immediate into each element of a vector register. */
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
/* Emit one vector opcode with the given args/constant-arg flags. */
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
/*
 * Hosts without vector support supply unreachable stubs so that the
 * generic code links; tcg_can_emit_vec_op reporting 0 ensures the
 * stubs are never actually called.
 */
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
/* No vector ops can be emitted on this host. */
int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
{
    return 0;
}
#endif
179 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
180                        intptr_t arg2);
181 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
182                         TCGReg base, intptr_t ofs);
183 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
184                          const TCGHelperInfo *info);
185 static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
186 static bool tcg_target_const_match(int64_t val, int ct,
187                                    TCGType type, TCGCond cond, int vece);
188 
189 #ifndef CONFIG_USER_ONLY
190 #define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
191 #endif
192 
193 typedef struct TCGLdstHelperParam {
194     TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
195     unsigned ntmp;
196     int tmp[3];
197 } TCGLdstHelperParam;
198 
199 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
200                                    const TCGLdstHelperParam *p)
201     __attribute__((unused));
202 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
203                                   bool load_sign, const TCGLdstHelperParam *p)
204     __attribute__((unused));
205 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
206                                    const TCGLdstHelperParam *p)
207     __attribute__((unused));
208 
/*
 * Slow-path load helpers, indexed by MemOp size-and-sign (MO_SSIZE).
 * The MO_SL and MO_128 entries exist only on 64-bit hosts.
 */
static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};
221 
/*
 * Slow-path store helpers, indexed by MemOp size (MO_SIZE); stores
 * need no sign variants.  The MO_128 entry exists only on 64-bit hosts.
 */
static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};
231 
232 typedef struct {
233     MemOp atom;   /* lg2 bits of atomicity required */
234     MemOp align;  /* lg2 bits of alignment to use */
235 } TCGAtomAlign;
236 
237 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
238                                            MemOp host_atom, bool allow_two_ops)
239     __attribute__((unused));
240 
241 #ifdef CONFIG_USER_ONLY
242 bool tcg_use_softmmu;
243 #endif
244 
245 TCGContext tcg_init_ctx;
246 __thread TCGContext *tcg_ctx;
247 
248 TCGContext **tcg_ctxs;
249 unsigned int tcg_cur_ctxs;
250 unsigned int tcg_max_ctxs;
251 TCGv_env tcg_env;
252 const void *tcg_code_gen_epilogue;
253 uintptr_t tcg_splitwx_diff;
254 
255 #ifndef CONFIG_TCG_INTERPRETER
256 tcg_prologue_fn *tcg_qemu_tb_exec;
257 #endif
258 
259 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
260 static TCGRegSet tcg_target_call_clobber_regs;
261 
#if TCG_TARGET_INSN_UNIT_SIZE == 1
/* Emit one byte of code and advance the output pointer. */
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

/* Overwrite one already-emitted byte at @p. */
static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif
274 
#if TCG_TARGET_INSN_UNIT_SIZE <= 2
/* Emit a 16-bit value into the code stream and advance the pointer. */
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        /* Exactly one insn unit: store directly. */
        *s->code_ptr++ = v;
    } else {
        /* Smaller units: copy bytewise and advance by 2/unit-size units. */
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

/* Overwrite an already-emitted 16-bit value at @p. */
static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
297 
#if TCG_TARGET_INSN_UNIT_SIZE <= 4
/* Emit a 32-bit value into the code stream and advance the pointer. */
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        /* Exactly one insn unit: store directly. */
        *s->code_ptr++ = v;
    } else {
        /* Smaller units: copy bytewise and advance by 4/unit-size units. */
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

/* Overwrite an already-emitted 32-bit value at @p. */
static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
320 
#if TCG_TARGET_INSN_UNIT_SIZE <= 8
/* Emit a 64-bit value into the code stream and advance the pointer. */
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        /* Exactly one insn unit: store directly. */
        *s->code_ptr++ = v;
    } else {
        /* Smaller units: copy bytewise and advance by 8/unit-size units. */
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

/* Overwrite an already-emitted 64-bit value at @p. */
static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
343 
344 /* label relocation processing */
345 
346 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
347                           TCGLabel *l, intptr_t addend)
348 {
349     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
350 
351     r->type = type;
352     r->ptr = code_ptr;
353     r->addend = addend;
354     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
355 }
356 
/*
 * Bind label @l to the current output position, recorded as the
 * read-execute alias of the write pointer for split-wx buffers.
 */
static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}
363 
364 TCGLabel *gen_new_label(void)
365 {
366     TCGContext *s = tcg_ctx;
367     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
368 
369     memset(l, 0, sizeof(TCGLabel));
370     l->id = s->nb_labels++;
371     QSIMPLEQ_INIT(&l->branches);
372     QSIMPLEQ_INIT(&l->relocs);
373 
374     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
375 
376     return l;
377 }
378 
379 static bool tcg_resolve_relocs(TCGContext *s)
380 {
381     TCGLabel *l;
382 
383     QSIMPLEQ_FOREACH(l, &s->labels, next) {
384         TCGRelocation *r;
385         uintptr_t value = l->u.value;
386 
387         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
388             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
389                 return false;
390             }
391         }
392     }
393     return true;
394 }
395 
/* Record the current code offset as the reset point of goto_tb @which. */
static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}
404 
/* Record the current code offset as the patchable jump of goto_tb @which. */
static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}
413 
/* Address of the indirect-jump target slot for goto_tb @which. */
static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}
422 
/*
 * Offset of the softmmu TLB mask/table pair for mmu index @which,
 * relative to the end of CPUNegativeOffsetState.  NOTE(review): the
 * result is presumably negative, addressing state placed before env —
 * confirm against the CPUNegativeOffsetState layout.
 */
static int __attribute__((unused))
tlb_mask_table_ofs(TCGContext *s, int which)
{
    return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
            sizeof(CPUNegativeOffsetState));
}
429 
/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    /* Non-local exit back to the sigsetjmp holding s->jmp_trans. */
    siglongjmp(s->jmp_trans, -2);
}
436 
437 /*
438  * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
439  * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
440  *
441  * However, tcg_out_helper_load_slots reuses this field to hold an
442  * argument slot number (which may designate a argument register or an
443  * argument stack slot), converting to TCGReg once all arguments that
444  * are destined for the stack are processed.
445  */
446 typedef struct TCGMovExtend {
447     unsigned dst;
448     TCGReg src;
449     TCGType dst_type;
450     TCGType src_type;
451     MemOp src_ext;
452 } TCGMovExtend;
453 
454 /**
455  * tcg_out_movext -- move and extend
456  * @s: tcg context
457  * @dst_type: integral type for destination
458  * @dst: destination register
459  * @src_type: integral type for source
460  * @src_ext: extension to apply to source
461  * @src: source register
462  *
463  * Move or extend @src into @dst, depending on @src_ext and the types.
464  */
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
                           TCGType src_type, MemOp src_ext, TCGReg src)
{
    switch (src_ext) {
    case MO_UB:
        tcg_out_ext8u(s, dst, src);
        break;
    case MO_SB:
        tcg_out_ext8s(s, dst_type, dst, src);
        break;
    case MO_UW:
        tcg_out_ext16u(s, dst, src);
        break;
    case MO_SW:
        tcg_out_ext16s(s, dst_type, dst, src);
        break;
    case MO_UL:
    case MO_SL:
        if (dst_type == TCG_TYPE_I32) {
            /* 32-bit destination: a plain move or a truncation suffices. */
            if (src_type == TCG_TYPE_I32) {
                tcg_out_mov(s, TCG_TYPE_I32, dst, src);
            } else {
                tcg_out_extrl_i64_i32(s, dst, src);
            }
        } else if (src_type == TCG_TYPE_I32) {
            /* Widen a 32-bit source to 64 bits, honoring the sign flag. */
            if (src_ext & MO_SIGN) {
                tcg_out_exts_i32_i64(s, dst, src);
            } else {
                tcg_out_extu_i32_i64(s, dst, src);
            }
        } else {
            /* 64-bit to 64-bit: extend the low 32 bits in place. */
            if (src_ext & MO_SIGN) {
                tcg_out_ext32s(s, dst, src);
            } else {
                tcg_out_ext32u(s, dst, src);
            }
        }
        break;
    case MO_UQ:
        /* A full 64-bit move only makes sense on a 64-bit host. */
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (dst_type == TCG_TYPE_I32) {
            tcg_out_extrl_i64_i32(s, dst, src);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dst, src);
        }
        break;
    default:
        g_assert_not_reached();
    }
}
515 
/* Minor variations on a theme, using a structure. */

/* As tcg_out_movext, but with the source register overridden by @src. */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}

/* As tcg_out_movext, with all operands taken from the descriptor. */
static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}
527 
528 /**
529  * tcg_out_movext2 -- move and extend two pair
530  * @s: tcg context
531  * @i1: first move description
532  * @i2: second move description
533  * @scratch: temporary register, or -1 for none
534  *
535  * As tcg_out_movext, for both @i1 and @i2, caring for overlap
536  * between the sources and destinations.
537  */
538 
static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    /* If i1 does not clobber i2's source, the moves may go in order. */
    if (i1->dst != src2) {
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    /*
     * i1->dst == src2, so i2 must go first.  If i2 in turn clobbers
     * i1's source, the two moves form a swap cycle.
     */
    if (i2->dst == src1) {
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            /* No host xchg: break the cycle through the scratch register. */
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}
567 
568 /**
569  * tcg_out_movext3 -- move and extend three pair
570  * @s: tcg context
571  * @i1: first move description
572  * @i2: second move description
573  * @i3: third move description
574  * @scratch: temporary register, or -1 for none
575  *
576  * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
577  * between the sources and destinations.
578  */
579 
static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    /*
     * If any one move clobbers neither remaining source, emit it first
     * and reduce to the two-move case.
     */
    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch or two xchg.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            /* No host xchg: park src1 in scratch and rotate the rest. */
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            /* No host xchg: park src1 in scratch and rotate the rest. */
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}
643 
644 /*
645  * Allocate a new TCGLabelQemuLdst entry.
646  */
647 
648 __attribute__((unused))
649 static TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
650 {
651     TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));
652 
653     memset(l, 0, sizeof(*l));
654     QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next);
655 
656     return l;
657 }
658 
659 /*
660  * Allocate new constant pool entries.
661  */
662 
663 typedef struct TCGLabelPoolData {
664     struct TCGLabelPoolData *next;
665     tcg_insn_unit *label;
666     intptr_t addend;
667     int rtype;
668     unsigned nlong;
669     tcg_target_ulong data[];
670 } TCGLabelPoolData;
671 
672 static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype,
673                                         tcg_insn_unit *label, intptr_t addend)
674 {
675     TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData)
676                                      + sizeof(tcg_target_ulong) * nlong);
677 
678     n->label = label;
679     n->addend = addend;
680     n->rtype = rtype;
681     n->nlong = nlong;
682     return n;
683 }
684 
/*
 * Insert @n into the sorted constant pool: ordered by decreasing nlong,
 * then by decreasing data (memcmp order).  Keeping identical constants
 * adjacent lets tcg_out_pool_finalize emit each distinct value once.
 */
static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n)
{
    TCGLabelPoolData *i, **pp;
    int nlong = n->nlong;

    /* Insertion sort on the pool.  */
    for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) {
        if (nlong > i->nlong) {
            break;
        }
        if (nlong < i->nlong) {
            continue;
        }
        if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) {
            break;
        }
    }
    n->next = *pp;
    *pp = n;
}
705 
706 /* The "usual" for generic integer code.  */
707 __attribute__((unused))
708 static void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype,
709                            tcg_insn_unit *label, intptr_t addend)
710 {
711     TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend);
712     n->data[0] = d;
713     new_pool_insert(s, n);
714 }
715 
716 /* For v64 or v128, depending on the host.  */
717 __attribute__((unused))
718 static void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label,
719                         intptr_t addend, tcg_target_ulong d0,
720                         tcg_target_ulong d1)
721 {
722     TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend);
723     n->data[0] = d0;
724     n->data[1] = d1;
725     new_pool_insert(s, n);
726 }
727 
728 /* For v128 or v256, depending on the host.  */
729 __attribute__((unused))
730 static void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label,
731                         intptr_t addend, tcg_target_ulong d0,
732                         tcg_target_ulong d1, tcg_target_ulong d2,
733                         tcg_target_ulong d3)
734 {
735     TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend);
736     n->data[0] = d0;
737     n->data[1] = d1;
738     n->data[2] = d2;
739     n->data[3] = d3;
740     new_pool_insert(s, n);
741 }
742 
743 /* For v256, for 32-bit host.  */
744 __attribute__((unused))
745 static void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label,
746                         intptr_t addend, tcg_target_ulong d0,
747                         tcg_target_ulong d1, tcg_target_ulong d2,
748                         tcg_target_ulong d3, tcg_target_ulong d4,
749                         tcg_target_ulong d5, tcg_target_ulong d6,
750                         tcg_target_ulong d7)
751 {
752     TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend);
753     n->data[0] = d0;
754     n->data[1] = d1;
755     n->data[2] = d2;
756     n->data[3] = d3;
757     n->data[4] = d4;
758     n->data[5] = d5;
759     n->data[6] = d6;
760     n->data[7] = d7;
761     new_pool_insert(s, n);
762 }
763 
764 /*
765  * Generate TB finalization at the end of block
766  */
767 
/*
 * Emit the slow-path code for every queued qemu_ld/st label.
 * Returns 0 on success, -1 on (pending) code-buffer overflow,
 * -2 if a slow path could not be emitted.
 */
static int tcg_out_ldst_finalize(TCGContext *s)
{
    TCGLabelQemuLdst *lb;

    /* qemu_ld/st slow paths */
    QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) {
        if (lb->is_ld
            ? !tcg_out_qemu_ld_slow_path(s, lb)
            : !tcg_out_qemu_st_slow_path(s, lb)) {
            return -2;
        }

        /*
         * Test for (pending) buffer overflow.  The assumption is that any
         * one operation beginning below the high water mark cannot overrun
         * the buffer completely.  Thus we can test for overflow after
         * generating code without having to check during generation.
         */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
    }
    return 0;
}
792 
/*
 * Emit the accumulated constant pool after the generated code and
 * patch each recorded use to address its pool entry.  Returns 0 on
 * success, -1 on (pending) buffer overflow, -2 if a relocation could
 * not be encoded.
 */
static int tcg_out_pool_finalize(TCGContext *s)
{
    TCGLabelPoolData *p = s->pool_labels;
    TCGLabelPoolData *l = NULL;
    void *a;

    if (p == NULL) {
        return 0;
    }

    /*
     * ??? Round up to qemu_icache_linesize, but then do not round
     * again when allocating the next TranslationBlock structure.
     */
    /* Align the pool start to the widest entry (the list head). */
    a = (void *)ROUND_UP((uintptr_t)s->code_ptr,
                         sizeof(tcg_target_ulong) * p->nlong);
    tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr);
    s->data_gen_ptr = a;

    for (; p != NULL; p = p->next) {
        size_t size = sizeof(tcg_target_ulong) * p->nlong;
        uintptr_t value;

        /*
         * The list is sorted so that duplicate constants are adjacent
         * (see new_pool_insert); only emit data when it differs from
         * the previously emitted entry.
         */
        if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) {
            if (unlikely(a > s->code_gen_highwater)) {
                return -1;
            }
            memcpy(a, p->data, size);
            a += size;  /* void* arithmetic: GCC extension used throughout QEMU */
            l = p;
        }

        /* Patch the use to the read-execute address of the entry. */
        value = (uintptr_t)tcg_splitwx_to_rx(a) - size;
        if (!patch_reloc(p->label, p->rtype, value, p->addend)) {
            return -2;
        }
    }

    s->code_ptr = a;
    return 0;
}
834 
835 #define C_PFX1(P, A)                    P##A
836 #define C_PFX2(P, A, B)                 P##A##_##B
837 #define C_PFX3(P, A, B, C)              P##A##_##B##_##C
838 #define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
839 #define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
840 #define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
841 
842 /* Define an enumeration for the various combinations. */
843 
844 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
845 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
846 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
847 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),
848 
849 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
850 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
851 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
852 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),
853 
854 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
855 #define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1),
856 #define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),
857 
858 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
859 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
860 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
861 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
862 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),
863 
864 typedef enum {
865     C_NotImplemented = -1,
866 #include "tcg-target-con-set.h"
867 } TCGConstraintSetIndex;
868 
869 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode, TCGType, unsigned);
870 
871 #undef C_O0_I1
872 #undef C_O0_I2
873 #undef C_O0_I3
874 #undef C_O0_I4
875 #undef C_O1_I1
876 #undef C_O1_I2
877 #undef C_O1_I3
878 #undef C_O1_I4
879 #undef C_N1_I2
880 #undef C_N1O1_I1
881 #undef C_N2_I1
882 #undef C_O2_I1
883 #undef C_O2_I2
884 #undef C_O2_I3
885 #undef C_O2_I4
886 #undef C_N1_O1_I4
887 
888 /* Put all of the constraint sets into an array, indexed by the enum. */
889 
/* Description of one constraint set: argument counts and constraint strings. */
typedef struct TCGConstraintSet {
    uint8_t nb_oargs, nb_iargs;          /* number of output / input args */
    const char *args_ct_str[TCG_MAX_OP_ARGS];  /* outputs first, then inputs */
} TCGConstraintSet;
894 
/*
 * Each macro now expands to a TCGConstraintSet initializer: output and
 * input counts plus the stringified per-argument constraints.  The C_Nx
 * forms prefix their first output(s) with '&'.
 */
#define C_O0_I1(I1)                     { 0, 1, { #I1 } },
#define C_O0_I2(I1, I2)                 { 0, 2, { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { 0, 3, { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { 0, 4, { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { 1, 1, { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { 1, 2, { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { 1, 3, { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { 1, 4, { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { 1, 2, { "&" #O1, #I1, #I2 } },
#define C_N1O1_I1(O1, O2, I1)           { 2, 1, { "&" #O1, #O2, #I1 } },
#define C_N2_I1(O1, O2, I1)             { 2, 1, { "&" #O1, "&" #O2, #I1 } },

#define C_O2_I1(O1, O2, I1)             { 2, 1, { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { 2, 2, { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { 2, 3, { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { #O1, #O2, #I1, #I2, #I3, #I4 } },
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGConstraintSet constraint_sets[] = {
#include "tcg-target-con-set.h"
};
918 
/* The initializer forms are done; remove them before the final redefinition. */
#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4
935 
936 /* Expand the enumerator to be returned from tcg_target_op_def(). */
937 
/*
 * Unlike the enumerator forms above, these expand without a trailing
 * comma, so they can be used as expressions in return statements.
 */
#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1)
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)
957 
/* Pull in the backend implementation, which uses the macros defined above. */
#include "tcg-target.c.inc"

#ifndef CONFIG_TCG_INTERPRETER
/* Validate CPUTLBDescFast placement. */
QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
                        sizeof(CPUNegativeOffsetState))
                  < MIN_TLB_MASK_TABLE_OFS);
#endif
966 
967 /*
968  * All TCG threads except the parent (i.e. the one that called tcg_context_init
969  * and registered the target's TCG globals) must register with this function
970  * before initiating translation.
971  *
972  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
973  * of tcg_region_init() for the reasoning behind this.
974  *
975  * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
 * system-mode tcg_ctxs[] does not track tcg_init_ctx, since the initial context
977  * is not used anymore for translation once this function is called.
978  *
979  * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
 * iterates over the array (e.g. tcg_code_size()) the same for both system/user
981  * modes.
982  */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    /* User-mode: every thread shares the single initial context. */
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    /* Start from a copy of the fully-initialized parent context. */
    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            /*
             * mem_base points into the parent's temps[]; rebase it so
             * it refers to this context's own copy of that temp.
             */
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */
1017 
1018 /* pool based memory allocation */
1019 void *tcg_malloc_internal(TCGContext *s, int size)
1020 {
1021     TCGPool *p;
1022     int pool_size;
1023 
1024     if (size > TCG_POOL_CHUNK_SIZE) {
1025         /* big malloc: insert a new pool (XXX: could optimize) */
1026         p = g_malloc(sizeof(TCGPool) + size);
1027         p->size = size;
1028         p->next = s->pool_first_large;
1029         s->pool_first_large = p;
1030         return p->data;
1031     } else {
1032         p = s->pool_current;
1033         if (!p) {
1034             p = s->pool_first;
1035             if (!p)
1036                 goto new_pool;
1037         } else {
1038             if (!p->next) {
1039             new_pool:
1040                 pool_size = TCG_POOL_CHUNK_SIZE;
1041                 p = g_malloc(sizeof(TCGPool) + pool_size);
1042                 p->size = pool_size;
1043                 p->next = NULL;
1044                 if (s->pool_current) {
1045                     s->pool_current->next = p;
1046                 } else {
1047                     s->pool_first = p;
1048                 }
1049             } else {
1050                 p = p->next;
1051             }
1052         }
1053     }
1054     s->pool_current = p;
1055     s->pool_cur = p->data + size;
1056     s->pool_end = p->data + p->size;
1057     return p->data;
1058 }
1059 
1060 void tcg_pool_reset(TCGContext *s)
1061 {
1062     TCGPool *p, *t;
1063     for (p = s->pool_first_large; p; p = t) {
1064         t = p->next;
1065         g_free(p);
1066     }
1067     s->pool_first_large = NULL;
1068     s->pool_cur = s->pool_end = NULL;
1069     s->pool_current = NULL;
1070 }
1071 
1072 /*
1073  * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
1074  * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
1075  * We only use these for layout in tcg_out_ld_helper_ret and
1076  * tcg_out_st_helper_args, and share them between several of
1077  * the helpers, with the end result that it's easier to build manually.
1078  */
1079 
/* "ttl" is tcg_target_ulong: its typecode follows the host register width. */
#if TCG_TARGET_REG_BITS == 32
# define dh_typecode_ttl  dh_typecode_i32
#else
# define dh_typecode_ttl  dh_typecode_i64
#endif
1085 
/* Load helper returning a tcg_target_ulong (32-bit result, extended). */
static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

/* Load helper returning a full uint64_t. */
static TCGHelperInfo info_helper_ld64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

/* Load helper returning an Int128. */
static TCGHelperInfo info_helper_ld128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i128, 0) /* return Int128 */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

/* Store helper taking a 32-bit data value. */
static TCGHelperInfo info_helper_st32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* uint32_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

/* Store helper taking a 64-bit data value. */
static TCGHelperInfo info_helper_st64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i64, 3)  /* uint64_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

/* Store helper taking an Int128 data value. */
static TCGHelperInfo info_helper_st128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i128, 3) /* Int128 data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};
1142 
1143 #ifdef CONFIG_TCG_INTERPRETER
1144 static ffi_type *typecode_to_ffi(int argmask)
1145 {
1146     /*
1147      * libffi does not support __int128_t, so we have forced Int128
1148      * to use the structure definition instead of the builtin type.
1149      */
1150     static ffi_type *ffi_type_i128_elements[3] = {
1151         &ffi_type_uint64,
1152         &ffi_type_uint64,
1153         NULL
1154     };
1155     static ffi_type ffi_type_i128 = {
1156         .size = 16,
1157         .alignment = __alignof__(Int128),
1158         .type = FFI_TYPE_STRUCT,
1159         .elements = ffi_type_i128_elements,
1160     };
1161 
1162     switch (argmask) {
1163     case dh_typecode_void:
1164         return &ffi_type_void;
1165     case dh_typecode_i32:
1166         return &ffi_type_uint32;
1167     case dh_typecode_s32:
1168         return &ffi_type_sint32;
1169     case dh_typecode_i64:
1170         return &ffi_type_uint64;
1171     case dh_typecode_s64:
1172         return &ffi_type_sint64;
1173     case dh_typecode_ptr:
1174         return &ffi_type_pointer;
1175     case dh_typecode_i128:
1176         return &ffi_type_i128;
1177     }
1178     g_assert_not_reached();
1179 }
1180 
/*
 * Build the libffi call descriptor (cif) for one helper, decoding the
 * packed 3-bit typecodes in info->typemask.  The cif and its argument
 * type array are allocated together in a single block.
 */
static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
{
    unsigned typemask = info->typemask;
    struct {
        ffi_cif cif;
        ffi_type *args[];   /* flexible array: one entry per argument */
    } *ca;
    ffi_status status;
    int nargs;

    /* Ignoring the return type, find the last non-zero field. */
    nargs = 32 - clz32(typemask >> 3);
    nargs = DIV_ROUND_UP(nargs, 3);   /* 3 typemask bits per argument */
    assert(nargs <= MAX_CALL_IARGS);

    ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
    ca->cif.rtype = typecode_to_ffi(typemask & 7);  /* low 3 bits: return */
    ca->cif.nargs = nargs;

    if (nargs != 0) {
        ca->cif.arg_types = ca->args;
        for (int j = 0; j < nargs; ++j) {
            int typecode = extract32(typemask, (j + 1) * 3, 3);
            ca->args[j] = typecode_to_ffi(typecode);
        }
    }

    status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                          ca->cif.rtype, ca->cif.arg_types);
    assert(status == FFI_OK);

    return &ca->cif;
}
1214 
/*
 * For the interpreter, one-time helper-info initialization stores the
 * libffi cif; otherwise a simple flag value marks the info initialized.
 */
#define HELPER_INFO_INIT(I)      (&(I)->cif)
#define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
#else
#define HELPER_INFO_INIT(I)      (&(I)->init)
#define HELPER_INFO_INIT_VAL(I)  1
#endif /* CONFIG_TCG_INTERPRETER */
1221 
1222 static inline bool arg_slot_reg_p(unsigned arg_slot)
1223 {
1224     /*
1225      * Split the sizeof away from the comparison to avoid Werror from
1226      * "unsigned < 0 is always false", when iarg_regs is empty.
1227      */
1228     unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
1229     return arg_slot < nreg;
1230 }
1231 
1232 static inline int arg_slot_stk_ofs(unsigned arg_slot)
1233 {
1234     unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1235     unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);
1236 
1237     tcg_debug_assert(stk_slot < max);
1238     return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
1239 }
1240 
/* Running totals used while laying out one helper call's arguments. */
typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;
1247 
1248 static void layout_arg_even(TCGCumulativeArgs *cum)
1249 {
1250     cum->arg_slot += cum->arg_slot & 1;
1251 }
1252 
1253 static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
1254                          TCGCallArgumentKind kind)
1255 {
1256     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1257 
1258     *loc = (TCGCallArgumentLoc){
1259         .kind = kind,
1260         .arg_idx = cum->arg_idx,
1261         .arg_slot = cum->arg_slot,
1262     };
1263     cum->info_in_idx++;
1264     cum->arg_slot++;
1265 }
1266 
1267 static void layout_arg_normal_n(TCGCumulativeArgs *cum,
1268                                 TCGHelperInfo *info, int n)
1269 {
1270     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1271 
1272     for (int i = 0; i < n; ++i) {
1273         /* Layout all using the same arg_idx, adjusting the subindex. */
1274         loc[i] = (TCGCallArgumentLoc){
1275             .kind = TCG_CALL_ARG_NORMAL,
1276             .arg_idx = cum->arg_idx,
1277             .tmp_subindex = i,
1278             .arg_slot = cum->arg_slot + i,
1279         };
1280     }
1281     cum->info_in_idx += n;
1282     cum->arg_slot += n;
1283 }
1284 
/*
 * Lay out a by-reference argument: one pointer in the normal argument
 * sequence, plus a stack copy of the value in "ref_slot" scratch space
 * (relocated past the stack parameters in init_call_layout).
 */
static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with
     * structure pass by-reference.  Therefore we must make copies.
     * Allocate space from "ref_slot", which will be adjusted to
     * follow the parameters on the stack.
     */
    loc[0].ref_slot = cum->ref_slot;

    /*
     * Subsequent words also go into the reference slot, but
     * do not accumulate into the regular arguments.
     */
    for (int i = 1; i < n; ++i) {
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_BY_REF_N,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .ref_slot = cum->ref_slot + i,
        };
    }
    cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
    cum->ref_slot += n;
}
1316 
/*
 * Compute the call layout for one helper: fill in info->nr_out,
 * out_kind, nr_in and the in[] argument locations from the packed
 * 3-bit typecodes in info->typemask.
 */
static void init_call_layout(TCGHelperInfo *info)
{
    int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
    int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned typemask = info->typemask;
    unsigned typecode;
    TCGCumulativeArgs cum = { };

    /*
     * Parse and place any function return value.
     */
    typecode = typemask & 7;
    switch (typecode) {
    case dh_typecode_void:
        info->nr_out = 0;
        break;
    case dh_typecode_i32:
    case dh_typecode_s32:
    case dh_typecode_ptr:
        info->nr_out = 1;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    case dh_typecode_i64:
    case dh_typecode_s64:
        /* On 32-bit hosts a 64-bit result occupies two registers. */
        info->nr_out = 64 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_CALL_RET_NORMAL;
        /* Query the last register now to trigger any assert early. */
        tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
        break;
    case dh_typecode_i128:
        info->nr_out = 128 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_TARGET_CALL_RET_I128;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Query the last register now to trigger any assert early. */
            tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Query the single register now to trigger any assert early. */
            tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
            break;
        case TCG_CALL_RET_BY_REF:
            /*
             * Allocate the first argument to the output.
             * We don't need to store this anywhere, just make it
             * unavailable for use in the input loop below.
             */
            cum.arg_slot = 1;
            break;
        default:
            qemu_build_not_reached();
        }
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Parse and place function arguments.
     */
    for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
        TCGCallArgumentKind kind;
        TCGType type;

        typecode = typemask & 7;
        switch (typecode) {
        case dh_typecode_i32:
        case dh_typecode_s32:
            type = TCG_TYPE_I32;
            break;
        case dh_typecode_i64:
        case dh_typecode_s64:
            type = TCG_TYPE_I64;
            break;
        case dh_typecode_ptr:
            type = TCG_TYPE_PTR;
            break;
        case dh_typecode_i128:
            type = TCG_TYPE_I128;
            break;
        default:
            g_assert_not_reached();
        }

        /* Dispatch on type, honoring the backend's per-type ABI choice. */
        switch (type) {
        case TCG_TYPE_I32:
            switch (TCG_TARGET_CALL_ARG_I32) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                break;
            case TCG_CALL_ARG_EXTEND:
                /* Odd typecodes (s32) select the signed extension kind. */
                kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
                layout_arg_1(&cum, info, kind);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I64:
            switch (TCG_TARGET_CALL_ARG_I64) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                if (TCG_TARGET_REG_BITS == 32) {
                    layout_arg_normal_n(&cum, info, 2);
                } else {
                    layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                }
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I128:
            switch (TCG_TARGET_CALL_ARG_I128) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
                break;
            case TCG_CALL_ARG_BY_REF:
                layout_arg_by_ref(&cum, info);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    info->nr_in = cum.info_in_idx;

    /* Validate that we didn't overrun the input array. */
    assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
    /* Validate the backend has enough argument space. */
    assert(cum.arg_slot <= max_reg_slots + max_stk_slots);

    /*
     * Relocate the "ref_slot" area to the end of the parameters.
     * Minimizing this stack offset helps code size for x86,
     * which has a signed 8-bit offset encoding.
     */
    if (cum.ref_slot != 0) {
        int ref_base = 0;

        if (cum.arg_slot > max_reg_slots) {
            int align = __alignof(Int128) / sizeof(tcg_target_long);

            /* Start past the stack-passed parameters, suitably aligned. */
            ref_base = cum.arg_slot - max_reg_slots;
            if (align > 1) {
                ref_base = ROUND_UP(ref_base, align);
            }
        }
        assert(ref_base + cum.ref_slot <= max_stk_slots);
        ref_base += max_reg_slots;

        if (ref_base != 0) {
            /* Shift every by-reference copy slot by the computed base. */
            for (int i = cum.info_in_idx - 1; i >= 0; --i) {
                TCGCallArgumentLoc *loc = &info->in[i];
                switch (loc->kind) {
                case TCG_CALL_ARG_BY_REF:
                case TCG_CALL_ARG_BY_REF_N:
                    loc->ref_slot += ref_base;
                    break;
                default:
                    break;
                }
            }
        }
    }
}
1497 
1498 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
1499 static void process_constraint_sets(void);
1500 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1501                                             TCGReg reg, const char *name);
1502 
/*
 * One-time initialization of the parent TCG context: compute the helper
 * call layouts, let the backend configure registers and constraints,
 * and create the fixed "env" global in TCG_AREG0.
 */
static void tcg_context_init(unsigned max_cpus)
{
    TCGContext *s = &tcg_init_ctx;
    int n, i;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Precompute the ABI layout of the fixed ldst helpers. */
    init_call_layout(&info_helper_ld32_mmu);
    init_call_layout(&info_helper_ld64_mmu);
    init_call_layout(&info_helper_ld128_mmu);
    init_call_layout(&info_helper_st32_mmu);
    init_call_layout(&info_helper_st64_mmu);
    init_call_layout(&info_helper_st128_mmu);

    tcg_target_init(s);
    process_constraint_sets();

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation tcg_region_init() for the
     * reasoning behind this.
     * In system-mode we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    tcg_cur_ctxs = 1;
    tcg_max_ctxs = 1;
#else
    tcg_max_ctxs = max_cpus;
    tcg_ctxs = g_new0(TCGContext *, max_cpus);
#endif

    /* The env pointer must live permanently in TCG_AREG0. */
    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    tcg_env = temp_tcgv_ptr(ts);
}
1557 
/* Initialize the TCG subsystem: the context first, then the code region. */
void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);
    tcg_region_init(tb_size, splitwx, max_cpus);
}
1563 
1564 /*
1565  * Allocate TBs right before their corresponding translated code, making
1566  * sure that TBs and code are on different cache lines.
1567  */
1568 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1569 {
1570     uintptr_t align = qemu_icache_linesize;
1571     TranslationBlock *tb;
1572     void *next;
1573 
1574  retry:
1575     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1576     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1577 
1578     if (unlikely(next > s->code_gen_highwater)) {
1579         if (tcg_region_alloc(s)) {
1580             return NULL;
1581         }
1582         goto retry;
1583     }
1584     qatomic_set(&s->code_gen_ptr, next);
1585     return tb;
1586 }
1587 
/*
 * Generate the prologue/epilogue at the start of the code buffer and
 * publish it as the entry point into generated code (tcg_qemu_tb_exec).
 */
void tcg_prologue_init(void)
{
    TCGContext *s = tcg_ctx;
    size_t prologue_size;

    s->code_ptr = s->code_gen_ptr;
    s->code_buf = s->code_gen_ptr;
    s->data_gen_ptr = NULL;

#ifndef CONFIG_TCG_INTERPRETER
    /* The entry point is the executable alias of the buffer start. */
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
#endif

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    qemu_thread_jit_write();
    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    prologue_size = tcg_current_code_size(s);
    perf_report_prologue(s->code_gen_ptr, prologue_size);

#ifndef CONFIG_TCG_INTERPRETER
    /* Make the freshly written prologue visible to instruction fetch. */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf, prologue_size);
#endif

    /* Optionally disassemble the prologue (and any pool data) to the log. */
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
            if (s->data_gen_ptr) {
                size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
                size_t data_size = prologue_size - code_size;
                size_t i;

                disas(logfile, s->code_gen_ptr, code_size);

                for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                    if (sizeof(tcg_target_ulong) == 8) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint64_t *)(s->data_gen_ptr + i));
                    } else {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .long  0x%08x\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint32_t *)(s->data_gen_ptr + i));
                    }
                }
            } else {
                disas(logfile, s->code_gen_ptr, prologue_size);
            }
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

#ifndef CONFIG_TCG_INTERPRETER
    /*
     * Assert that goto_ptr is implemented completely, setting an epilogue.
     * For tci, we use NULL as the signal to return from the interpreter,
     * so skip this check.
     */
    tcg_debug_assert(tcg_code_gen_epilogue != NULL);
#endif

    tcg_region_prologue_set(s);
}
1668 
/* Reset all per-translation state before generating a new TB. */
void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    /* Discard all non-global temps from the previous translation. */
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.  */
    tcg_temp_ebb_reset_freed(s);

    /* No constant temps have been previously allocated. */
    for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
        if (s->const_table[i]) {
            g_hash_table_remove_all(s->const_table[i]);
        }
    }

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
    s->emit_before_op = NULL;
    QSIMPLEQ_INIT(&s->labels);

    /* The frontend must have configured a valid guest address width. */
    tcg_debug_assert(s->addr_type == TCG_TYPE_I32 ||
                     s->addr_type == TCG_TYPE_I64);

    tcg_debug_assert(s->insn_start_words > 0);
}
1702 
1703 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1704 {
1705     int n = s->nb_temps++;
1706 
1707     if (n >= TCG_MAX_TEMPS) {
1708         tcg_raise_tb_overflow(s);
1709     }
1710     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1711 }
1712 
1713 static TCGTemp *tcg_global_alloc(TCGContext *s)
1714 {
1715     TCGTemp *ts;
1716 
1717     tcg_debug_assert(s->nb_globals == s->nb_temps);
1718     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1719     s->nb_globals++;
1720     ts = tcg_temp_alloc(s);
1721     ts->kind = TEMP_GLOBAL;
1722 
1723     return ts;
1724 }
1725 
1726 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1727                                             TCGReg reg, const char *name)
1728 {
1729     TCGTemp *ts;
1730 
1731     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1732 
1733     ts = tcg_global_alloc(s);
1734     ts->base_type = type;
1735     ts->type = type;
1736     ts->kind = TEMP_FIXED;
1737     ts->reg = reg;
1738     ts->name = name;
1739     tcg_regset_set_reg(s->reserved_regs, reg);
1740 
1741     return ts;
1742 }
1743 
1744 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1745 {
1746     s->frame_start = start;
1747     s->frame_end = start + size;
1748     s->frame_temp
1749         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1750 }
1751 
/*
 * Create a global temp that lives in memory at @base + @offset.
 * If @base is itself a (non-fixed) global, the new temp is marked
 * indirect.  On 32-bit hosts a 64-bit global is split into two
 * consecutive 32-bit halves named "<name>_0" and "<name>_1".
 */
static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset,
                                            const char *name, TCGType type)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0;

    switch (base_ts->kind) {
    case TEMP_FIXED:
        break;
    case TEMP_GLOBAL:
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
        break;
    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        /* The second half relies on temps being allocated consecutively. */
        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + 4;
        ts2->temp_subindex = 1;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}
1811 
1812 TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name)
1813 {
1814     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32);
1815     return temp_tcgv_i32(ts);
1816 }
1817 
1818 TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name)
1819 {
1820     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64);
1821     return temp_tcgv_i64(ts);
1822 }
1823 
1824 TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name)
1825 {
1826     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR);
1827     return temp_tcgv_ptr(ts);
1828 }
1829 
/*
 * Allocate a new temporary of the given type and lifetime KIND.
 * TEMP_EBB temps are recycled through a per-type free list;
 * TEMP_TB temps are always freshly allocated.  Types wider than the
 * host register (I64 on 32-bit hosts, I128) are represented by
 * multiple consecutive register-sized parts.
 */
TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *ts;
    int n;

    if (kind == TEMP_EBB) {
        /* Try to reuse a previously freed EBB temp of the same type. */
        int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);

        if (idx < TCG_MAX_TEMPS) {
            /* There is already an available temp with the right type.  */
            clear_bit(idx, s->free_temps[type].l);

            ts = &s->temps[idx];
            ts->temp_allocated = 1;
            tcg_debug_assert(ts->base_type == type);
            tcg_debug_assert(ts->kind == kind);
            return ts;
        }
    } else {
        tcg_debug_assert(kind == TEMP_TB);
    }

    /* Number of register-sized parts needed to hold this type. */
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        n = 1;
        break;
    case TCG_TYPE_I64:
        n = 64 / TCG_TARGET_REG_BITS;
        break;
    case TCG_TYPE_I128:
        n = 128 / TCG_TARGET_REG_BITS;
        break;
    default:
        g_assert_not_reached();
    }

    ts = tcg_temp_alloc(s);
    ts->base_type = type;
    ts->temp_allocated = 1;
    ts->kind = kind;

    if (n == 1) {
        ts->type = type;
    } else {
        /* Each part holds one host register's worth of the value. */
        ts->type = TCG_TYPE_REG;

        for (int i = 1; i < n; ++i) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            /* Parts must occupy consecutive temps[] slots. */
            tcg_debug_assert(ts2 == ts + i);
            ts2->base_type = type;
            ts2->type = TCG_TYPE_REG;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = i;
            ts2->kind = kind;
        }
    }
    return ts;
}
1893 
1894 TCGv_i32 tcg_temp_new_i32(void)
1895 {
1896     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB));
1897 }
1898 
1899 TCGv_i32 tcg_temp_ebb_new_i32(void)
1900 {
1901     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB));
1902 }
1903 
1904 TCGv_i64 tcg_temp_new_i64(void)
1905 {
1906     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB));
1907 }
1908 
1909 TCGv_i64 tcg_temp_ebb_new_i64(void)
1910 {
1911     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB));
1912 }
1913 
1914 TCGv_ptr tcg_temp_new_ptr(void)
1915 {
1916     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB));
1917 }
1918 
1919 TCGv_ptr tcg_temp_ebb_new_ptr(void)
1920 {
1921     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB));
1922 }
1923 
1924 TCGv_i128 tcg_temp_new_i128(void)
1925 {
1926     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB));
1927 }
1928 
1929 TCGv_i128 tcg_temp_ebb_new_i128(void)
1930 {
1931     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB));
1932 }
1933 
/*
 * Allocate a vector temp of the given type (EBB lifetime).
 * In debug builds, verify the host actually supports that vector width.
 */
TCGv_vec tcg_temp_new_vec(TCGType type)
{
    TCGTemp *t;

#ifdef CONFIG_DEBUG_TCG
    switch (type) {
    case TCG_TYPE_V64:
        assert(TCG_TARGET_HAS_v64);
        break;
    case TCG_TYPE_V128:
        assert(TCG_TARGET_HAS_v128);
        break;
    case TCG_TYPE_V256:
        assert(TCG_TARGET_HAS_v256);
        break;
    default:
        g_assert_not_reached();
    }
#endif

    t = tcg_temp_new_internal(type, TEMP_EBB);
    return temp_tcgv_vec(t);
}
1957 
1958 /* Create a new temp of the same type as an existing temp.  */
1959 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1960 {
1961     TCGTemp *t = tcgv_vec_temp(match);
1962 
1963     tcg_debug_assert(t->temp_allocated != 0);
1964 
1965     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
1966     return temp_tcgv_vec(t);
1967 }
1968 
1969 void tcg_temp_free_internal(TCGTemp *ts)
1970 {
1971     TCGContext *s = tcg_ctx;
1972 
1973     switch (ts->kind) {
1974     case TEMP_CONST:
1975     case TEMP_TB:
1976         /* Silently ignore free. */
1977         break;
1978     case TEMP_EBB:
1979         tcg_debug_assert(ts->temp_allocated != 0);
1980         ts->temp_allocated = 0;
1981         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
1982         break;
1983     default:
1984         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
1985         g_assert_not_reached();
1986     }
1987 }
1988 
1989 void tcg_temp_free_i32(TCGv_i32 arg)
1990 {
1991     tcg_temp_free_internal(tcgv_i32_temp(arg));
1992 }
1993 
1994 void tcg_temp_free_i64(TCGv_i64 arg)
1995 {
1996     tcg_temp_free_internal(tcgv_i64_temp(arg));
1997 }
1998 
1999 void tcg_temp_free_i128(TCGv_i128 arg)
2000 {
2001     tcg_temp_free_internal(tcgv_i128_temp(arg));
2002 }
2003 
2004 void tcg_temp_free_ptr(TCGv_ptr arg)
2005 {
2006     tcg_temp_free_internal(tcgv_ptr_temp(arg));
2007 }
2008 
2009 void tcg_temp_free_vec(TCGv_vec arg)
2010 {
2011     tcg_temp_free_internal(tcgv_vec_temp(arg));
2012 }
2013 
/*
 * Return the canonical TEMP_CONST temp for VAL of the given type.
 * Constants are hash-consed per type, so repeated requests for the
 * same value return the same temp.  On 32-bit hosts an I64 constant
 * is represented as a pair of adjacent I32 temps.
 */
TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    if (h == NULL) {
        /* Lazily create the per-type table on first use. */
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        /* Key pointer must outlive the table; point it at ts->val. */
        int64_t *val_ptr;

        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + 1);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;

            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = 1;

            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
             */
            ts[HOST_BIG_ENDIAN].val = val;
            ts[!HOST_BIG_ENDIAN].val = val >> 32;
            val_ptr = &ts[HOST_BIG_ENDIAN].val;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
            val_ptr = &ts->val;
        }
        g_hash_table_insert(h, val_ptr, ts);
    }

    return ts;
}
2068 
2069 TCGv_i32 tcg_constant_i32(int32_t val)
2070 {
2071     return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val));
2072 }
2073 
2074 TCGv_i64 tcg_constant_i64(int64_t val)
2075 {
2076     return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val));
2077 }
2078 
2079 TCGv_ptr tcg_constant_ptr_int(intptr_t val)
2080 {
2081     return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val));
2082 }
2083 
2084 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
2085 {
2086     val = dup_const(vece, val);
2087     return temp_tcgv_vec(tcg_constant_internal(type, val));
2088 }
2089 
2090 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
2091 {
2092     TCGTemp *t = tcgv_vec_temp(match);
2093 
2094     tcg_debug_assert(t->temp_allocated != 0);
2095     return tcg_constant_vec(t->base_type, vece, val);
2096 }
2097 
2098 #ifdef CONFIG_DEBUG_TCG
/* Return the index of TS within the active context's temps[] array. */
size_t temp_idx(TCGTemp *ts)
{
    ptrdiff_t n = ts - tcg_ctx->temps;
    /* TS must point into temps[]; anything else is a corrupted handle. */
    assert(n >= 0 && n < tcg_ctx->nb_temps);
    return n;
}
2105 
/*
 * Convert an opaque TCGv handle back to its TCGTemp.  The handle V
 * encodes the byte offset of the temp from the start of TCGContext,
 * so the temp is recovered by adding V to the context pointer.
 */
TCGTemp *tcgv_i32_temp(TCGv_i32 v)
{
    /* Offset of the temp within the temps[] array. */
    uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);

    /* Validate that V lands on a valid, aligned temps[] element. */
    assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
    assert(o % sizeof(TCGTemp) == 0);

    return (void *)tcg_ctx + (uintptr_t)v;
}
2115 #endif /* CONFIG_DEBUG_TCG */
2116 
2117 /*
2118  * Return true if OP may appear in the opcode stream with TYPE.
2119  * Test the runtime variable that controls each opcode.
2120  */
2121 bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
2122 {
2123     bool has_type;
2124 
2125     switch (type) {
2126     case TCG_TYPE_I32:
2127         has_type = true;
2128         break;
2129     case TCG_TYPE_I64:
2130         has_type = TCG_TARGET_REG_BITS == 64;
2131         break;
2132     case TCG_TYPE_V64:
2133         has_type = TCG_TARGET_HAS_v64;
2134         break;
2135     case TCG_TYPE_V128:
2136         has_type = TCG_TARGET_HAS_v128;
2137         break;
2138     case TCG_TYPE_V256:
2139         has_type = TCG_TARGET_HAS_v256;
2140         break;
2141     default:
2142         has_type = false;
2143         break;
2144     }
2145 
2146     switch (op) {
2147     case INDEX_op_discard:
2148     case INDEX_op_set_label:
2149     case INDEX_op_call:
2150     case INDEX_op_br:
2151     case INDEX_op_mb:
2152     case INDEX_op_insn_start:
2153     case INDEX_op_exit_tb:
2154     case INDEX_op_goto_tb:
2155     case INDEX_op_goto_ptr:
2156     case INDEX_op_qemu_ld_a32_i32:
2157     case INDEX_op_qemu_ld_a64_i32:
2158     case INDEX_op_qemu_st_a32_i32:
2159     case INDEX_op_qemu_st_a64_i32:
2160     case INDEX_op_qemu_ld_a32_i64:
2161     case INDEX_op_qemu_ld_a64_i64:
2162     case INDEX_op_qemu_st_a32_i64:
2163     case INDEX_op_qemu_st_a64_i64:
2164         return true;
2165 
2166     case INDEX_op_qemu_st8_a32_i32:
2167     case INDEX_op_qemu_st8_a64_i32:
2168         return TCG_TARGET_HAS_qemu_st8_i32;
2169 
2170     case INDEX_op_qemu_ld_a32_i128:
2171     case INDEX_op_qemu_ld_a64_i128:
2172     case INDEX_op_qemu_st_a32_i128:
2173     case INDEX_op_qemu_st_a64_i128:
2174         return TCG_TARGET_HAS_qemu_ldst_i128;
2175 
2176     case INDEX_op_mov_i32:
2177     case INDEX_op_setcond_i32:
2178     case INDEX_op_brcond_i32:
2179     case INDEX_op_movcond_i32:
2180     case INDEX_op_ld8u_i32:
2181     case INDEX_op_ld8s_i32:
2182     case INDEX_op_ld16u_i32:
2183     case INDEX_op_ld16s_i32:
2184     case INDEX_op_ld_i32:
2185     case INDEX_op_st8_i32:
2186     case INDEX_op_st16_i32:
2187     case INDEX_op_st_i32:
2188     case INDEX_op_add_i32:
2189     case INDEX_op_sub_i32:
2190     case INDEX_op_neg_i32:
2191     case INDEX_op_mul_i32:
2192     case INDEX_op_and_i32:
2193     case INDEX_op_or_i32:
2194     case INDEX_op_xor_i32:
2195     case INDEX_op_shl_i32:
2196     case INDEX_op_shr_i32:
2197     case INDEX_op_sar_i32:
2198     case INDEX_op_extract_i32:
2199     case INDEX_op_sextract_i32:
2200     case INDEX_op_deposit_i32:
2201         return true;
2202 
2203     case INDEX_op_negsetcond_i32:
2204         return TCG_TARGET_HAS_negsetcond_i32;
2205     case INDEX_op_div_i32:
2206     case INDEX_op_divu_i32:
2207         return TCG_TARGET_HAS_div_i32;
2208     case INDEX_op_rem_i32:
2209     case INDEX_op_remu_i32:
2210         return TCG_TARGET_HAS_rem_i32;
2211     case INDEX_op_div2_i32:
2212     case INDEX_op_divu2_i32:
2213         return TCG_TARGET_HAS_div2_i32;
2214     case INDEX_op_rotl_i32:
2215     case INDEX_op_rotr_i32:
2216         return TCG_TARGET_HAS_rot_i32;
2217     case INDEX_op_extract2_i32:
2218         return TCG_TARGET_HAS_extract2_i32;
2219     case INDEX_op_add2_i32:
2220         return TCG_TARGET_HAS_add2_i32;
2221     case INDEX_op_sub2_i32:
2222         return TCG_TARGET_HAS_sub2_i32;
2223     case INDEX_op_mulu2_i32:
2224         return TCG_TARGET_HAS_mulu2_i32;
2225     case INDEX_op_muls2_i32:
2226         return TCG_TARGET_HAS_muls2_i32;
2227     case INDEX_op_muluh_i32:
2228         return TCG_TARGET_HAS_muluh_i32;
2229     case INDEX_op_mulsh_i32:
2230         return TCG_TARGET_HAS_mulsh_i32;
2231     case INDEX_op_ext8s_i32:
2232         return TCG_TARGET_HAS_ext8s_i32;
2233     case INDEX_op_ext16s_i32:
2234         return TCG_TARGET_HAS_ext16s_i32;
2235     case INDEX_op_ext8u_i32:
2236         return TCG_TARGET_HAS_ext8u_i32;
2237     case INDEX_op_ext16u_i32:
2238         return TCG_TARGET_HAS_ext16u_i32;
2239     case INDEX_op_bswap16_i32:
2240         return TCG_TARGET_HAS_bswap16_i32;
2241     case INDEX_op_bswap32_i32:
2242         return TCG_TARGET_HAS_bswap32_i32;
2243     case INDEX_op_not_i32:
2244         return TCG_TARGET_HAS_not_i32;
2245     case INDEX_op_andc_i32:
2246         return TCG_TARGET_HAS_andc_i32;
2247     case INDEX_op_orc_i32:
2248         return TCG_TARGET_HAS_orc_i32;
2249     case INDEX_op_eqv_i32:
2250         return TCG_TARGET_HAS_eqv_i32;
2251     case INDEX_op_nand_i32:
2252         return TCG_TARGET_HAS_nand_i32;
2253     case INDEX_op_nor_i32:
2254         return TCG_TARGET_HAS_nor_i32;
2255     case INDEX_op_clz_i32:
2256         return TCG_TARGET_HAS_clz_i32;
2257     case INDEX_op_ctz_i32:
2258         return TCG_TARGET_HAS_ctz_i32;
2259     case INDEX_op_ctpop_i32:
2260         return TCG_TARGET_HAS_ctpop_i32;
2261 
2262     case INDEX_op_brcond2_i32:
2263     case INDEX_op_setcond2_i32:
2264         return TCG_TARGET_REG_BITS == 32;
2265 
2266     case INDEX_op_mov_i64:
2267     case INDEX_op_setcond_i64:
2268     case INDEX_op_brcond_i64:
2269     case INDEX_op_movcond_i64:
2270     case INDEX_op_ld8u_i64:
2271     case INDEX_op_ld8s_i64:
2272     case INDEX_op_ld16u_i64:
2273     case INDEX_op_ld16s_i64:
2274     case INDEX_op_ld32u_i64:
2275     case INDEX_op_ld32s_i64:
2276     case INDEX_op_ld_i64:
2277     case INDEX_op_st8_i64:
2278     case INDEX_op_st16_i64:
2279     case INDEX_op_st32_i64:
2280     case INDEX_op_st_i64:
2281     case INDEX_op_add_i64:
2282     case INDEX_op_sub_i64:
2283     case INDEX_op_neg_i64:
2284     case INDEX_op_mul_i64:
2285     case INDEX_op_and_i64:
2286     case INDEX_op_or_i64:
2287     case INDEX_op_xor_i64:
2288     case INDEX_op_shl_i64:
2289     case INDEX_op_shr_i64:
2290     case INDEX_op_sar_i64:
2291     case INDEX_op_ext_i32_i64:
2292     case INDEX_op_extu_i32_i64:
2293     case INDEX_op_extract_i64:
2294     case INDEX_op_sextract_i64:
2295     case INDEX_op_deposit_i64:
2296         return TCG_TARGET_REG_BITS == 64;
2297 
2298     case INDEX_op_negsetcond_i64:
2299         return TCG_TARGET_HAS_negsetcond_i64;
2300     case INDEX_op_div_i64:
2301     case INDEX_op_divu_i64:
2302         return TCG_TARGET_HAS_div_i64;
2303     case INDEX_op_rem_i64:
2304     case INDEX_op_remu_i64:
2305         return TCG_TARGET_HAS_rem_i64;
2306     case INDEX_op_div2_i64:
2307     case INDEX_op_divu2_i64:
2308         return TCG_TARGET_HAS_div2_i64;
2309     case INDEX_op_rotl_i64:
2310     case INDEX_op_rotr_i64:
2311         return TCG_TARGET_HAS_rot_i64;
2312     case INDEX_op_extract2_i64:
2313         return TCG_TARGET_HAS_extract2_i64;
2314     case INDEX_op_extrl_i64_i32:
2315     case INDEX_op_extrh_i64_i32:
2316         return TCG_TARGET_HAS_extr_i64_i32;
2317     case INDEX_op_ext8s_i64:
2318         return TCG_TARGET_HAS_ext8s_i64;
2319     case INDEX_op_ext16s_i64:
2320         return TCG_TARGET_HAS_ext16s_i64;
2321     case INDEX_op_ext32s_i64:
2322         return TCG_TARGET_HAS_ext32s_i64;
2323     case INDEX_op_ext8u_i64:
2324         return TCG_TARGET_HAS_ext8u_i64;
2325     case INDEX_op_ext16u_i64:
2326         return TCG_TARGET_HAS_ext16u_i64;
2327     case INDEX_op_ext32u_i64:
2328         return TCG_TARGET_HAS_ext32u_i64;
2329     case INDEX_op_bswap16_i64:
2330         return TCG_TARGET_HAS_bswap16_i64;
2331     case INDEX_op_bswap32_i64:
2332         return TCG_TARGET_HAS_bswap32_i64;
2333     case INDEX_op_bswap64_i64:
2334         return TCG_TARGET_HAS_bswap64_i64;
2335     case INDEX_op_not_i64:
2336         return TCG_TARGET_HAS_not_i64;
2337     case INDEX_op_andc_i64:
2338         return TCG_TARGET_HAS_andc_i64;
2339     case INDEX_op_orc_i64:
2340         return TCG_TARGET_HAS_orc_i64;
2341     case INDEX_op_eqv_i64:
2342         return TCG_TARGET_HAS_eqv_i64;
2343     case INDEX_op_nand_i64:
2344         return TCG_TARGET_HAS_nand_i64;
2345     case INDEX_op_nor_i64:
2346         return TCG_TARGET_HAS_nor_i64;
2347     case INDEX_op_clz_i64:
2348         return TCG_TARGET_HAS_clz_i64;
2349     case INDEX_op_ctz_i64:
2350         return TCG_TARGET_HAS_ctz_i64;
2351     case INDEX_op_ctpop_i64:
2352         return TCG_TARGET_HAS_ctpop_i64;
2353     case INDEX_op_add2_i64:
2354         return TCG_TARGET_HAS_add2_i64;
2355     case INDEX_op_sub2_i64:
2356         return TCG_TARGET_HAS_sub2_i64;
2357     case INDEX_op_mulu2_i64:
2358         return TCG_TARGET_HAS_mulu2_i64;
2359     case INDEX_op_muls2_i64:
2360         return TCG_TARGET_HAS_muls2_i64;
2361     case INDEX_op_muluh_i64:
2362         return TCG_TARGET_HAS_muluh_i64;
2363     case INDEX_op_mulsh_i64:
2364         return TCG_TARGET_HAS_mulsh_i64;
2365 
2366     case INDEX_op_mov_vec:
2367     case INDEX_op_dup_vec:
2368     case INDEX_op_dupm_vec:
2369     case INDEX_op_ld_vec:
2370     case INDEX_op_st_vec:
2371     case INDEX_op_add_vec:
2372     case INDEX_op_sub_vec:
2373     case INDEX_op_and_vec:
2374     case INDEX_op_or_vec:
2375     case INDEX_op_xor_vec:
2376     case INDEX_op_cmp_vec:
2377         return has_type;
2378     case INDEX_op_dup2_vec:
2379         return has_type && TCG_TARGET_REG_BITS == 32;
2380     case INDEX_op_not_vec:
2381         return has_type && TCG_TARGET_HAS_not_vec;
2382     case INDEX_op_neg_vec:
2383         return has_type && TCG_TARGET_HAS_neg_vec;
2384     case INDEX_op_abs_vec:
2385         return has_type && TCG_TARGET_HAS_abs_vec;
2386     case INDEX_op_andc_vec:
2387         return has_type && TCG_TARGET_HAS_andc_vec;
2388     case INDEX_op_orc_vec:
2389         return has_type && TCG_TARGET_HAS_orc_vec;
2390     case INDEX_op_nand_vec:
2391         return has_type && TCG_TARGET_HAS_nand_vec;
2392     case INDEX_op_nor_vec:
2393         return has_type && TCG_TARGET_HAS_nor_vec;
2394     case INDEX_op_eqv_vec:
2395         return has_type && TCG_TARGET_HAS_eqv_vec;
2396     case INDEX_op_mul_vec:
2397         return has_type && TCG_TARGET_HAS_mul_vec;
2398     case INDEX_op_shli_vec:
2399     case INDEX_op_shri_vec:
2400     case INDEX_op_sari_vec:
2401         return has_type && TCG_TARGET_HAS_shi_vec;
2402     case INDEX_op_shls_vec:
2403     case INDEX_op_shrs_vec:
2404     case INDEX_op_sars_vec:
2405         return has_type && TCG_TARGET_HAS_shs_vec;
2406     case INDEX_op_shlv_vec:
2407     case INDEX_op_shrv_vec:
2408     case INDEX_op_sarv_vec:
2409         return has_type && TCG_TARGET_HAS_shv_vec;
2410     case INDEX_op_rotli_vec:
2411         return has_type && TCG_TARGET_HAS_roti_vec;
2412     case INDEX_op_rotls_vec:
2413         return has_type && TCG_TARGET_HAS_rots_vec;
2414     case INDEX_op_rotlv_vec:
2415     case INDEX_op_rotrv_vec:
2416         return has_type && TCG_TARGET_HAS_rotv_vec;
2417     case INDEX_op_ssadd_vec:
2418     case INDEX_op_usadd_vec:
2419     case INDEX_op_sssub_vec:
2420     case INDEX_op_ussub_vec:
2421         return has_type && TCG_TARGET_HAS_sat_vec;
2422     case INDEX_op_smin_vec:
2423     case INDEX_op_umin_vec:
2424     case INDEX_op_smax_vec:
2425     case INDEX_op_umax_vec:
2426         return has_type && TCG_TARGET_HAS_minmax_vec;
2427     case INDEX_op_bitsel_vec:
2428         return has_type && TCG_TARGET_HAS_bitsel_vec;
2429     case INDEX_op_cmpsel_vec:
2430         return has_type && TCG_TARGET_HAS_cmpsel_vec;
2431 
2432     default:
2433         tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
2434         return true;
2435     }
2436 }
2437 
2438 bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len)
2439 {
2440     unsigned width;
2441 
2442     tcg_debug_assert(type == TCG_TYPE_I32 || type == TCG_TYPE_I64);
2443     width = (type == TCG_TYPE_I32 ? 32 : 64);
2444 
2445     tcg_debug_assert(ofs < width);
2446     tcg_debug_assert(len > 0);
2447     tcg_debug_assert(len <= width - ofs);
2448 
2449     return TCG_TARGET_deposit_valid(type, ofs, len);
2450 }
2451 
2452 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2453 
/*
 * Emit an INDEX_op_call for helper FUNC described by INFO, with output
 * temp RET (NULL if the helper returns void) and input temps ARGS.
 * 32-bit inputs that the ABI requires in 64-bit slots are widened into
 * scratch temps which are freed again after the op is emitted.
 */
static void tcg_gen_callN(void *func, TCGHelperInfo *info,
                          TCGTemp *ret, TCGTemp **args)
{
    TCGv_i64 extend_free[MAX_CALL_IARGS];
    int n_extend = 0;
    TCGOp *op;
    int i, n, pi = 0, total_args;

    /* Compute the call layout for this helper once, thread-safely. */
    if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
        init_call_layout(info);
        g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
    }

    /* Outputs + inputs + the trailing func/info words. */
    total_args = info->nr_out + info->nr_in + 2;
    op = tcg_op_alloc(INDEX_op_call, total_args);

#ifdef CONFIG_PLUGIN
    /* Flag helpers that may affect guest state */
    if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
        tcg_ctx->plugin_insn->calls_helpers = true;
    }
#endif

    TCGOP_CALLO(op) = n = info->nr_out;
    switch (n) {
    case 0:
        tcg_debug_assert(ret == NULL);
        break;
    case 1:
        tcg_debug_assert(ret != NULL);
        op->args[pi++] = temp_arg(ret);
        break;
    case 2:
    case 4:
        /* Multi-part return: RET must be the first of n adjacent parts. */
        tcg_debug_assert(ret != NULL);
        tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
        tcg_debug_assert(ret->temp_subindex == 0);
        for (i = 0; i < n; ++i) {
            op->args[pi++] = temp_arg(ret + i);
        }
        break;
    default:
        g_assert_not_reached();
    }

    TCGOP_CALLI(op) = n = info->nr_in;
    for (i = 0; i < n; i++) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_BY_REF:
        case TCG_CALL_ARG_BY_REF_N:
            op->args[pi++] = temp_arg(ts);
            break;

        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            {
                /* ABI wants this 32-bit value extended to 64 bits. */
                TCGv_i64 temp = tcg_temp_ebb_new_i64();
                TCGv_i32 orig = temp_tcgv_i32(ts);

                if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
                    tcg_gen_ext_i32_i64(temp, orig);
                } else {
                    tcg_gen_extu_i32_i64(temp, orig);
                }
                op->args[pi++] = tcgv_i64_arg(temp);
                extend_free[n_extend++] = temp;
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    /* Trailing words: the helper address and its descriptor. */
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = (uintptr_t)info;
    tcg_debug_assert(pi == total_args);

    if (tcg_ctx->emit_before_op) {
        QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
    } else {
        QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
    }

    /* The widened scratch temps are dead once the call op is emitted. */
    tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
    for (i = 0; i < n_extend; ++i) {
        tcg_temp_free_i64(extend_free[i]);
    }
}
2546 
/* Emit a call to a helper taking no arguments. */
void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret)
{
    tcg_gen_callN(func, info, ret, NULL);
}
2551 
2552 void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2553 {
2554     tcg_gen_callN(func, info, ret, &t1);
2555 }
2556 
2557 void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret,
2558                    TCGTemp *t1, TCGTemp *t2)
2559 {
2560     TCGTemp *args[2] = { t1, t2 };
2561     tcg_gen_callN(func, info, ret, args);
2562 }
2563 
2564 void tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret,
2565                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3)
2566 {
2567     TCGTemp *args[3] = { t1, t2, t3 };
2568     tcg_gen_callN(func, info, ret, args);
2569 }
2570 
2571 void tcg_gen_call4(void *func, TCGHelperInfo *info, TCGTemp *ret,
2572                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2573 {
2574     TCGTemp *args[4] = { t1, t2, t3, t4 };
2575     tcg_gen_callN(func, info, ret, args);
2576 }
2577 
2578 void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2579                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2580 {
2581     TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2582     tcg_gen_callN(func, info, ret, args);
2583 }
2584 
2585 void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret,
2586                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3,
2587                    TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2588 {
2589     TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2590     tcg_gen_callN(func, info, ret, args);
2591 }
2592 
2593 void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2594                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2595                    TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2596 {
2597     TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2598     tcg_gen_callN(func, info, ret, args);
2599 }
2600 
/*
 * Reset the register allocator state at the start of translating a TB:
 * give every temp its initial location kind and clear the host
 * register -> temp mapping.
 */
static void tcg_reg_alloc_start(TCGContext *s)
{
    int i, n;

    for (i = 0, n = s->nb_temps; i < n; i++) {
        TCGTemp *ts = &s->temps[i];
        /* Default: globals start out in their canonical memory slot. */
        TCGTempVal val = TEMP_VAL_MEM;

        switch (ts->kind) {
        case TEMP_CONST:
            val = TEMP_VAL_CONST;
            break;
        case TEMP_FIXED:
            val = TEMP_VAL_REG;
            break;
        case TEMP_GLOBAL:
            break;
        case TEMP_EBB:
            /* EBB temps start with no value at all. */
            val = TEMP_VAL_DEAD;
            /* fall through */
        case TEMP_TB:
            /* Local temps get a fresh stack slot when first spilled. */
            ts->mem_allocated = 0;
            break;
        default:
            g_assert_not_reached();
        }
        ts->val_type = val;
    }

    memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
}
2632 
/*
 * Format a human-readable name for TS into BUF (for opcode dumps):
 * globals by name, locals as "locN", EBB temps as "tmpN", and
 * constants as their value.  Returns BUF.
 */
static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
                                 TCGTemp *ts)
{
    int idx = temp_idx(ts);

    switch (ts->kind) {
    case TEMP_FIXED:
    case TEMP_GLOBAL:
        pstrcpy(buf, buf_size, ts->name);
        break;
    case TEMP_TB:
        /* Number non-globals starting from 0. */
        snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
        break;
    case TEMP_EBB:
        snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
        break;
    case TEMP_CONST:
        switch (ts->type) {
        case TCG_TYPE_I32:
            snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
            break;
#if TCG_TARGET_REG_BITS > 32
        case TCG_TYPE_I64:
            snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
            break;
#endif
        case TCG_TYPE_V64:
        case TCG_TYPE_V128:
        case TCG_TYPE_V256:
            /* Vector constants: prefix with the vector width in bits. */
            snprintf(buf, buf_size, "v%d$0x%" PRIx64,
                     64 << (ts->type - TCG_TYPE_V64), ts->val);
            break;
        default:
            g_assert_not_reached();
        }
        break;
    }
    return buf;
}
2672 
2673 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2674                              int buf_size, TCGArg arg)
2675 {
2676     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2677 }
2678 
/* Printable names for TCGCond values, indexed by condition code. */
static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu",
    [TCG_COND_TSTEQ] = "tsteq",
    [TCG_COND_TSTNE] = "tstne",
};
2696 
/* Printable names for MemOp size/sign/endianness combinations. */
static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
{
    [MO_UB]   = "ub",
    [MO_SB]   = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEUQ] = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEUQ] = "beq",
    [MO_128 + MO_BE] = "beo",
    [MO_128 + MO_LE] = "leo",
};
2714 
/* Printable prefixes for MemOp alignment requirements. */
static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};
2725 
/* Printable prefixes for MemOp atomicity requirements. */
static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
    [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
    [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
    [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
    [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
    [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
    [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
};
2734 
/* Printable names for TCG_BSWAP_* flag combinations on bswap ops. */
static const char bswap_flag_name[][6] = {
    [TCG_BSWAP_IZ] = "iz",
    [TCG_BSWAP_OZ] = "oz",
    [TCG_BSWAP_OS] = "os",
    [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
    [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
};
2742 
#ifdef CONFIG_PLUGIN
/* Printable names for plugin callback emission points. */
static const char * const plugin_from_name[] = {
    "from-tb",
    "from-insn",
    "after-insn",
    "after-tb",
};
#endif
2751 
2752 static inline bool tcg_regset_single(TCGRegSet d)
2753 {
2754     return (d & (d - 1)) == 0;
2755 }
2756 
2757 static inline TCGReg tcg_regset_first(TCGRegSet d)
2758 {
2759     if (TCG_TARGET_NB_REGS <= 32) {
2760         return ctz32(d);
2761     } else {
2762         return ctz64(d);
2763     }
2764 }
2765 
/*
 * Return only the number of characters output -- no error return.
 * fprintf returns a negative value on error; clamp that to zero so
 * callers can keep a running column count without extra checks.
 */
#define ne_fprintf(...) \
    ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2769 
/*
 * Dump every op of the current translation to stream @f, one per line.
 * Constant arguments with known meanings (conditions, memory ops, bswap
 * flags, labels, barriers) are printed symbolically when possible,
 * falling back to hex.  If @have_prefs is true, also print the register
 * preference set for each output argument; liveness info (sync/dead
 * argument bits) is printed whenever op->life is set.
 */
void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
{
    char buf[128];
    TCGOp *op;

    QTAILQ_FOREACH(op, &s->ops, link) {
        int i, k, nb_oargs, nb_iargs, nb_cargs;
        const TCGOpDef *def;
        TCGOpcode c;
        int col = 0;  /* column count, used to align the liveness info */

        c = op->opc;
        def = &tcg_op_defs[c];

        if (c == INDEX_op_insn_start) {
            /* Guest insn boundary: print one 64-bit parameter per word. */
            nb_oargs = 0;
            col += ne_fprintf(f, "\n ----");

            for (i = 0, k = s->insn_start_words; i < k; ++i) {
                col += ne_fprintf(f, " %016" PRIx64,
                                  tcg_get_insn_start_param(op, i));
            }
        } else if (c == INDEX_op_call) {
            const TCGHelperInfo *info = tcg_call_info(op);
            void *func = tcg_call_func(op);

            /* variable number of arguments */
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            nb_cargs = def->nb_cargs;

            col += ne_fprintf(f, " %s ", def->name);

            /*
             * Print the function name from TCGHelperInfo, if available.
             * Note that plugins have a template function for the info,
             * but the actual function pointer comes from the plugin.
             */
            if (func == info->func) {
                col += ne_fprintf(f, "%s", info->name);
            } else {
                col += ne_fprintf(f, "plugin(%p)", func);
            }

            col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
            for (i = 0; i < nb_oargs; i++) {
                col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
                                                            op->args[i]));
            }
            for (i = 0; i < nb_iargs; i++) {
                TCGArg arg = op->args[nb_oargs + i];
                const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
                col += ne_fprintf(f, ",%s", t);
            }
        } else {
            /* Generic op: outputs, then inputs, then constant args. */
            col += ne_fprintf(f, " %s ", def->name);

            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            nb_cargs = def->nb_cargs;

            if (def->flags & TCG_OPF_VECTOR) {
                /* Vector width and element size prefix, e.g. "v128,e32,". */
                col += ne_fprintf(f, "v%d,e%d,",
                                  8 * tcg_type_size(TCGOP_TYPE(op)),
                                  8 << TCGOP_VECE(op));
            }

            k = 0;
            for (i = 0; i < nb_oargs; i++) {
                const char *sep =  k ? "," : "";
                col += ne_fprintf(f, "%s%s", sep,
                                  tcg_get_arg_str(s, buf, sizeof(buf),
                                                  op->args[k++]));
            }
            for (i = 0; i < nb_iargs; i++) {
                const char *sep =  k ? "," : "";
                col += ne_fprintf(f, "%s%s", sep,
                                  tcg_get_arg_str(s, buf, sizeof(buf),
                                                  op->args[k++]));
            }
            /*
             * First pass over constant args: decode those with known
             * symbolic forms.  On exit, i counts cargs consumed so the
             * final loop below prints the remainder numerically.
             */
            switch (c) {
            case INDEX_op_brcond_i32:
            case INDEX_op_setcond_i32:
            case INDEX_op_negsetcond_i32:
            case INDEX_op_movcond_i32:
            case INDEX_op_brcond2_i32:
            case INDEX_op_setcond2_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_setcond_i64:
            case INDEX_op_negsetcond_i64:
            case INDEX_op_movcond_i64:
            case INDEX_op_cmp_vec:
            case INDEX_op_cmpsel_vec:
                /* Comparison condition argument. */
                if (op->args[k] < ARRAY_SIZE(cond_name)
                    && cond_name[op->args[k]]) {
                    col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
                } else {
                    col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
                }
                i = 1;
                break;
            case INDEX_op_qemu_ld_a32_i32:
            case INDEX_op_qemu_ld_a64_i32:
            case INDEX_op_qemu_st_a32_i32:
            case INDEX_op_qemu_st_a64_i32:
            case INDEX_op_qemu_st8_a32_i32:
            case INDEX_op_qemu_st8_a64_i32:
            case INDEX_op_qemu_ld_a32_i64:
            case INDEX_op_qemu_ld_a64_i64:
            case INDEX_op_qemu_st_a32_i64:
            case INDEX_op_qemu_st_a64_i64:
            case INDEX_op_qemu_ld_a32_i128:
            case INDEX_op_qemu_ld_a64_i128:
            case INDEX_op_qemu_st_a32_i128:
            case INDEX_op_qemu_st_a64_i128:
                {
                    /* MemOpIdx argument: atomicity+alignment+op, mmu index. */
                    const char *s_al, *s_op, *s_at;
                    MemOpIdx oi = op->args[k++];
                    MemOp mop = get_memop(oi);
                    unsigned ix = get_mmuidx(oi);

                    s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
                    s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
                    s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
                    mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);

                    /* If all fields are accounted for, print symbolically. */
                    if (!mop && s_al && s_op && s_at) {
                        col += ne_fprintf(f, ",%s%s%s,%u",
                                          s_at, s_al, s_op, ix);
                    } else {
                        mop = get_memop(oi);
                        col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
                    }
                    i = 1;
                }
                break;
            case INDEX_op_bswap16_i32:
            case INDEX_op_bswap16_i64:
            case INDEX_op_bswap32_i32:
            case INDEX_op_bswap32_i64:
            case INDEX_op_bswap64_i64:
                {
                    /* Byte-swap flags argument. */
                    TCGArg flags = op->args[k];
                    const char *name = NULL;

                    if (flags < ARRAY_SIZE(bswap_flag_name)) {
                        name = bswap_flag_name[flags];
                    }
                    if (name) {
                        col += ne_fprintf(f, ",%s", name);
                    } else {
                        col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
                    }
                    i = k = 1;
                }
                break;
#ifdef CONFIG_PLUGIN
            case INDEX_op_plugin_cb:
                {
                    /* Plugin callback origin argument. */
                    TCGArg from = op->args[k++];
                    const char *name = NULL;

                    if (from < ARRAY_SIZE(plugin_from_name)) {
                        name = plugin_from_name[from];
                    }
                    if (name) {
                        col += ne_fprintf(f, "%s", name);
                    } else {
                        col += ne_fprintf(f, "$0x%" TCG_PRIlx, from);
                    }
                    i = 1;
                }
                break;
#endif
            default:
                i = 0;
                break;
            }
            /* Second pass: label and memory-barrier arguments. */
            switch (c) {
            case INDEX_op_set_label:
            case INDEX_op_br:
            case INDEX_op_brcond_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_brcond2_i32:
                col += ne_fprintf(f, "%s$L%d", k ? "," : "",
                                  arg_label(op->args[k])->id);
                i++, k++;
                break;
            case INDEX_op_mb:
                {
                    /* Decode barrier kind and memory-ordering flags. */
                    TCGBar membar = op->args[k];
                    const char *b_op, *m_op;

                    switch (membar & TCG_BAR_SC) {
                    case 0:
                        b_op = "none";
                        break;
                    case TCG_BAR_LDAQ:
                        b_op = "acq";
                        break;
                    case TCG_BAR_STRL:
                        b_op = "rel";
                        break;
                    case TCG_BAR_SC:
                        b_op = "seq";
                        break;
                    default:
                        g_assert_not_reached();
                    }

                    switch (membar & TCG_MO_ALL) {
                    case 0:
                        m_op = "none";
                        break;
                    case TCG_MO_LD_LD:
                        m_op = "rr";
                        break;
                    case TCG_MO_LD_ST:
                        m_op = "rw";
                        break;
                    case TCG_MO_ST_LD:
                        m_op = "wr";
                        break;
                    case TCG_MO_ST_ST:
                        m_op = "ww";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_LD_ST:
                        m_op = "rr+rw";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_ST_LD:
                        m_op = "rr+wr";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_ST_ST:
                        m_op = "rr+ww";
                        break;
                    case TCG_MO_LD_ST | TCG_MO_ST_LD:
                        m_op = "rw+wr";
                        break;
                    case TCG_MO_LD_ST | TCG_MO_ST_ST:
                        m_op = "rw+ww";
                        break;
                    case TCG_MO_ST_LD | TCG_MO_ST_ST:
                        m_op = "wr+ww";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
                        m_op = "rr+rw+wr";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
                        m_op = "rr+rw+ww";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
                        m_op = "rr+wr+ww";
                        break;
                    case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
                        m_op = "rw+wr+ww";
                        break;
                    case TCG_MO_ALL:
                        m_op = "all";
                        break;
                    default:
                        g_assert_not_reached();
                    }

                    col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
                    i++, k++;
                }
                break;
            default:
                break;
            }
            /* Any remaining constant args are printed as plain hex. */
            for (; i < nb_cargs; i++, k++) {
                col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
                                  op->args[k]);
            }
        }

        /* Pad to column 40 before appending liveness/preference info. */
        if (have_prefs || op->life) {
            for (; col < 40; ++col) {
                putc(' ', f);
            }
        }

        if (op->life) {
            unsigned life = op->life;

            /* Outputs that must be synced back to their backing slot. */
            if (life & (SYNC_ARG * 3)) {
                ne_fprintf(f, "  sync:");
                for (i = 0; i < 2; ++i) {
                    if (life & (SYNC_ARG << i)) {
                        ne_fprintf(f, " %d", i);
                    }
                }
            }
            /* Arguments whose value is dead after this op. */
            life /= DEAD_ARG;
            if (life) {
                ne_fprintf(f, "  dead:");
                for (i = 0; life; ++i, life >>= 1) {
                    if (life & 1) {
                        ne_fprintf(f, " %d", i);
                    }
                }
            }
        }

        if (have_prefs) {
            /* Register preference set for each output argument. */
            for (i = 0; i < nb_oargs; ++i) {
                TCGRegSet set = output_pref(op, i);

                if (i == 0) {
                    ne_fprintf(f, "  pref=");
                } else {
                    ne_fprintf(f, ",");
                }
                if (set == 0) {
                    ne_fprintf(f, "none");
                } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
                    ne_fprintf(f, "all");
#ifdef CONFIG_DEBUG_TCG
                } else if (tcg_regset_single(set)) {
                    TCGReg reg = tcg_regset_first(set);
                    ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
#endif
                } else if (TCG_TARGET_NB_REGS <= 32) {
                    ne_fprintf(f, "0x%x", (uint32_t)set);
                } else {
                    ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
                }
            }
        }

        putc('\n', f);
    }
}
3104 
3105 /* we give more priority to constraints with less registers */
3106 static int get_constraint_priority(const TCGArgConstraint *arg_ct, int k)
3107 {
3108     int n;
3109 
3110     arg_ct += k;
3111     n = ctpop64(arg_ct->regs);
3112 
3113     /*
3114      * Sort constraints of a single register first, which includes output
3115      * aliases (which must exactly match the input already allocated).
3116      */
3117     if (n == 1 || arg_ct->oalias) {
3118         return INT_MAX;
3119     }
3120 
3121     /*
3122      * Sort register pairs next, first then second immediately after.
3123      * Arbitrarily sort multiple pairs by the index of the first reg;
3124      * there shouldn't be many pairs.
3125      */
3126     switch (arg_ct->pair) {
3127     case 1:
3128     case 3:
3129         return (k + 1) * 2;
3130     case 2:
3131         return (arg_ct->pair_index + 1) * 2 - 1;
3132     }
3133 
3134     /* Finally, sort by decreasing register count. */
3135     assert(n > 1);
3136     return -n;
3137 }
3138 
3139 /* sort from highest priority to lowest */
3140 static void sort_constraints(TCGArgConstraint *a, int start, int n)
3141 {
3142     int i, j;
3143 
3144     for (i = 0; i < n; i++) {
3145         a[start + i].sort_index = start + i;
3146     }
3147     if (n <= 1) {
3148         return;
3149     }
3150     for (i = 0; i < n - 1; i++) {
3151         for (j = i + 1; j < n; j++) {
3152             int p1 = get_constraint_priority(a, a[start + i].sort_index);
3153             int p2 = get_constraint_priority(a, a[start + j].sort_index);
3154             if (p1 < p2) {
3155                 int tmp = a[start + i].sort_index;
3156                 a[start + i].sort_index = a[start + j].sort_index;
3157                 a[start + j].sort_index = tmp;
3158             }
3159         }
3160     }
3161 }
3162 
/* All-empty constraint array, for ops marked TCG_OPF_NOT_PRESENT. */
static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS];
/* Parsed constraints, one row per entry of constraint_sets[]. */
static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS];
3165 
/*
 * Parse every textual constraint set in constraint_sets[] into the
 * all_cts[] arrays: decode alias digits, '&' (new register), 'p'/'m'
 * (register pairs) and the per-target letter constraints from
 * tcg-target-con-str.h, fix up pairs that alias outputs, then sort
 * each set by allocation priority.  Called once at startup.
 */
static void process_constraint_sets(void)
{
    for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) {
        const TCGConstraintSet *tdefs = &constraint_sets[c];
        TCGArgConstraint *args_ct = all_cts[c];
        int nb_oargs = tdefs->nb_oargs;
        int nb_iargs = tdefs->nb_iargs;
        int nb_args = nb_oargs + nb_iargs;
        bool saw_alias_pair = false;

        for (int i = 0; i < nb_args; i++) {
            const char *ct_str = tdefs->args_ct_str[i];
            /* Outputs come first in args_ct_str, then inputs. */
            bool input_p = i >= nb_oargs;
            int o;

            /* Prefix characters that consume the whole string. */
            switch (*ct_str) {
            case '0' ... '9':
                /* Input aliased to output o: copy its constraint. */
                o = *ct_str - '0';
                tcg_debug_assert(input_p);
                tcg_debug_assert(o < nb_oargs);
                tcg_debug_assert(args_ct[o].regs != 0);
                tcg_debug_assert(!args_ct[o].oalias);
                args_ct[i] = args_ct[o];
                /* The output sets oalias.  */
                args_ct[o].oalias = 1;
                args_ct[o].alias_index = i;
                /* The input sets ialias. */
                args_ct[i].ialias = 1;
                args_ct[i].alias_index = o;
                if (args_ct[i].pair) {
                    saw_alias_pair = true;
                }
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case '&':
                /* Output must not overlap any input register. */
                tcg_debug_assert(!input_p);
                args_ct[i].newreg = true;
                ct_str++;
                break;

            case 'p': /* plus */
                /* Allocate to the register after the previous. */
                tcg_debug_assert(i > (input_p ? nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!args_ct[o].pair);
                tcg_debug_assert(!args_ct[o].ct);
                args_ct[i] = (TCGArgConstraint){
                    .pair = 2,
                    .pair_index = o,
                    .regs = args_ct[o].regs << 1,
                    .newreg = args_ct[o].newreg,
                };
                args_ct[o].pair = 1;
                args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case 'm': /* minus */
                /* Allocate to the register before the previous. */
                tcg_debug_assert(i > (input_p ? nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!args_ct[o].pair);
                tcg_debug_assert(!args_ct[o].ct);
                args_ct[i] = (TCGArgConstraint){
                    .pair = 1,
                    .pair_index = o,
                    .regs = args_ct[o].regs >> 1,
                    .newreg = args_ct[o].newreg,
                };
                args_ct[o].pair = 2;
                args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;
            }

            /* Remaining letters accumulate into ct/regs. */
            do {
                switch (*ct_str) {
                case 'i':
                    args_ct[i].ct |= TCG_CT_CONST;
                    break;

                /* Include all of the target-specific constraints. */

#undef CONST
#define CONST(CASE, MASK) \
    case CASE: args_ct[i].ct |= MASK; break;
#define REGS(CASE, MASK) \
    case CASE: args_ct[i].regs |= MASK; break;

#include "tcg-target-con-str.h"

#undef REGS
#undef CONST
                default:
                case '0' ... '9':
                case '&':
                case 'p':
                case 'm':
                    /* Typo in TCGConstraintSet constraint. */
                    g_assert_not_reached();
                }
            } while (*++ct_str != '\0');
        }

        /*
         * Fix up output pairs that are aliased with inputs.
         * When we created the alias, we copied pair from the output.
         * There are three cases:
         *    (1a) Pairs of inputs alias pairs of outputs.
         *    (1b) One input aliases the first of a pair of outputs.
         *    (2)  One input aliases the second of a pair of outputs.
         *
         * Case 1a is handled by making sure that the pair_index'es are
         * properly updated so that they appear the same as a pair of inputs.
         *
         * Case 1b is handled by setting the pair_index of the input to
         * itself, simply so it doesn't point to an unrelated argument.
         * Since we don't encounter the "second" during the input allocation
         * phase, nothing happens with the second half of the input pair.
         *
         * Case 2 is handled by setting the second input to pair=3, the
         * first output to pair=3, and the pair_index'es to match.
         */
        if (saw_alias_pair) {
            for (int i = nb_oargs; i < nb_args; i++) {
                int o, o2, i2;

                /*
                 * Since [0-9pm] must be alone in the constraint string,
                 * the only way they can both be set is if the pair comes
                 * from the output alias.
                 */
                if (!args_ct[i].ialias) {
                    continue;
                }
                switch (args_ct[i].pair) {
                case 0:
                    break;
                case 1:
                    o = args_ct[i].alias_index;
                    o2 = args_ct[o].pair_index;
                    tcg_debug_assert(args_ct[o].pair == 1);
                    tcg_debug_assert(args_ct[o2].pair == 2);
                    if (args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = args_ct[o2].alias_index;
                        tcg_debug_assert(args_ct[i2].pair == 2);
                        args_ct[i2].pair_index = i;
                        args_ct[i].pair_index = i2;
                    } else {
                        /* Case 1b */
                        args_ct[i].pair_index = i;
                    }
                    break;
                case 2:
                    o = args_ct[i].alias_index;
                    o2 = args_ct[o].pair_index;
                    tcg_debug_assert(args_ct[o].pair == 2);
                    tcg_debug_assert(args_ct[o2].pair == 1);
                    if (args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = args_ct[o2].alias_index;
                        tcg_debug_assert(args_ct[i2].pair == 1);
                        args_ct[i2].pair_index = i;
                        args_ct[i].pair_index = i2;
                    } else {
                        /* Case 2 */
                        args_ct[i].pair = 3;
                        args_ct[o2].pair = 3;
                        args_ct[i].pair_index = o2;
                        args_ct[o2].pair_index = i;
                    }
                    break;
                default:
                    g_assert_not_reached();
                }
            }
        }

        /* sort the constraints (XXX: this is just an heuristic) */
        sort_constraints(args_ct, 0, nb_oargs);
        sort_constraints(args_ct, nb_oargs, nb_iargs);
    }
}
3351 
/*
 * Return the parsed argument-constraint array for @op, as produced by
 * process_constraint_sets().  Ops flagged TCG_OPF_NOT_PRESENT receive
 * an all-empty constraint set.
 */
static const TCGArgConstraint *opcode_args_ct(const TCGOp *op)
{
    const TCGOpDef *def = &tcg_op_defs[op->opc];
    TCGConstraintSetIndex con_set;

#ifdef CONFIG_DEBUG_TCG
    assert(tcg_op_supported(op->opc, TCGOP_TYPE(op), TCGOP_FLAGS(op)));
#endif

    if (def->flags & TCG_OPF_NOT_PRESENT) {
        return empty_cts;
    }

    /* The backend selects the constraint set for this opcode/type/flags. */
    con_set = tcg_target_op_def(op->opc, TCGOP_TYPE(op), TCGOP_FLAGS(op));
    tcg_debug_assert(con_set >= 0 && con_set < ARRAY_SIZE(constraint_sets));

    /* The constraint arguments must match TCGOpcode arguments. */
    tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs);
    tcg_debug_assert(constraint_sets[con_set].nb_iargs == def->nb_iargs);

    return all_cts[con_set];
}
3374 
/*
 * Remove the use-record linking @op to the label stored at argument
 * index @idx.  The op must be present in that label's branch list;
 * anything else is a bookkeeping bug.
 */
static void remove_label_use(TCGOp *op, int idx)
{
    TCGLabel *label = arg_label(op->args[idx]);
    TCGLabelUse *use;

    QSIMPLEQ_FOREACH(use, &label->branches, next) {
        if (use->op == op) {
            QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
            return;
        }
    }
    g_assert_not_reached();
}
3388 
3389 void tcg_op_remove(TCGContext *s, TCGOp *op)
3390 {
3391     switch (op->opc) {
3392     case INDEX_op_br:
3393         remove_label_use(op, 0);
3394         break;
3395     case INDEX_op_brcond_i32:
3396     case INDEX_op_brcond_i64:
3397         remove_label_use(op, 3);
3398         break;
3399     case INDEX_op_brcond2_i32:
3400         remove_label_use(op, 5);
3401         break;
3402     default:
3403         break;
3404     }
3405 
3406     QTAILQ_REMOVE(&s->ops, op, link);
3407     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3408     s->nb_ops--;
3409 }
3410 
3411 void tcg_remove_ops_after(TCGOp *op)
3412 {
3413     TCGContext *s = tcg_ctx;
3414 
3415     while (true) {
3416         TCGOp *last = tcg_last_op();
3417         if (last == op) {
3418             return;
3419         }
3420         tcg_op_remove(s, last);
3421     }
3422 }
3423 
3424 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3425 {
3426     TCGContext *s = tcg_ctx;
3427     TCGOp *op = NULL;
3428 
3429     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3430         QTAILQ_FOREACH(op, &s->free_ops, link) {
3431             if (nargs <= op->nargs) {
3432                 QTAILQ_REMOVE(&s->free_ops, op, link);
3433                 nargs = op->nargs;
3434                 goto found;
3435             }
3436         }
3437     }
3438 
3439     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3440     nargs = MAX(4, nargs);
3441     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3442 
3443  found:
3444     memset(op, 0, offsetof(TCGOp, link));
3445     op->opc = opc;
3446     op->nargs = nargs;
3447 
3448     /* Check for bitfield overflow. */
3449     tcg_debug_assert(op->nargs == nargs);
3450 
3451     s->nb_ops++;
3452     return op;
3453 }
3454 
3455 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3456 {
3457     TCGOp *op = tcg_op_alloc(opc, nargs);
3458 
3459     if (tcg_ctx->emit_before_op) {
3460         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
3461     } else {
3462         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3463     }
3464     return op;
3465 }
3466 
3467 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3468                             TCGOpcode opc, unsigned nargs)
3469 {
3470     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3471 
3472     TCGOP_TYPE(new_op) = TCGOP_TYPE(old_op);
3473     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3474     return new_op;
3475 }
3476 
3477 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3478                            TCGOpcode opc, unsigned nargs)
3479 {
3480     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3481 
3482     TCGOP_TYPE(new_op) = TCGOP_TYPE(old_op);
3483     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3484     return new_op;
3485 }
3486 
/*
 * Redirect every branch referencing label @from to reference @to
 * instead, then append @from's use list to @to's.  The argument index
 * patched per opcode must match the layout handled in tcg_op_remove().
 */
static void move_label_uses(TCGLabel *to, TCGLabel *from)
{
    TCGLabelUse *u;

    QSIMPLEQ_FOREACH(u, &from->branches, next) {
        TCGOp *op = u->op;
        switch (op->opc) {
        case INDEX_op_br:
            op->args[0] = label_arg(to);
            break;
        case INDEX_op_brcond_i32:
        case INDEX_op_brcond_i64:
            op->args[3] = label_arg(to);
            break;
        case INDEX_op_brcond2_i32:
            op->args[5] = label_arg(to);
            break;
        default:
            g_assert_not_reached();
        }
    }

    QSIMPLEQ_CONCAT(&to->branches, &from->branches);
}
3511 
/*
 * Reachable analysis : remove unreachable code.
 * A single forward scan: after an unconditional control transfer
 * (br, exit_tb, goto_ptr, noreturn call) ops are dead until the next
 * referenced label.  Also merges adjacent labels and deletes branches
 * to the immediately following label.
 */
static void __attribute__((noinline))
reachable_code_pass(TCGContext *s)
{
    TCGOp *op, *op_next, *op_prev;
    bool dead = false;

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        bool remove = dead;
        TCGLabel *label;

        switch (op->opc) {
        case INDEX_op_set_label:
            label = arg_label(op->args[0]);

            /*
             * Note that the first op in the TB is always a load,
             * so there is always something before a label.
             */
            op_prev = QTAILQ_PREV(op, link);

            /*
             * If we find two sequential labels, move all branches to
             * reference the second label and remove the first label.
             * Do this before branch to next optimization, so that the
             * middle label is out of the way.
             */
            if (op_prev->opc == INDEX_op_set_label) {
                move_label_uses(label, arg_label(op_prev->args[0]));
                tcg_op_remove(s, op_prev);
                op_prev = QTAILQ_PREV(op, link);
            }

            /*
             * Optimization can fold conditional branches to unconditional.
             * If we find a label which is preceded by an unconditional
             * branch to next, remove the branch.  We couldn't do this when
             * processing the branch because any dead code between the branch
             * and label had not yet been removed.
             */
            if (op_prev->opc == INDEX_op_br &&
                label == arg_label(op_prev->args[0])) {
                tcg_op_remove(s, op_prev);
                /* Fall through means insns become live again.  */
                dead = false;
            }

            if (QSIMPLEQ_EMPTY(&label->branches)) {
                /*
                 * While there is an occasional backward branch, virtually
                 * all branches generated by the translators are forward.
                 * Which means that generally we will have already removed
                 * all references to the label that will be, and there is
                 * little to be gained by iterating.
                 */
                remove = true;
            } else {
                /* Once we see a label, insns become live again.  */
                dead = false;
                remove = false;
            }
            break;

        case INDEX_op_br:
        case INDEX_op_exit_tb:
        case INDEX_op_goto_ptr:
            /* Unconditional branches; everything following is dead.  */
            dead = true;
            break;

        case INDEX_op_call:
            /* Notice noreturn helper calls, raising exceptions.  */
            if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
                dead = true;
            }
            break;

        case INDEX_op_insn_start:
            /* Never remove -- we need to keep these for unwind.  */
            remove = false;
            break;

        default:
            break;
        }

        if (remove) {
            tcg_op_remove(s, op);
        }
    }
}
3603 
3604 #define TS_DEAD  1
3605 #define TS_MEM   2
3606 
3607 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
3608 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
3609 
3610 /* For liveness_pass_1, the register preferences for a given temp.  */
3611 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3612 {
3613     return ts->state_ptr;
3614 }
3615 
3616 /* For liveness_pass_1, reset the preferences for a given temp to the
3617  * maximal regset for its type.
3618  */
3619 static inline void la_reset_pref(TCGTemp *ts)
3620 {
3621     *la_temp_pref(ts)
3622         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3623 }
3624 
3625 /* liveness analysis: end of function: all temps are dead, and globals
3626    should be in memory. */
3627 static void la_func_end(TCGContext *s, int ng, int nt)
3628 {
3629     int i;
3630 
3631     for (i = 0; i < ng; ++i) {
3632         s->temps[i].state = TS_DEAD | TS_MEM;
3633         la_reset_pref(&s->temps[i]);
3634     }
3635     for (i = ng; i < nt; ++i) {
3636         s->temps[i].state = TS_DEAD;
3637         la_reset_pref(&s->temps[i]);
3638     }
3639 }
3640 
3641 /* liveness analysis: end of basic block: all temps are dead, globals
3642    and local temps should be in memory. */
3643 static void la_bb_end(TCGContext *s, int ng, int nt)
3644 {
3645     int i;
3646 
3647     for (i = 0; i < nt; ++i) {
3648         TCGTemp *ts = &s->temps[i];
3649         int state;
3650 
3651         switch (ts->kind) {
3652         case TEMP_FIXED:
3653         case TEMP_GLOBAL:
3654         case TEMP_TB:
3655             state = TS_DEAD | TS_MEM;
3656             break;
3657         case TEMP_EBB:
3658         case TEMP_CONST:
3659             state = TS_DEAD;
3660             break;
3661         default:
3662             g_assert_not_reached();
3663         }
3664         ts->state = state;
3665         la_reset_pref(ts);
3666     }
3667 }
3668 
3669 /* liveness analysis: sync globals back to memory.  */
3670 static void la_global_sync(TCGContext *s, int ng)
3671 {
3672     int i;
3673 
3674     for (i = 0; i < ng; ++i) {
3675         int state = s->temps[i].state;
3676         s->temps[i].state = state | TS_MEM;
3677         if (state == TS_DEAD) {
3678             /* If the global was previously dead, reset prefs.  */
3679             la_reset_pref(&s->temps[i]);
3680         }
3681     }
3682 }
3683 
3684 /*
3685  * liveness analysis: conditional branch: all temps are dead unless
3686  * explicitly live-across-conditional-branch, globals and local temps
3687  * should be synced.
3688  */
3689 static void la_bb_sync(TCGContext *s, int ng, int nt)
3690 {
3691     la_global_sync(s, ng);
3692 
3693     for (int i = ng; i < nt; ++i) {
3694         TCGTemp *ts = &s->temps[i];
3695         int state;
3696 
3697         switch (ts->kind) {
3698         case TEMP_TB:
3699             state = ts->state;
3700             ts->state = state | TS_MEM;
3701             if (state != TS_DEAD) {
3702                 continue;
3703             }
3704             break;
3705         case TEMP_EBB:
3706         case TEMP_CONST:
3707             continue;
3708         default:
3709             g_assert_not_reached();
3710         }
3711         la_reset_pref(&s->temps[i]);
3712     }
3713 }
3714 
3715 /* liveness analysis: sync globals back to memory and kill.  */
3716 static void la_global_kill(TCGContext *s, int ng)
3717 {
3718     int i;
3719 
3720     for (i = 0; i < ng; i++) {
3721         s->temps[i].state = TS_DEAD | TS_MEM;
3722         la_reset_pref(&s->temps[i]);
3723     }
3724 }
3725 
3726 /* liveness analysis: note live globals crossing calls.  */
3727 static void la_cross_call(TCGContext *s, int nt)
3728 {
3729     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3730     int i;
3731 
3732     for (i = 0; i < nt; i++) {
3733         TCGTemp *ts = &s->temps[i];
3734         if (!(ts->state & TS_DEAD)) {
3735             TCGRegSet *pset = la_temp_pref(ts);
3736             TCGRegSet set = *pset;
3737 
3738             set &= mask;
3739             /* If the combination is not possible, restart.  */
3740             if (set == 0) {
3741                 set = tcg_target_available_regs[ts->type] & mask;
3742             }
3743             *pset = set;
3744         }
3745     }
3746 }
3747 
3748 /*
3749  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3750  * to TEMP_EBB, if possible.
3751  */
3752 static void __attribute__((noinline))
3753 liveness_pass_0(TCGContext *s)
3754 {
3755     void * const multiple_ebb = (void *)(uintptr_t)-1;
3756     int nb_temps = s->nb_temps;
3757     TCGOp *op, *ebb;
3758 
3759     for (int i = s->nb_globals; i < nb_temps; ++i) {
3760         s->temps[i].state_ptr = NULL;
3761     }
3762 
3763     /*
3764      * Represent each EBB by the op at which it begins.  In the case of
3765      * the first EBB, this is the first op, otherwise it is a label.
3766      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3767      * within a single EBB, else MULTIPLE_EBB.
3768      */
3769     ebb = QTAILQ_FIRST(&s->ops);
3770     QTAILQ_FOREACH(op, &s->ops, link) {
3771         const TCGOpDef *def;
3772         int nb_oargs, nb_iargs;
3773 
3774         switch (op->opc) {
3775         case INDEX_op_set_label:
3776             ebb = op;
3777             continue;
3778         case INDEX_op_discard:
3779             continue;
3780         case INDEX_op_call:
3781             nb_oargs = TCGOP_CALLO(op);
3782             nb_iargs = TCGOP_CALLI(op);
3783             break;
3784         default:
3785             def = &tcg_op_defs[op->opc];
3786             nb_oargs = def->nb_oargs;
3787             nb_iargs = def->nb_iargs;
3788             break;
3789         }
3790 
3791         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3792             TCGTemp *ts = arg_temp(op->args[i]);
3793 
3794             if (ts->kind != TEMP_TB) {
3795                 continue;
3796             }
3797             if (ts->state_ptr == NULL) {
3798                 ts->state_ptr = ebb;
3799             } else if (ts->state_ptr != ebb) {
3800                 ts->state_ptr = multiple_ebb;
3801             }
3802         }
3803     }
3804 
3805     /*
3806      * For TEMP_TB that turned out not to be used beyond one EBB,
3807      * reduce the liveness to TEMP_EBB.
3808      */
3809     for (int i = s->nb_globals; i < nb_temps; ++i) {
3810         TCGTemp *ts = &s->temps[i];
3811         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3812             ts->kind = TEMP_EBB;
3813         }
3814     }
3815 }
3816 
/*
 * Liveness analysis (pass 1): walk the op list backward, updating each
 * op's arg_life mask to record which arguments die at that op and which
 * outputs must be synced to memory.  Ops whose outputs are all dead and
 * which have no side effects are removed.  Register preferences for
 * each live temp are accumulated alongside, reachable via state_ptr.
 */
static void __attribute__((noinline))
liveness_pass_1(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps = s->nb_temps;
    TCGOp *op, *op_prev;
    TCGRegSet *prefs;
    int i;

    /* One preference regset per temp, addressed through ts->state_ptr. */
    prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
    for (i = 0; i < nb_temps; ++i) {
        s->temps[i].state_ptr = prefs + i;
    }

    /* ??? Should be redundant with the exit_tb that ends the TB.  */
    la_func_end(s, nb_globals, nb_temps);

    QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
        int nb_iargs, nb_oargs;
        TCGOpcode opc_new, opc_new2;
        bool have_opc_new2;
        TCGLifeData arg_life = 0;
        TCGTemp *ts;
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];
        const TCGArgConstraint *args_ct;

        switch (opc) {
        case INDEX_op_call:
            {
                const TCGHelperInfo *info = tcg_call_info(op);
                int call_flags = tcg_call_flags(op);

                nb_oargs = TCGOP_CALLO(op);
                nb_iargs = TCGOP_CALLI(op);

                /* pure functions can be removed if their result is unused */
                if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
                    for (i = 0; i < nb_oargs; i++) {
                        ts = arg_temp(op->args[i]);
                        if (ts->state != TS_DEAD) {
                            goto do_not_remove_call;
                        }
                    }
                    goto do_remove;
                }
            do_not_remove_call:

                /* Output args are dead.  */
                for (i = 0; i < nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                    if (ts->state & TS_MEM) {
                        arg_life |= SYNC_ARG << i;
                    }
                    ts->state = TS_DEAD;
                    la_reset_pref(ts);
                }

                /* Not used -- it will be tcg_target_call_oarg_reg().  */
                memset(op->output_pref, 0, sizeof(op->output_pref));

                if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
                                    TCG_CALL_NO_READ_GLOBALS))) {
                    la_global_kill(s, nb_globals);
                } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
                    la_global_sync(s, nb_globals);
                }

                /* Record arguments that die in this helper.  */
                for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                }

                /* For all live registers, remove call-clobbered prefs.  */
                la_cross_call(s, nb_temps);

                /*
                 * Input arguments are live for preceding opcodes.
                 *
                 * For those arguments that die, and will be allocated in
                 * registers, clear the register set for that arg, to be
                 * filled in below.  For args that will be on the stack,
                 * reset to any available reg.  Process arguments in reverse
                 * order so that if a temp is used more than once, the stack
                 * reset to max happens before the register reset to 0.
                 */
                for (i = nb_iargs - 1; i >= 0; i--) {
                    const TCGCallArgumentLoc *loc = &info->in[i];
                    ts = arg_temp(op->args[nb_oargs + i]);

                    if (ts->state & TS_DEAD) {
                        switch (loc->kind) {
                        case TCG_CALL_ARG_NORMAL:
                        case TCG_CALL_ARG_EXTEND_U:
                        case TCG_CALL_ARG_EXTEND_S:
                            if (arg_slot_reg_p(loc->arg_slot)) {
                                *la_temp_pref(ts) = 0;
                                break;
                            }
                            /* fall through */
                        default:
                            *la_temp_pref(ts) =
                                tcg_target_available_regs[ts->type];
                            break;
                        }
                        ts->state &= ~TS_DEAD;
                    }
                }

                /*
                 * For each input argument, add its input register to prefs.
                 * If a temp is used once, this produces a single set bit;
                 * if a temp is used multiple times, this produces a set.
                 */
                for (i = 0; i < nb_iargs; i++) {
                    const TCGCallArgumentLoc *loc = &info->in[i];
                    ts = arg_temp(op->args[nb_oargs + i]);

                    switch (loc->kind) {
                    case TCG_CALL_ARG_NORMAL:
                    case TCG_CALL_ARG_EXTEND_U:
                    case TCG_CALL_ARG_EXTEND_S:
                        if (arg_slot_reg_p(loc->arg_slot)) {
                            tcg_regset_set_reg(*la_temp_pref(ts),
                                tcg_target_call_iarg_regs[loc->arg_slot]);
                        }
                        break;
                    default:
                        break;
                    }
                }
            }
            break;
        case INDEX_op_insn_start:
            /* Not removable; carries unwind info rather than data flow. */
            break;
        case INDEX_op_discard:
            /* mark the temporary as dead */
            ts = arg_temp(op->args[0]);
            ts->state = TS_DEAD;
            la_reset_pref(ts);
            break;

        case INDEX_op_add2_i32:
            opc_new = INDEX_op_add_i32;
            goto do_addsub2;
        case INDEX_op_sub2_i32:
            opc_new = INDEX_op_sub_i32;
            goto do_addsub2;
        case INDEX_op_add2_i64:
            opc_new = INDEX_op_add_i64;
            goto do_addsub2;
        case INDEX_op_sub2_i64:
            opc_new = INDEX_op_sub_i64;
        do_addsub2:
            nb_iargs = 4;
            nb_oargs = 2;
            /* Test if the high part of the operation is dead, but not
               the low part.  The result can be optimized to a simple
               add or sub.  This happens often for x86_64 guest when the
               cpu mode is set to 32 bit.  */
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    goto do_remove;
                }
                /* Replace the opcode and adjust the args in place,
                   leaving 3 unused args at the end.  */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                /* Fall through and mark the single-word operation live.  */
                nb_iargs = 2;
                nb_oargs = 1;
            }
            goto do_not_remove;

        case INDEX_op_mulu2_i32:
            opc_new = INDEX_op_mul_i32;
            opc_new2 = INDEX_op_muluh_i32;
            have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
            goto do_mul2;
        case INDEX_op_muls2_i32:
            opc_new = INDEX_op_mul_i32;
            opc_new2 = INDEX_op_mulsh_i32;
            have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
            goto do_mul2;
        case INDEX_op_mulu2_i64:
            opc_new = INDEX_op_mul_i64;
            opc_new2 = INDEX_op_muluh_i64;
            have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
            goto do_mul2;
        case INDEX_op_muls2_i64:
            opc_new = INDEX_op_mul_i64;
            opc_new2 = INDEX_op_mulsh_i64;
            have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
            goto do_mul2;
        do_mul2:
            nb_iargs = 2;
            nb_oargs = 2;
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    /* Both parts of the operation are dead.  */
                    goto do_remove;
                }
                /* The high part of the operation is dead; generate the low. */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
                /* The low part of the operation is dead; generate the high. */
                op->opc = opc = opc_new2;
                op->args[0] = op->args[1];
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else {
                goto do_not_remove;
            }
            /* Mark the single-word operation live.  */
            nb_oargs = 1;
            goto do_not_remove;

        default:
            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Test if the operation can be removed because all
               its outputs are dead. We assume that nb_oargs == 0
               implies side effects */
            if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
                for (i = 0; i < nb_oargs; i++) {
                    if (arg_temp(op->args[i])->state != TS_DEAD) {
                        goto do_not_remove;
                    }
                }
                goto do_remove;
            }
            goto do_not_remove;

        do_remove:
            tcg_op_remove(s, op);
            break;

        do_not_remove:
            for (i = 0; i < nb_oargs; i++) {
                ts = arg_temp(op->args[i]);

                /* Remember the preference of the uses that followed.  */
                if (i < ARRAY_SIZE(op->output_pref)) {
                    op->output_pref[i] = *la_temp_pref(ts);
                }

                /* Output args are dead.  */
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
                if (ts->state & TS_MEM) {
                    arg_life |= SYNC_ARG << i;
                }
                ts->state = TS_DEAD;
                la_reset_pref(ts);
            }

            /* If end of basic block, update.  */
            if (def->flags & TCG_OPF_BB_EXIT) {
                la_func_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_COND_BRANCH) {
                la_bb_sync(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_BB_END) {
                la_bb_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                la_global_sync(s, nb_globals);
                if (def->flags & TCG_OPF_CALL_CLOBBER) {
                    la_cross_call(s, nb_temps);
                }
            }

            /* Record arguments that die in this opcode.  */
            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
            }

            /* Input arguments are live for preceding opcodes.  */
            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    /* For operands that were dead, initially allow
                       all regs for the type.  */
                    *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
                    ts->state &= ~TS_DEAD;
                }
            }

            /* Incorporate constraints for this operand.  */
            switch (opc) {
            case INDEX_op_mov_i32:
            case INDEX_op_mov_i64:
                /* Note that these are TCG_OPF_NOT_PRESENT and do not
                   have proper constraints.  That said, special case
                   moves to propagate preferences backward.  */
                if (IS_DEAD_ARG(1)) {
                    *la_temp_pref(arg_temp(op->args[0]))
                        = *la_temp_pref(arg_temp(op->args[1]));
                }
                break;

            default:
                args_ct = opcode_args_ct(op);
                for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                    const TCGArgConstraint *ct = &args_ct[i];
                    TCGRegSet set, *pset;

                    ts = arg_temp(op->args[i]);
                    pset = la_temp_pref(ts);
                    set = *pset;

                    set &= ct->regs;
                    if (ct->ialias) {
                        set &= output_pref(op, ct->alias_index);
                    }
                    /* If the combination is not possible, restart.  */
                    if (set == 0) {
                        set = ct->regs;
                    }
                    *pset = set;
                }
                break;
            }
            break;
        }
        op->life = arg_life;
    }
}
4161 
4162 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
4163 static bool __attribute__((noinline))
4164 liveness_pass_2(TCGContext *s)
4165 {
4166     int nb_globals = s->nb_globals;
4167     int nb_temps, i;
4168     bool changes = false;
4169     TCGOp *op, *op_next;
4170 
4171     /* Create a temporary for each indirect global.  */
4172     for (i = 0; i < nb_globals; ++i) {
4173         TCGTemp *its = &s->temps[i];
4174         if (its->indirect_reg) {
4175             TCGTemp *dts = tcg_temp_alloc(s);
4176             dts->type = its->type;
4177             dts->base_type = its->base_type;
4178             dts->temp_subindex = its->temp_subindex;
4179             dts->kind = TEMP_EBB;
4180             its->state_ptr = dts;
4181         } else {
4182             its->state_ptr = NULL;
4183         }
4184         /* All globals begin dead.  */
4185         its->state = TS_DEAD;
4186     }
4187     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
4188         TCGTemp *its = &s->temps[i];
4189         its->state_ptr = NULL;
4190         its->state = TS_DEAD;
4191     }
4192 
4193     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
4194         TCGOpcode opc = op->opc;
4195         const TCGOpDef *def = &tcg_op_defs[opc];
4196         TCGLifeData arg_life = op->life;
4197         int nb_iargs, nb_oargs, call_flags;
4198         TCGTemp *arg_ts, *dir_ts;
4199 
4200         if (opc == INDEX_op_call) {
4201             nb_oargs = TCGOP_CALLO(op);
4202             nb_iargs = TCGOP_CALLI(op);
4203             call_flags = tcg_call_flags(op);
4204         } else {
4205             nb_iargs = def->nb_iargs;
4206             nb_oargs = def->nb_oargs;
4207 
4208             /* Set flags similar to how calls require.  */
4209             if (def->flags & TCG_OPF_COND_BRANCH) {
4210                 /* Like reading globals: sync_globals */
4211                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4212             } else if (def->flags & TCG_OPF_BB_END) {
4213                 /* Like writing globals: save_globals */
4214                 call_flags = 0;
4215             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4216                 /* Like reading globals: sync_globals */
4217                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4218             } else {
4219                 /* No effect on globals.  */
4220                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
4221                               TCG_CALL_NO_WRITE_GLOBALS);
4222             }
4223         }
4224 
4225         /* Make sure that input arguments are available.  */
4226         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4227             arg_ts = arg_temp(op->args[i]);
4228             dir_ts = arg_ts->state_ptr;
4229             if (dir_ts && arg_ts->state == TS_DEAD) {
4230                 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
4231                                   ? INDEX_op_ld_i32
4232                                   : INDEX_op_ld_i64);
4233                 TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);
4234 
4235                 lop->args[0] = temp_arg(dir_ts);
4236                 lop->args[1] = temp_arg(arg_ts->mem_base);
4237                 lop->args[2] = arg_ts->mem_offset;
4238 
4239                 /* Loaded, but synced with memory.  */
4240                 arg_ts->state = TS_MEM;
4241             }
4242         }
4243 
4244         /* Perform input replacement, and mark inputs that became dead.
4245            No action is required except keeping temp_state up to date
4246            so that we reload when needed.  */
4247         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4248             arg_ts = arg_temp(op->args[i]);
4249             dir_ts = arg_ts->state_ptr;
4250             if (dir_ts) {
4251                 op->args[i] = temp_arg(dir_ts);
4252                 changes = true;
4253                 if (IS_DEAD_ARG(i)) {
4254                     arg_ts->state = TS_DEAD;
4255                 }
4256             }
4257         }
4258 
4259         /* Liveness analysis should ensure that the following are
4260            all correct, for call sites and basic block end points.  */
4261         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
4262             /* Nothing to do */
4263         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
4264             for (i = 0; i < nb_globals; ++i) {
4265                 /* Liveness should see that globals are synced back,
4266                    that is, either TS_DEAD or TS_MEM.  */
4267                 arg_ts = &s->temps[i];
4268                 tcg_debug_assert(arg_ts->state_ptr == 0
4269                                  || arg_ts->state != 0);
4270             }
4271         } else {
4272             for (i = 0; i < nb_globals; ++i) {
4273                 /* Liveness should see that globals are saved back,
4274                    that is, TS_DEAD, waiting to be reloaded.  */
4275                 arg_ts = &s->temps[i];
4276                 tcg_debug_assert(arg_ts->state_ptr == 0
4277                                  || arg_ts->state == TS_DEAD);
4278             }
4279         }
4280 
4281         /* Outputs become available.  */
4282         if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
4283             arg_ts = arg_temp(op->args[0]);
4284             dir_ts = arg_ts->state_ptr;
4285             if (dir_ts) {
4286                 op->args[0] = temp_arg(dir_ts);
4287                 changes = true;
4288 
4289                 /* The output is now live and modified.  */
4290                 arg_ts->state = 0;
4291 
4292                 if (NEED_SYNC_ARG(0)) {
4293                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4294                                       ? INDEX_op_st_i32
4295                                       : INDEX_op_st_i64);
4296                     TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
4297                     TCGTemp *out_ts = dir_ts;
4298 
4299                     if (IS_DEAD_ARG(0)) {
4300                         out_ts = arg_temp(op->args[1]);
4301                         arg_ts->state = TS_DEAD;
4302                         tcg_op_remove(s, op);
4303                     } else {
4304                         arg_ts->state = TS_MEM;
4305                     }
4306 
4307                     sop->args[0] = temp_arg(out_ts);
4308                     sop->args[1] = temp_arg(arg_ts->mem_base);
4309                     sop->args[2] = arg_ts->mem_offset;
4310                 } else {
4311                     tcg_debug_assert(!IS_DEAD_ARG(0));
4312                 }
4313             }
4314         } else {
4315             for (i = 0; i < nb_oargs; i++) {
4316                 arg_ts = arg_temp(op->args[i]);
4317                 dir_ts = arg_ts->state_ptr;
4318                 if (!dir_ts) {
4319                     continue;
4320                 }
4321                 op->args[i] = temp_arg(dir_ts);
4322                 changes = true;
4323 
4324                 /* The output is now live and modified.  */
4325                 arg_ts->state = 0;
4326 
4327                 /* Sync outputs upon their last write.  */
4328                 if (NEED_SYNC_ARG(i)) {
4329                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4330                                       ? INDEX_op_st_i32
4331                                       : INDEX_op_st_i64);
4332                     TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
4333 
4334                     sop->args[0] = temp_arg(dir_ts);
4335                     sop->args[1] = temp_arg(arg_ts->mem_base);
4336                     sop->args[2] = arg_ts->mem_offset;
4337 
4338                     arg_ts->state = TS_MEM;
4339                 }
4340                 /* Drop outputs that are dead.  */
4341                 if (IS_DEAD_ARG(i)) {
4342                     arg_ts->state = TS_DEAD;
4343                 }
4344             }
4345         }
4346     }
4347 
4348     return changes;
4349 }
4350 
/*
 * Allocate a stack-frame slot for @ts, setting mem_base, mem_offset
 * and mem_allocated.  If the temp is one part of a subdivided object
 * (base_type != type), slots are assigned to all sibling parts at once.
 * Raises a TB-overflow restart if the frame is exhausted.
 */
static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
{
    intptr_t off;
    int size, align;

    /* When allocating an object, look at the full type. */
    size = tcg_type_size(ts->base_type);
    switch (ts->base_type) {
    case TCG_TYPE_I32:
        align = 4;
        break;
    case TCG_TYPE_I64:
    case TCG_TYPE_V64:
        align = 8;
        break;
    case TCG_TYPE_I128:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        /*
         * Note that we do not require aligned storage for V256,
         * and that we provide alignment for I128 to match V128,
         * even if that's above what the host ABI requires.
         */
        align = 16;
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Assume the stack is sufficiently aligned.
     * This affects e.g. ARM NEON, where we have 8 byte stack alignment
     * and do not require 16 byte vector alignment.  This seems slightly
     * easier than fully parameterizing the above switch statement.
     */
    align = MIN(TCG_TARGET_STACK_ALIGN, align);
    off = ROUND_UP(s->current_frame_offset, align);

    /* If we've exhausted the stack frame, restart with a smaller TB. */
    if (off + size > s->frame_end) {
        tcg_raise_tb_overflow(s);
    }
    s->current_frame_offset = off + size;
#if defined(__sparc__)
    /* SPARC addresses the register window area with a constant bias. */
    off += TCG_TARGET_STACK_BIAS;
#endif

    /* If the object was subdivided, assign memory to all the parts. */
    if (ts->base_type != ts->type) {
        int part_size = tcg_type_size(ts->type);
        int part_count = size / part_size;

        /*
         * Each part is allocated sequentially in tcg_temp_new_internal.
         * Jump back to the first part by subtracting the current index.
         */
        ts -= ts->temp_subindex;
        for (int i = 0; i < part_count; ++i) {
            ts[i].mem_offset = off + i * part_size;
            ts[i].mem_base = s->frame_temp;
            ts[i].mem_allocated = 1;
        }
    } else {
        ts->mem_offset = off;
        ts->mem_base = s->frame_temp;
        ts->mem_allocated = 1;
    }
}
4419 
4420 /* Assign @reg to @ts, and update reg_to_temp[]. */
4421 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4422 {
4423     if (ts->val_type == TEMP_VAL_REG) {
4424         TCGReg old = ts->reg;
4425         tcg_debug_assert(s->reg_to_temp[old] == ts);
4426         if (old == reg) {
4427             return;
4428         }
4429         s->reg_to_temp[old] = NULL;
4430     }
4431     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4432     s->reg_to_temp[reg] = ts;
4433     ts->val_type = TEMP_VAL_REG;
4434     ts->reg = reg;
4435 }
4436 
4437 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4438 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4439 {
4440     tcg_debug_assert(type != TEMP_VAL_REG);
4441     if (ts->val_type == TEMP_VAL_REG) {
4442         TCGReg reg = ts->reg;
4443         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4444         s->reg_to_temp[reg] = NULL;
4445     }
4446     ts->val_type = type;
4447 }
4448 
4449 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4450 
4451 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4452    mark it free; otherwise mark it dead.  */
4453 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4454 {
4455     TCGTempVal new_type;
4456 
4457     switch (ts->kind) {
4458     case TEMP_FIXED:
4459         return;
4460     case TEMP_GLOBAL:
4461     case TEMP_TB:
4462         new_type = TEMP_VAL_MEM;
4463         break;
4464     case TEMP_EBB:
4465         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4466         break;
4467     case TEMP_CONST:
4468         new_type = TEMP_VAL_CONST;
4469         break;
4470     default:
4471         g_assert_not_reached();
4472     }
4473     set_temp_val_nonreg(s, ts, new_type);
4474 }
4475 
4476 /* Mark a temporary as dead.  */
4477 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4478 {
4479     temp_free_or_dead(s, ts, 1);
4480 }
4481 
/*
 * Sync a temporary to memory.  'allocated_regs' is used in case a
 * temporary register needs to be allocated to store a constant.
 * If 'free_or_dead' is non-zero, subsequently release the temporary;
 * if it is positive, the temp is dead; if it is negative, the temp is free.
 */
static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
                      TCGRegSet preferred_regs, int free_or_dead)
{
    /* Read-only temps are never written back; others only when stale. */
    if (!temp_readonly(ts) && !ts->mem_coherent) {
        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        switch (ts->val_type) {
        case TEMP_VAL_CONST:
            /* If we're going to free the temp immediately, then we won't
               require it later in a register, so attempt to store the
               constant to memory directly.  */
            if (free_or_dead
                && tcg_out_sti(s, ts->type, ts->val,
                               ts->mem_base->reg, ts->mem_offset)) {
                break;
            }
            /* Otherwise materialize the constant in a register first. */
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      allocated_regs, preferred_regs);
            /* fallthrough */

        case TEMP_VAL_REG:
            tcg_out_st(s, ts->type, ts->reg,
                       ts->mem_base->reg, ts->mem_offset);
            break;

        case TEMP_VAL_MEM:
            /* Value already lives in memory; no store required. */
            break;

        case TEMP_VAL_DEAD:
        default:
            g_assert_not_reached();
        }
        ts->mem_coherent = 1;
    }
    if (free_or_dead) {
        temp_free_or_dead(s, ts, free_or_dead);
    }
}
4525 
4526 /* free register 'reg' by spilling the corresponding temporary if necessary */
4527 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4528 {
4529     TCGTemp *ts = s->reg_to_temp[reg];
4530     if (ts != NULL) {
4531         temp_sync(s, ts, allocated_regs, 0, -1);
4532     }
4533 }
4534 
4535 /**
4536  * tcg_reg_alloc:
4537  * @required_regs: Set of registers in which we must allocate.
4538  * @allocated_regs: Set of registers which must be avoided.
4539  * @preferred_regs: Set of registers we should prefer.
4540  * @rev: True if we search the registers in "indirect" order.
4541  *
4542  * The allocated register must be in @required_regs & ~@allocated_regs,
4543  * but if we can put it in @preferred_regs we may save a move later.
4544  */
4545 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4546                             TCGRegSet allocated_regs,
4547                             TCGRegSet preferred_regs, bool rev)
4548 {
4549     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4550     TCGRegSet reg_ct[2];
4551     const int *order;
4552 
4553     reg_ct[1] = required_regs & ~allocated_regs;
4554     tcg_debug_assert(reg_ct[1] != 0);
4555     reg_ct[0] = reg_ct[1] & preferred_regs;
4556 
4557     /* Skip the preferred_regs option if it cannot be satisfied,
4558        or if the preference made no difference.  */
4559     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4560 
4561     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4562 
4563     /* Try free registers, preferences first.  */
4564     for (j = f; j < 2; j++) {
4565         TCGRegSet set = reg_ct[j];
4566 
4567         if (tcg_regset_single(set)) {
4568             /* One register in the set.  */
4569             TCGReg reg = tcg_regset_first(set);
4570             if (s->reg_to_temp[reg] == NULL) {
4571                 return reg;
4572             }
4573         } else {
4574             for (i = 0; i < n; i++) {
4575                 TCGReg reg = order[i];
4576                 if (s->reg_to_temp[reg] == NULL &&
4577                     tcg_regset_test_reg(set, reg)) {
4578                     return reg;
4579                 }
4580             }
4581         }
4582     }
4583 
4584     /* We must spill something.  */
4585     for (j = f; j < 2; j++) {
4586         TCGRegSet set = reg_ct[j];
4587 
4588         if (tcg_regset_single(set)) {
4589             /* One register in the set.  */
4590             TCGReg reg = tcg_regset_first(set);
4591             tcg_reg_free(s, reg, allocated_regs);
4592             return reg;
4593         } else {
4594             for (i = 0; i < n; i++) {
4595                 TCGReg reg = order[i];
4596                 if (tcg_regset_test_reg(set, reg)) {
4597                     tcg_reg_free(s, reg, allocated_regs);
4598                     return reg;
4599                 }
4600             }
4601         }
4602     }
4603 
4604     g_assert_not_reached();
4605 }
4606 
/*
 * Allocate a consecutive register pair and return the lower register.
 * Constraints mirror tcg_reg_alloc; @required_regs marks the low half
 * of each candidate pair.  Both registers of the chosen pair are
 * flushed (spilled if occupied) before returning.
 */
static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
                                 TCGRegSet allocated_regs,
                                 TCGRegSet preferred_regs, bool rev)
{
    int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
    TCGRegSet reg_ct[2];
    const int *order;

    /* Ensure that if I is not in allocated_regs, I+1 is not either. */
    reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
    tcg_debug_assert(reg_ct[1] != 0);
    reg_ct[0] = reg_ct[1] & preferred_regs;

    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;

    /*
     * Skip the preferred_regs option if it cannot be satisfied,
     * or if the preference made no difference.
     */
    k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];

    /*
     * Minimize the number of flushes by looking for 2 free registers first,
     * then a single flush, then two flushes.
     */
    for (fmin = 2; fmin >= 0; fmin--) {
        for (j = k; j < 2; j++) {
            TCGRegSet set = reg_ct[j];

            for (i = 0; i < n; i++) {
                TCGReg reg = order[i];

                if (tcg_regset_test_reg(set, reg)) {
                    /* Count how many registers of the pair are free. */
                    int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
                    if (f >= fmin) {
                        tcg_reg_free(s, reg, allocated_regs);
                        tcg_reg_free(s, reg + 1, allocated_regs);
                        return reg;
                    }
                }
            }
        }
    }
    /* fmin == 0 accepts any candidate, so the loop above must return. */
    g_assert_not_reached();
}
4652 
/* Make sure the temporary is in a register.  If needed, allocate the register
   from DESIRED while avoiding ALLOCATED.  */
static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
                      TCGRegSet allocated_regs, TCGRegSet preferred_regs)
{
    TCGReg reg;

    switch (ts->val_type) {
    case TEMP_VAL_REG:
        /* Already register-resident; nothing to do. */
        return;
    case TEMP_VAL_CONST:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        if (ts->type <= TCG_TYPE_I64) {
            tcg_out_movi(s, ts->type, reg, ts->val);
        } else {
            uint64_t val = ts->val;
            MemOp vece = MO_64;

            /*
             * Find the minimal vector element that matches the constant.
             * The targets will, in general, have to do this search anyway,
             * do this generically.
             */
            if (val == dup_const(MO_8, val)) {
                vece = MO_8;
            } else if (val == dup_const(MO_16, val)) {
                vece = MO_16;
            } else if (val == dup_const(MO_32, val)) {
                vece = MO_32;
            }

            tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
        }
        /* The memory slot, if any, does not yet hold this constant. */
        ts->mem_coherent = 0;
        break;
    case TEMP_VAL_MEM:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
        /* Freshly loaded: register and memory agree. */
        ts->mem_coherent = 1;
        break;
    case TEMP_VAL_DEAD:
    default:
        g_assert_not_reached();
    }
    set_temp_val_reg(s, ts, reg);
}
4701 
/*
 * Save a temporary to memory.  'allocated_regs' is used in case a
 * temporary register needs to be allocated to store a constant.
 */
static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
{
    /* The liveness analysis already ensures that globals are back
       in memory. Keep a tcg_debug_assert for safety. */
    tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
}
4710 
4711 /* save globals to their canonical location and assume they can be
4712    modified be the following code. 'allocated_regs' is used in case a
4713    temporary registers needs to be allocated to store a constant. */
4714 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4715 {
4716     int i, n;
4717 
4718     for (i = 0, n = s->nb_globals; i < n; i++) {
4719         temp_save(s, &s->temps[i], allocated_regs);
4720     }
4721 }
4722 
4723 /* sync globals to their canonical location and assume they can be
4724    read by the following code. 'allocated_regs' is used in case a
4725    temporary registers needs to be allocated to store a constant. */
4726 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4727 {
4728     int i, n;
4729 
4730     for (i = 0, n = s->nb_globals; i < n; i++) {
4731         TCGTemp *ts = &s->temps[i];
4732         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4733                          || ts->kind == TEMP_FIXED
4734                          || ts->mem_coherent);
4735     }
4736 }
4737 
4738 /* at the end of a basic block, we assume all temporaries are dead and
4739    all globals are stored at their canonical location. */
4740 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4741 {
4742     int i;
4743 
4744     for (i = s->nb_globals; i < s->nb_temps; i++) {
4745         TCGTemp *ts = &s->temps[i];
4746 
4747         switch (ts->kind) {
4748         case TEMP_TB:
4749             temp_save(s, ts, allocated_regs);
4750             break;
4751         case TEMP_EBB:
4752             /* The liveness analysis already ensures that temps are dead.
4753                Keep an tcg_debug_assert for safety. */
4754             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4755             break;
4756         case TEMP_CONST:
4757             /* Similarly, we should have freed any allocated register. */
4758             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4759             break;
4760         default:
4761             g_assert_not_reached();
4762         }
4763     }
4764 
4765     save_globals(s, allocated_regs);
4766 }
4767 
4768 /*
4769  * At a conditional branch, we assume all temporaries are dead unless
4770  * explicitly live-across-conditional-branch; all globals and local
4771  * temps are synced to their location.
4772  */
4773 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4774 {
4775     sync_globals(s, allocated_regs);
4776 
4777     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4778         TCGTemp *ts = &s->temps[i];
4779         /*
4780          * The liveness analysis already ensures that temps are dead.
4781          * Keep tcg_debug_asserts for safety.
4782          */
4783         switch (ts->kind) {
4784         case TEMP_TB:
4785             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4786             break;
4787         case TEMP_EBB:
4788         case TEMP_CONST:
4789             break;
4790         default:
4791             g_assert_not_reached();
4792         }
4793     }
4794 }
4795 
4796 /*
4797  * Specialized code generation for INDEX_op_mov_* with a constant.
4798  */
4799 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4800                                   tcg_target_ulong val, TCGLifeData arg_life,
4801                                   TCGRegSet preferred_regs)
4802 {
4803     /* ENV should not be modified.  */
4804     tcg_debug_assert(!temp_readonly(ots));
4805 
4806     /* The movi is not explicitly generated here.  */
4807     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4808     ots->val = val;
4809     ots->mem_coherent = 0;
4810     if (NEED_SYNC_ARG(0)) {
4811         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4812     } else if (IS_DEAD_ARG(0)) {
4813         temp_dead(s, ots);
4814     }
4815 }
4816 
4817 /*
4818  * Specialized code generation for INDEX_op_mov_*.
4819  */
4820 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4821 {
4822     const TCGLifeData arg_life = op->life;
4823     TCGRegSet allocated_regs, preferred_regs;
4824     TCGTemp *ts, *ots;
4825     TCGType otype, itype;
4826     TCGReg oreg, ireg;
4827 
4828     allocated_regs = s->reserved_regs;
4829     preferred_regs = output_pref(op, 0);
4830     ots = arg_temp(op->args[0]);
4831     ts = arg_temp(op->args[1]);
4832 
4833     /* ENV should not be modified.  */
4834     tcg_debug_assert(!temp_readonly(ots));
4835 
4836     /* Note that otype != itype for no-op truncation.  */
4837     otype = ots->type;
4838     itype = ts->type;
4839 
4840     if (ts->val_type == TEMP_VAL_CONST) {
4841         /* propagate constant or generate sti */
4842         tcg_target_ulong val = ts->val;
4843         if (IS_DEAD_ARG(1)) {
4844             temp_dead(s, ts);
4845         }
4846         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4847         return;
4848     }
4849 
4850     /* If the source value is in memory we're going to be forced
4851        to have it in a register in order to perform the copy.  Copy
4852        the SOURCE value into its own register first, that way we
4853        don't have to reload SOURCE the next time it is used. */
4854     if (ts->val_type == TEMP_VAL_MEM) {
4855         temp_load(s, ts, tcg_target_available_regs[itype],
4856                   allocated_regs, preferred_regs);
4857     }
4858     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4859     ireg = ts->reg;
4860 
4861     if (IS_DEAD_ARG(0)) {
4862         /* mov to a non-saved dead register makes no sense (even with
4863            liveness analysis disabled). */
4864         tcg_debug_assert(NEED_SYNC_ARG(0));
4865         if (!ots->mem_allocated) {
4866             temp_allocate_frame(s, ots);
4867         }
4868         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4869         if (IS_DEAD_ARG(1)) {
4870             temp_dead(s, ts);
4871         }
4872         temp_dead(s, ots);
4873         return;
4874     }
4875 
4876     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4877         /*
4878          * The mov can be suppressed.  Kill input first, so that it
4879          * is unlinked from reg_to_temp, then set the output to the
4880          * reg that we saved from the input.
4881          */
4882         temp_dead(s, ts);
4883         oreg = ireg;
4884     } else {
4885         if (ots->val_type == TEMP_VAL_REG) {
4886             oreg = ots->reg;
4887         } else {
4888             /* Make sure to not spill the input register during allocation. */
4889             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4890                                  allocated_regs | ((TCGRegSet)1 << ireg),
4891                                  preferred_regs, ots->indirect_base);
4892         }
4893         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4894             /*
4895              * Cross register class move not supported.
4896              * Store the source register into the destination slot
4897              * and leave the destination temp as TEMP_VAL_MEM.
4898              */
4899             assert(!temp_readonly(ots));
4900             if (!ts->mem_allocated) {
4901                 temp_allocate_frame(s, ots);
4902             }
4903             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4904             set_temp_val_nonreg(s, ts, TEMP_VAL_MEM);
4905             ots->mem_coherent = 1;
4906             return;
4907         }
4908     }
4909     set_temp_val_reg(s, ots, oreg);
4910     ots->mem_coherent = 0;
4911 
4912     if (NEED_SYNC_ARG(0)) {
4913         temp_sync(s, ots, allocated_regs, 0, 0);
4914     }
4915 }
4916 
4917 /*
4918  * Specialized code generation for INDEX_op_dup_vec.
4919  */
4920 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4921 {
4922     const TCGLifeData arg_life = op->life;
4923     TCGRegSet dup_out_regs, dup_in_regs;
4924     const TCGArgConstraint *dup_args_ct;
4925     TCGTemp *its, *ots;
4926     TCGType itype, vtype;
4927     unsigned vece;
4928     int lowpart_ofs;
4929     bool ok;
4930 
4931     ots = arg_temp(op->args[0]);
4932     its = arg_temp(op->args[1]);
4933 
4934     /* ENV should not be modified.  */
4935     tcg_debug_assert(!temp_readonly(ots));
4936 
4937     itype = its->type;
4938     vece = TCGOP_VECE(op);
4939     vtype = TCGOP_TYPE(op);
4940 
4941     if (its->val_type == TEMP_VAL_CONST) {
4942         /* Propagate constant via movi -> dupi.  */
4943         tcg_target_ulong val = its->val;
4944         if (IS_DEAD_ARG(1)) {
4945             temp_dead(s, its);
4946         }
4947         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4948         return;
4949     }
4950 
4951     dup_args_ct = opcode_args_ct(op);
4952     dup_out_regs = dup_args_ct[0].regs;
4953     dup_in_regs = dup_args_ct[1].regs;
4954 
4955     /* Allocate the output register now.  */
4956     if (ots->val_type != TEMP_VAL_REG) {
4957         TCGRegSet allocated_regs = s->reserved_regs;
4958         TCGReg oreg;
4959 
4960         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4961             /* Make sure to not spill the input register. */
4962             tcg_regset_set_reg(allocated_regs, its->reg);
4963         }
4964         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4965                              output_pref(op, 0), ots->indirect_base);
4966         set_temp_val_reg(s, ots, oreg);
4967     }
4968 
4969     switch (its->val_type) {
4970     case TEMP_VAL_REG:
4971         /*
4972          * The dup constriaints must be broad, covering all possible VECE.
4973          * However, tcg_op_dup_vec() gets to see the VECE and we allow it
4974          * to fail, indicating that extra moves are required for that case.
4975          */
4976         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
4977             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
4978                 goto done;
4979             }
4980             /* Try again from memory or a vector input register.  */
4981         }
4982         if (!its->mem_coherent) {
4983             /*
4984              * The input register is not synced, and so an extra store
4985              * would be required to use memory.  Attempt an integer-vector
4986              * register move first.  We do not have a TCGRegSet for this.
4987              */
4988             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
4989                 break;
4990             }
4991             /* Sync the temp back to its slot and load from there.  */
4992             temp_sync(s, its, s->reserved_regs, 0, 0);
4993         }
4994         /* fall through */
4995 
4996     case TEMP_VAL_MEM:
4997         lowpart_ofs = 0;
4998         if (HOST_BIG_ENDIAN) {
4999             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
5000         }
5001         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
5002                              its->mem_offset + lowpart_ofs)) {
5003             goto done;
5004         }
5005         /* Load the input into the destination vector register. */
5006         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
5007         break;
5008 
5009     default:
5010         g_assert_not_reached();
5011     }
5012 
5013     /* We now have a vector input register, so dup must succeed. */
5014     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
5015     tcg_debug_assert(ok);
5016 
5017  done:
5018     ots->mem_coherent = 0;
5019     if (IS_DEAD_ARG(1)) {
5020         temp_dead(s, its);
5021     }
5022     if (NEED_SYNC_ARG(0)) {
5023         temp_sync(s, ots, s->reserved_regs, 0, 0);
5024     }
5025     if (IS_DEAD_ARG(0)) {
5026         temp_dead(s, ots);
5027     }
5028 }
5029 
5030 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
5031 {
5032     const TCGLifeData arg_life = op->life;
5033     const TCGOpDef * const def = &tcg_op_defs[op->opc];
5034     TCGRegSet i_allocated_regs;
5035     TCGRegSet o_allocated_regs;
5036     int i, k, nb_iargs, nb_oargs;
5037     TCGReg reg;
5038     TCGArg arg;
5039     const TCGArgConstraint *args_ct;
5040     const TCGArgConstraint *arg_ct;
5041     TCGTemp *ts;
5042     TCGArg new_args[TCG_MAX_OP_ARGS];
5043     int const_args[TCG_MAX_OP_ARGS];
5044     TCGCond op_cond;
5045 
5046     nb_oargs = def->nb_oargs;
5047     nb_iargs = def->nb_iargs;
5048 
5049     /* copy constants */
5050     memcpy(new_args + nb_oargs + nb_iargs,
5051            op->args + nb_oargs + nb_iargs,
5052            sizeof(TCGArg) * def->nb_cargs);
5053 
5054     i_allocated_regs = s->reserved_regs;
5055     o_allocated_regs = s->reserved_regs;
5056 
5057     switch (op->opc) {
5058     case INDEX_op_brcond_i32:
5059     case INDEX_op_brcond_i64:
5060         op_cond = op->args[2];
5061         break;
5062     case INDEX_op_setcond_i32:
5063     case INDEX_op_setcond_i64:
5064     case INDEX_op_negsetcond_i32:
5065     case INDEX_op_negsetcond_i64:
5066     case INDEX_op_cmp_vec:
5067         op_cond = op->args[3];
5068         break;
5069     case INDEX_op_brcond2_i32:
5070         op_cond = op->args[4];
5071         break;
5072     case INDEX_op_movcond_i32:
5073     case INDEX_op_movcond_i64:
5074     case INDEX_op_setcond2_i32:
5075     case INDEX_op_cmpsel_vec:
5076         op_cond = op->args[5];
5077         break;
5078     default:
5079         /* No condition within opcode. */
5080         op_cond = TCG_COND_ALWAYS;
5081         break;
5082     }
5083 
5084     args_ct = opcode_args_ct(op);
5085 
5086     /* satisfy input constraints */
5087     for (k = 0; k < nb_iargs; k++) {
5088         TCGRegSet i_preferred_regs, i_required_regs;
5089         bool allocate_new_reg, copyto_new_reg;
5090         TCGTemp *ts2;
5091         int i1, i2;
5092 
5093         i = args_ct[nb_oargs + k].sort_index;
5094         arg = op->args[i];
5095         arg_ct = &args_ct[i];
5096         ts = arg_temp(arg);
5097 
5098         if (ts->val_type == TEMP_VAL_CONST
5099             && tcg_target_const_match(ts->val, arg_ct->ct, ts->type,
5100                                       op_cond, TCGOP_VECE(op))) {
5101             /* constant is OK for instruction */
5102             const_args[i] = 1;
5103             new_args[i] = ts->val;
5104             continue;
5105         }
5106 
5107         reg = ts->reg;
5108         i_preferred_regs = 0;
5109         i_required_regs = arg_ct->regs;
5110         allocate_new_reg = false;
5111         copyto_new_reg = false;
5112 
5113         switch (arg_ct->pair) {
5114         case 0: /* not paired */
5115             if (arg_ct->ialias) {
5116                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5117 
5118                 /*
5119                  * If the input is readonly, then it cannot also be an
5120                  * output and aliased to itself.  If the input is not
5121                  * dead after the instruction, we must allocate a new
5122                  * register and move it.
5123                  */
5124                 if (temp_readonly(ts) || !IS_DEAD_ARG(i)
5125                     || args_ct[arg_ct->alias_index].newreg) {
5126                     allocate_new_reg = true;
5127                 } else if (ts->val_type == TEMP_VAL_REG) {
5128                     /*
5129                      * Check if the current register has already been
5130                      * allocated for another input.
5131                      */
5132                     allocate_new_reg =
5133                         tcg_regset_test_reg(i_allocated_regs, reg);
5134                 }
5135             }
5136             if (!allocate_new_reg) {
5137                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5138                           i_preferred_regs);
5139                 reg = ts->reg;
5140                 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
5141             }
5142             if (allocate_new_reg) {
5143                 /*
5144                  * Allocate a new register matching the constraint
5145                  * and move the temporary register into it.
5146                  */
5147                 temp_load(s, ts, tcg_target_available_regs[ts->type],
5148                           i_allocated_regs, 0);
5149                 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
5150                                     i_preferred_regs, ts->indirect_base);
5151                 copyto_new_reg = true;
5152             }
5153             break;
5154 
5155         case 1:
5156             /* First of an input pair; if i1 == i2, the second is an output. */
5157             i1 = i;
5158             i2 = arg_ct->pair_index;
5159             ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
5160 
5161             /*
5162              * It is easier to default to allocating a new pair
5163              * and to identify a few cases where it's not required.
5164              */
5165             if (arg_ct->ialias) {
5166                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5167                 if (IS_DEAD_ARG(i1) &&
5168                     IS_DEAD_ARG(i2) &&
5169                     !temp_readonly(ts) &&
5170                     ts->val_type == TEMP_VAL_REG &&
5171                     ts->reg < TCG_TARGET_NB_REGS - 1 &&
5172                     tcg_regset_test_reg(i_required_regs, reg) &&
5173                     !tcg_regset_test_reg(i_allocated_regs, reg) &&
5174                     !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
5175                     (ts2
5176                      ? ts2->val_type == TEMP_VAL_REG &&
5177                        ts2->reg == reg + 1 &&
5178                        !temp_readonly(ts2)
5179                      : s->reg_to_temp[reg + 1] == NULL)) {
5180                     break;
5181                 }
5182             } else {
5183                 /* Without aliasing, the pair must also be an input. */
5184                 tcg_debug_assert(ts2);
5185                 if (ts->val_type == TEMP_VAL_REG &&
5186                     ts2->val_type == TEMP_VAL_REG &&
5187                     ts2->reg == reg + 1 &&
5188                     tcg_regset_test_reg(i_required_regs, reg)) {
5189                     break;
5190                 }
5191             }
5192             reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
5193                                      0, ts->indirect_base);
5194             goto do_pair;
5195 
5196         case 2: /* pair second */
5197             reg = new_args[arg_ct->pair_index] + 1;
5198             goto do_pair;
5199 
5200         case 3: /* ialias with second output, no first input */
5201             tcg_debug_assert(arg_ct->ialias);
5202             i_preferred_regs = output_pref(op, arg_ct->alias_index);
5203 
5204             if (IS_DEAD_ARG(i) &&
5205                 !temp_readonly(ts) &&
5206                 ts->val_type == TEMP_VAL_REG &&
5207                 reg > 0 &&
5208                 s->reg_to_temp[reg - 1] == NULL &&
5209                 tcg_regset_test_reg(i_required_regs, reg) &&
5210                 !tcg_regset_test_reg(i_allocated_regs, reg) &&
5211                 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
5212                 tcg_regset_set_reg(i_allocated_regs, reg - 1);
5213                 break;
5214             }
5215             reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
5216                                      i_allocated_regs, 0,
5217                                      ts->indirect_base);
5218             tcg_regset_set_reg(i_allocated_regs, reg);
5219             reg += 1;
5220             goto do_pair;
5221 
5222         do_pair:
5223             /*
5224              * If an aliased input is not dead after the instruction,
5225              * we must allocate a new register and move it.
5226              */
5227             if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
5228                 TCGRegSet t_allocated_regs = i_allocated_regs;
5229 
5230                 /*
5231                  * Because of the alias, and the continued life, make sure
5232                  * that the temp is somewhere *other* than the reg pair,
5233                  * and we get a copy in reg.
5234                  */
5235                 tcg_regset_set_reg(t_allocated_regs, reg);
5236                 tcg_regset_set_reg(t_allocated_regs, reg + 1);
5237                 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
5238                     /* If ts was already in reg, copy it somewhere else. */
5239                     TCGReg nr;
5240                     bool ok;
5241 
5242                     tcg_debug_assert(ts->kind != TEMP_FIXED);
5243                     nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
5244                                        t_allocated_regs, 0, ts->indirect_base);
5245                     ok = tcg_out_mov(s, ts->type, nr, reg);
5246                     tcg_debug_assert(ok);
5247 
5248                     set_temp_val_reg(s, ts, nr);
5249                 } else {
5250                     temp_load(s, ts, tcg_target_available_regs[ts->type],
5251                               t_allocated_regs, 0);
5252                     copyto_new_reg = true;
5253                 }
5254             } else {
5255                 /* Preferably allocate to reg, otherwise copy. */
5256                 i_required_regs = (TCGRegSet)1 << reg;
5257                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5258                           i_preferred_regs);
5259                 copyto_new_reg = ts->reg != reg;
5260             }
5261             break;
5262 
5263         default:
5264             g_assert_not_reached();
5265         }
5266 
5267         if (copyto_new_reg) {
5268             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5269                 /*
5270                  * Cross register class move not supported.  Sync the
5271                  * temp back to its slot and load from there.
5272                  */
5273                 temp_sync(s, ts, i_allocated_regs, 0, 0);
5274                 tcg_out_ld(s, ts->type, reg,
5275                            ts->mem_base->reg, ts->mem_offset);
5276             }
5277         }
5278         new_args[i] = reg;
5279         const_args[i] = 0;
5280         tcg_regset_set_reg(i_allocated_regs, reg);
5281     }
5282 
5283     /* mark dead temporaries and free the associated registers */
5284     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
5285         if (IS_DEAD_ARG(i)) {
5286             temp_dead(s, arg_temp(op->args[i]));
5287         }
5288     }
5289 
5290     if (def->flags & TCG_OPF_COND_BRANCH) {
5291         tcg_reg_alloc_cbranch(s, i_allocated_regs);
5292     } else if (def->flags & TCG_OPF_BB_END) {
5293         tcg_reg_alloc_bb_end(s, i_allocated_regs);
5294     } else {
5295         if (def->flags & TCG_OPF_CALL_CLOBBER) {
5296             /* XXX: permit generic clobber register list ? */
5297             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5298                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5299                     tcg_reg_free(s, i, i_allocated_regs);
5300                 }
5301             }
5302         }
5303         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
5304             /* sync globals if the op has side effects and might trigger
5305                an exception. */
5306             sync_globals(s, i_allocated_regs);
5307         }
5308 
5309         /* satisfy the output constraints */
5310         for (k = 0; k < nb_oargs; k++) {
5311             i = args_ct[k].sort_index;
5312             arg = op->args[i];
5313             arg_ct = &args_ct[i];
5314             ts = arg_temp(arg);
5315 
5316             /* ENV should not be modified.  */
5317             tcg_debug_assert(!temp_readonly(ts));
5318 
5319             switch (arg_ct->pair) {
5320             case 0: /* not paired */
5321                 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
5322                     reg = new_args[arg_ct->alias_index];
5323                 } else if (arg_ct->newreg) {
5324                     reg = tcg_reg_alloc(s, arg_ct->regs,
5325                                         i_allocated_regs | o_allocated_regs,
5326                                         output_pref(op, k), ts->indirect_base);
5327                 } else {
5328                     reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
5329                                         output_pref(op, k), ts->indirect_base);
5330                 }
5331                 break;
5332 
5333             case 1: /* first of pair */
5334                 if (arg_ct->oalias) {
5335                     reg = new_args[arg_ct->alias_index];
5336                 } else if (arg_ct->newreg) {
5337                     reg = tcg_reg_alloc_pair(s, arg_ct->regs,
5338                                              i_allocated_regs | o_allocated_regs,
5339                                              output_pref(op, k),
5340                                              ts->indirect_base);
5341                 } else {
5342                     reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
5343                                              output_pref(op, k),
5344                                              ts->indirect_base);
5345                 }
5346                 break;
5347 
5348             case 2: /* second of pair */
5349                 if (arg_ct->oalias) {
5350                     reg = new_args[arg_ct->alias_index];
5351                 } else {
5352                     reg = new_args[arg_ct->pair_index] + 1;
5353                 }
5354                 break;
5355 
5356             case 3: /* first of pair, aliasing with a second input */
5357                 tcg_debug_assert(!arg_ct->newreg);
5358                 reg = new_args[arg_ct->pair_index] - 1;
5359                 break;
5360 
5361             default:
5362                 g_assert_not_reached();
5363             }
5364             tcg_regset_set_reg(o_allocated_regs, reg);
5365             set_temp_val_reg(s, ts, reg);
5366             ts->mem_coherent = 0;
5367             new_args[i] = reg;
5368         }
5369     }
5370 
5371     /* emit instruction */
5372     switch (op->opc) {
5373     case INDEX_op_ext8s_i32:
5374         tcg_out_ext8s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
5375         break;
5376     case INDEX_op_ext8s_i64:
5377         tcg_out_ext8s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
5378         break;
5379     case INDEX_op_ext8u_i32:
5380     case INDEX_op_ext8u_i64:
5381         tcg_out_ext8u(s, new_args[0], new_args[1]);
5382         break;
5383     case INDEX_op_ext16s_i32:
5384         tcg_out_ext16s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
5385         break;
5386     case INDEX_op_ext16s_i64:
5387         tcg_out_ext16s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
5388         break;
5389     case INDEX_op_ext16u_i32:
5390     case INDEX_op_ext16u_i64:
5391         tcg_out_ext16u(s, new_args[0], new_args[1]);
5392         break;
5393     case INDEX_op_ext32s_i64:
5394         tcg_out_ext32s(s, new_args[0], new_args[1]);
5395         break;
5396     case INDEX_op_ext32u_i64:
5397         tcg_out_ext32u(s, new_args[0], new_args[1]);
5398         break;
5399     case INDEX_op_ext_i32_i64:
5400         tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
5401         break;
5402     case INDEX_op_extu_i32_i64:
5403         tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
5404         break;
5405     case INDEX_op_extrl_i64_i32:
5406         tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
5407         break;
5408     default:
5409         if (def->flags & TCG_OPF_VECTOR) {
5410             tcg_out_vec_op(s, op->opc, TCGOP_TYPE(op) - TCG_TYPE_V64,
5411                            TCGOP_VECE(op), new_args, const_args);
5412         } else {
5413             tcg_out_op(s, op->opc, TCGOP_TYPE(op), new_args, const_args);
5414         }
5415         break;
5416     }
5417 
5418     /* move the outputs in the correct register if needed */
5419     for(i = 0; i < nb_oargs; i++) {
5420         ts = arg_temp(op->args[i]);
5421 
5422         /* ENV should not be modified.  */
5423         tcg_debug_assert(!temp_readonly(ts));
5424 
5425         if (NEED_SYNC_ARG(i)) {
5426             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
5427         } else if (IS_DEAD_ARG(i)) {
5428             temp_dead(s, ts);
5429         }
5430     }
5431 }
5432 
/*
 * Allocate registers and emit code for a dup2_vec opcode: replicate the
 * 64-bit element formed from two 32-bit input temps across the output
 * vector.  Returns true if code was emitted here, false to request the
 * generic fallback expansion.
 */
static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGTemp *ots, *itsl, *itsh;
    TCGType vtype = TCGOP_TYPE(op);

    /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
    tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
    tcg_debug_assert(TCGOP_VECE(op) == MO_64);

    ots = arg_temp(op->args[0]);    /* output vector */
    itsl = arg_temp(op->args[1]);   /* low half of the 64-bit element */
    itsh = arg_temp(op->args[2]);   /* high half of the 64-bit element */

    /* ENV should not be modified.  */
    tcg_debug_assert(!temp_readonly(ots));

    /* Allocate the output register now.  */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;
        TCGRegSet dup_out_regs = opcode_args_ct(op)[0].regs;
        TCGReg oreg;

        /* Make sure to not spill the input registers. */
        if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsl->reg);
        }
        if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsh->reg);
        }

        oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                             output_pref(op, 0), ots->indirect_base);
        set_temp_val_reg(s, ots, oreg);
    }

    /* Promote dup2 of immediates to dupi_vec. */
    if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
        uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
        MemOp vece = MO_64;

        /* Use the narrowest element size that reproduces the constant. */
        if (val == dup_const(MO_8, val)) {
            vece = MO_8;
        } else if (val == dup_const(MO_16, val)) {
            vece = MO_16;
        } else if (val == dup_const(MO_32, val)) {
            vece = MO_32;
        }

        tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
        goto done;
    }

    /* If the two inputs form one 64-bit value, try dupm_vec. */
    if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
        itsh->temp_subindex == !HOST_BIG_ENDIAN &&
        itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
        TCGTemp *its = itsl - HOST_BIG_ENDIAN;

        /* Force both halves out to their memory slots, then dup from there. */
        temp_sync(s, its + 0, s->reserved_regs, 0, 0);
        temp_sync(s, its + 1, s->reserved_regs, 0, 0);

        if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
                             its->mem_base->reg, its->mem_offset)) {
            goto done;
        }
    }

    /* Fall back to generic expansion. */
    return false;

 done:
    /* The register now holds the result; any memory copy is stale. */
    ots->mem_coherent = 0;
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, itsl);
    }
    if (IS_DEAD_ARG(2)) {
        temp_dead(s, itsh);
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
    } else if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
    return true;
}
5519 
5520 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5521                          TCGRegSet allocated_regs)
5522 {
5523     if (ts->val_type == TEMP_VAL_REG) {
5524         if (ts->reg != reg) {
5525             tcg_reg_free(s, reg, allocated_regs);
5526             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5527                 /*
5528                  * Cross register class move not supported.  Sync the
5529                  * temp back to its slot and load from there.
5530                  */
5531                 temp_sync(s, ts, allocated_regs, 0, 0);
5532                 tcg_out_ld(s, ts->type, reg,
5533                            ts->mem_base->reg, ts->mem_offset);
5534             }
5535         }
5536     } else {
5537         TCGRegSet arg_set = 0;
5538 
5539         tcg_reg_free(s, reg, allocated_regs);
5540         tcg_regset_set_reg(arg_set, reg);
5541         temp_load(s, ts, arg_set, allocated_regs, 0);
5542     }
5543 }
5544 
5545 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5546                          TCGRegSet allocated_regs)
5547 {
5548     /*
5549      * When the destination is on the stack, load up the temp and store.
5550      * If there are many call-saved registers, the temp might live to
5551      * see another use; otherwise it'll be discarded.
5552      */
5553     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5554     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5555                arg_slot_stk_ofs(arg_slot));
5556 }
5557 
5558 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5559                             TCGTemp *ts, TCGRegSet *allocated_regs)
5560 {
5561     if (arg_slot_reg_p(l->arg_slot)) {
5562         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5563         load_arg_reg(s, reg, ts, *allocated_regs);
5564         tcg_regset_set_reg(*allocated_regs, reg);
5565     } else {
5566         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5567     }
5568 }
5569 
5570 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5571                          intptr_t ref_off, TCGRegSet *allocated_regs)
5572 {
5573     TCGReg reg;
5574 
5575     if (arg_slot_reg_p(arg_slot)) {
5576         reg = tcg_target_call_iarg_regs[arg_slot];
5577         tcg_reg_free(s, reg, *allocated_regs);
5578         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5579         tcg_regset_set_reg(*allocated_regs, reg);
5580     } else {
5581         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5582                             *allocated_regs, 0, false);
5583         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5584         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5585                    arg_slot_stk_ofs(arg_slot));
5586     }
5587 }
5588 
/*
 * Allocate registers and emit code for a helper call op: marshal inputs
 * into their ABI locations, free call-clobbered registers, save or sync
 * globals per the helper's flags, emit the call, and bind the outputs
 * to the ABI return locations.
 */
static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
{
    const int nb_oargs = TCGOP_CALLO(op);
    const int nb_iargs = TCGOP_CALLI(op);
    const TCGLifeData arg_life = op->life;
    const TCGHelperInfo *info = tcg_call_info(op);
    TCGRegSet allocated_regs = s->reserved_regs;
    int i;

    /*
     * Move inputs into place in reverse order,
     * so that we place stacked arguments first.
     */
    for (i = nb_iargs - 1; i >= 0; --i) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            load_arg_normal(s, loc, ts, &allocated_regs);
            break;
        case TCG_CALL_ARG_BY_REF:
            /* Store the value to its stack home, then pass its address. */
            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
            load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
                         arg_slot_stk_ofs(loc->ref_slot),
                         &allocated_regs);
            break;
        case TCG_CALL_ARG_BY_REF_N:
            /* Further piece of a by-reference quantity: store only. */
            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
            break;
        default:
            g_assert_not_reached();
        }
    }

    /* Mark dead temporaries and free the associated registers.  */
    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    /* Clobber call registers.  */
    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
            tcg_reg_free(s, i, allocated_regs);
        }
    }

    /*
     * Save globals if they might be written by the helper,
     * sync them if they might be read.
     */
    if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
        /* Nothing to do */
    } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
        sync_globals(s, allocated_regs);
    } else {
        save_globals(s, allocated_regs);
    }

    /*
     * If the ABI passes a pointer to the returned struct as the first
     * argument, load that now.  Pass a pointer to the output home slot.
     */
    if (info->out_kind == TCG_CALL_RET_BY_REF) {
        TCGTemp *ts = arg_temp(op->args[0]);

        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
    }

    tcg_out_call(s, tcg_call_func(op), info);

    /* Assign output registers and emit moves if needed.  */
    switch (info->out_kind) {
    case TCG_CALL_RET_NORMAL:
        /* Each output piece arrives in a fixed ABI return register. */
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);

            /* ENV should not be modified.  */
            tcg_debug_assert(!temp_readonly(ts));

            set_temp_val_reg(s, ts, reg);
            ts->mem_coherent = 0;
        }
        break;

    case TCG_CALL_RET_BY_VEC:
        /* The value arrived in a vector register: spill it to memory. */
        {
            TCGTemp *ts = arg_temp(op->args[0]);

            tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
            tcg_debug_assert(ts->temp_subindex == 0);
            if (!ts->mem_allocated) {
                temp_allocate_frame(s, ts);
            }
            tcg_out_st(s, TCG_TYPE_V128,
                       tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
                       ts->mem_base->reg, ts->mem_offset);
        }
        /* fall through to mark all parts in memory */

    case TCG_CALL_RET_BY_REF:
        /* The callee has performed a write through the reference. */
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            ts->val_type = TEMP_VAL_MEM;
        }
        break;

    default:
        g_assert_not_reached();
    }

    /* Flush or discard output registers as needed. */
    for (i = 0; i < nb_oargs; i++) {
        TCGTemp *ts = arg_temp(op->args[i]);
        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
5719 
5720 /**
5721  * atom_and_align_for_opc:
5722  * @s: tcg context
5723  * @opc: memory operation code
5724  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5725  * @allow_two_ops: true if we are prepared to issue two operations
5726  *
5727  * Return the alignment and atomicity to use for the inline fast path
5728  * for the given memory operation.  The alignment may be larger than
5729  * that specified in @opc, and the correct alignment will be diagnosed
5730  * by the slow path helper.
5731  *
5732  * If @allow_two_ops, the host is prepared to test for 2x alignment,
5733  * and issue two loads or stores for subalignment.
5734  */
5735 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5736                                            MemOp host_atom, bool allow_two_ops)
5737 {
5738     MemOp align = memop_alignment_bits(opc);
5739     MemOp size = opc & MO_SIZE;
5740     MemOp half = size ? size - 1 : 0;
5741     MemOp atom = opc & MO_ATOM_MASK;
5742     MemOp atmax;
5743 
5744     switch (atom) {
5745     case MO_ATOM_NONE:
5746         /* The operation requires no specific atomicity. */
5747         atmax = MO_8;
5748         break;
5749 
5750     case MO_ATOM_IFALIGN:
5751         atmax = size;
5752         break;
5753 
5754     case MO_ATOM_IFALIGN_PAIR:
5755         atmax = half;
5756         break;
5757 
5758     case MO_ATOM_WITHIN16:
5759         atmax = size;
5760         if (size == MO_128) {
5761             /* Misalignment implies !within16, and therefore no atomicity. */
5762         } else if (host_atom != MO_ATOM_WITHIN16) {
5763             /* The host does not implement within16, so require alignment. */
5764             align = MAX(align, size);
5765         }
5766         break;
5767 
5768     case MO_ATOM_WITHIN16_PAIR:
5769         atmax = size;
5770         /*
5771          * Misalignment implies !within16, and therefore half atomicity.
5772          * Any host prepared for two operations can implement this with
5773          * half alignment.
5774          */
5775         if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5776             align = MAX(align, half);
5777         }
5778         break;
5779 
5780     case MO_ATOM_SUBALIGN:
5781         atmax = size;
5782         if (host_atom != MO_ATOM_SUBALIGN) {
5783             /* If unaligned but not odd, there are subobjects up to half. */
5784             if (allow_two_ops) {
5785                 align = MAX(align, half);
5786             } else {
5787                 align = MAX(align, size);
5788             }
5789         }
5790         break;
5791 
5792     default:
5793         g_assert_not_reached();
5794     }
5795 
5796     return (TCGAtomAlign){ .atom = atmax, .align = align };
5797 }
5798 
5799 /*
5800  * Similarly for qemu_ld/st slow path helpers.
5801  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5802  * using only the provided backend tcg_out_* functions.
5803  */
5804 
5805 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5806 {
5807     int ofs = arg_slot_stk_ofs(slot);
5808 
5809     /*
5810      * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
5811      * require extension to uint64_t, adjust the address for uint32_t.
5812      */
5813     if (HOST_BIG_ENDIAN &&
5814         TCG_TARGET_REG_BITS == 64 &&
5815         type == TCG_TYPE_I32) {
5816         ofs += 4;
5817     }
5818     return ofs;
5819 }
5820 
5821 static void tcg_out_helper_load_slots(TCGContext *s,
5822                                       unsigned nmov, TCGMovExtend *mov,
5823                                       const TCGLdstHelperParam *parm)
5824 {
5825     unsigned i;
5826     TCGReg dst3;
5827 
5828     /*
5829      * Start from the end, storing to the stack first.
5830      * This frees those registers, so we need not consider overlap.
5831      */
5832     for (i = nmov; i-- > 0; ) {
5833         unsigned slot = mov[i].dst;
5834 
5835         if (arg_slot_reg_p(slot)) {
5836             goto found_reg;
5837         }
5838 
5839         TCGReg src = mov[i].src;
5840         TCGType dst_type = mov[i].dst_type;
5841         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5842 
5843         /* The argument is going onto the stack; extend into scratch. */
5844         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
5845             tcg_debug_assert(parm->ntmp != 0);
5846             mov[i].dst = src = parm->tmp[0];
5847             tcg_out_movext1(s, &mov[i]);
5848         }
5849 
5850         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
5851                    tcg_out_helper_stk_ofs(dst_type, slot));
5852     }
5853     return;
5854 
5855  found_reg:
5856     /*
5857      * The remaining arguments are in registers.
5858      * Convert slot numbers to argument registers.
5859      */
5860     nmov = i + 1;
5861     for (i = 0; i < nmov; ++i) {
5862         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
5863     }
5864 
5865     switch (nmov) {
5866     case 4:
5867         /* The backend must have provided enough temps for the worst case. */
5868         tcg_debug_assert(parm->ntmp >= 2);
5869 
5870         dst3 = mov[3].dst;
5871         for (unsigned j = 0; j < 3; ++j) {
5872             if (dst3 == mov[j].src) {
5873                 /*
5874                  * Conflict. Copy the source to a temporary, perform the
5875                  * remaining moves, then the extension from our scratch
5876                  * on the way out.
5877                  */
5878                 TCGReg scratch = parm->tmp[1];
5879 
5880                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
5881                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
5882                 tcg_out_movext1_new_src(s, &mov[3], scratch);
5883                 break;
5884             }
5885         }
5886 
5887         /* No conflicts: perform this move and continue. */
5888         tcg_out_movext1(s, &mov[3]);
5889         /* fall through */
5890 
5891     case 3:
5892         tcg_out_movext3(s, mov, mov + 1, mov + 2,
5893                         parm->ntmp ? parm->tmp[0] : -1);
5894         break;
5895     case 2:
5896         tcg_out_movext2(s, mov, mov + 1,
5897                         parm->ntmp ? parm->tmp[0] : -1);
5898         break;
5899     case 1:
5900         tcg_out_movext1(s, mov);
5901         break;
5902     default:
5903         g_assert_not_reached();
5904     }
5905 }
5906 
5907 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
5908                                     TCGType type, tcg_target_long imm,
5909                                     const TCGLdstHelperParam *parm)
5910 {
5911     if (arg_slot_reg_p(slot)) {
5912         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
5913     } else {
5914         int ofs = tcg_out_helper_stk_ofs(type, slot);
5915         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
5916             tcg_debug_assert(parm->ntmp != 0);
5917             tcg_out_movi(s, type, parm->tmp[0], imm);
5918             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
5919         }
5920     }
5921 }
5922 
/*
 * Load the arguments common to every qemu_ld/st slow-path helper:
 * env (always the first argument), the MemOpIdx @ldst->oi at position
 * @next_arg, and the return address after that.
 */
static void tcg_out_helper_load_common_args(TCGContext *s,
                                            const TCGLabelQemuLdst *ldst,
                                            const TCGLdstHelperParam *parm,
                                            const TCGHelperInfo *info,
                                            unsigned next_arg)
{
    TCGMovExtend ptr_mov = {
        .dst_type = TCG_TYPE_PTR,
        .src_type = TCG_TYPE_PTR,
        .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
    };
    const TCGCallArgumentLoc *loc = &info->in[0];
    TCGType type;
    unsigned slot;
    tcg_target_ulong imm;

    /*
     * Handle env, which is always first.
     */
    ptr_mov.dst = loc->arg_slot;
    ptr_mov.src = TCG_AREG0;
    tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);

    /*
     * Handle oi.
     */
    imm = ldst->oi;
    loc = &info->in[next_arg];
    type = TCG_TYPE_I32;
    switch (loc->kind) {
    case TCG_CALL_ARG_NORMAL:
        break;
    case TCG_CALL_ARG_EXTEND_U:
    case TCG_CALL_ARG_EXTEND_S:
        /* No extension required for MemOpIdx. */
        tcg_debug_assert(imm <= INT32_MAX);
        type = TCG_TYPE_REG;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
    next_arg++;

    /*
     * Handle ra.
     */
    loc = &info->in[next_arg];
    slot = loc->arg_slot;
    if (parm->ra_gen) {
        /* The backend computes the return address into a register. */
        int arg_reg = -1;
        TCGReg ra_reg;

        if (arg_slot_reg_p(slot)) {
            arg_reg = tcg_target_call_iarg_regs[slot];
        }
        ra_reg = parm->ra_gen(s, ldst, arg_reg);

        ptr_mov.dst = slot;
        ptr_mov.src = ra_reg;
        tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
    } else {
        /* Otherwise pass the recorded raddr as an immediate. */
        imm = (uintptr_t)ldst->raddr;
        tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
    }
}
5989 
5990 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
5991                                        const TCGCallArgumentLoc *loc,
5992                                        TCGType dst_type, TCGType src_type,
5993                                        TCGReg lo, TCGReg hi)
5994 {
5995     MemOp reg_mo;
5996 
5997     if (dst_type <= TCG_TYPE_REG) {
5998         MemOp src_ext;
5999 
6000         switch (loc->kind) {
6001         case TCG_CALL_ARG_NORMAL:
6002             src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
6003             break;
6004         case TCG_CALL_ARG_EXTEND_U:
6005             dst_type = TCG_TYPE_REG;
6006             src_ext = MO_UL;
6007             break;
6008         case TCG_CALL_ARG_EXTEND_S:
6009             dst_type = TCG_TYPE_REG;
6010             src_ext = MO_SL;
6011             break;
6012         default:
6013             g_assert_not_reached();
6014         }
6015 
6016         mov[0].dst = loc->arg_slot;
6017         mov[0].dst_type = dst_type;
6018         mov[0].src = lo;
6019         mov[0].src_type = src_type;
6020         mov[0].src_ext = src_ext;
6021         return 1;
6022     }
6023 
6024     if (TCG_TARGET_REG_BITS == 32) {
6025         assert(dst_type == TCG_TYPE_I64);
6026         reg_mo = MO_32;
6027     } else {
6028         assert(dst_type == TCG_TYPE_I128);
6029         reg_mo = MO_64;
6030     }
6031 
6032     mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
6033     mov[0].src = lo;
6034     mov[0].dst_type = TCG_TYPE_REG;
6035     mov[0].src_type = TCG_TYPE_REG;
6036     mov[0].src_ext = reg_mo;
6037 
6038     mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
6039     mov[1].src = hi;
6040     mov[1].dst_type = TCG_TYPE_REG;
6041     mov[1].src_type = TCG_TYPE_REG;
6042     mov[1].src_ext = reg_mo;
6043 
6044     return 2;
6045 }
6046 
/*
 * Marshal the arguments for a qemu_ld slow-path helper call described by
 * @ldst: choose the helper signature from the access size, load the
 * guest address, set up the return buffer for by-reference returns, and
 * finish with the common args (env, oi, ra).
 */
static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
                                   const TCGLdstHelperParam *parm)
{
    const TCGHelperInfo *info;
    const TCGCallArgumentLoc *loc;
    TCGMovExtend mov[2];
    unsigned next_arg, nmov;
    MemOp mop = get_memop(ldst->oi);

    /* Select the helper by access size. */
    switch (mop & MO_SIZE) {
    case MO_8:
    case MO_16:
    case MO_32:
        info = &info_helper_ld32_mmu;
        break;
    case MO_64:
        info = &info_helper_ld64_mmu;
        break;
    case MO_128:
        info = &info_helper_ld128_mmu;
        break;
    default:
        g_assert_not_reached();
    }

    /* Defer env argument. */
    next_arg = 1;

    loc = &info->in[next_arg];
    if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
        /*
         * 32-bit host with 32-bit guest: zero-extend the guest address
         * to 64-bits for the helper by storing the low part, then
         * load a zero for the high part.
         */
        tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
                               TCG_TYPE_I32, TCG_TYPE_I32,
                               ldst->addrlo_reg, -1);
        tcg_out_helper_load_slots(s, 1, mov, parm);

        tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
                                TCG_TYPE_I32, 0, parm);
        next_arg += 2;
    } else {
        /* Otherwise the address moves as one or two register pieces. */
        nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
                                      ldst->addrlo_reg, ldst->addrhi_reg);
        tcg_out_helper_load_slots(s, nmov, mov, parm);
        next_arg += nmov;
    }

    switch (info->out_kind) {
    case TCG_CALL_RET_NORMAL:
    case TCG_CALL_RET_BY_VEC:
        break;
    case TCG_CALL_RET_BY_REF:
        /*
         * The return reference is in the first argument slot.
         * We need memory in which to return: re-use the top of stack.
         */
        {
            int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;

            if (arg_slot_reg_p(0)) {
                tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
                                 TCG_REG_CALL_STACK, ofs_slot0);
            } else {
                tcg_debug_assert(parm->ntmp != 0);
                tcg_out_addi_ptr(s, parm->tmp[0],
                                 TCG_REG_CALL_STACK, ofs_slot0);
                tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
                           TCG_REG_CALL_STACK, ofs_slot0);
            }
        }
        break;
    default:
        g_assert_not_reached();
    }

    tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
}
6127 
/*
 * Emit slow-path code to move a qemu_ld helper's return value into the
 * destination register(s) recorded in @ldst, applying any required
 * sign/zero extension.  @load_sign is true when the helper itself was
 * asked to perform the MO_SIGN extension to tcg_target_ulong.
 */
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
                                  bool load_sign,
                                  const TCGLdstHelperParam *parm)
{
    MemOp mop = get_memop(ldst->oi);
    TCGMovExtend mov[2];
    int ofs_slot0;

    switch (ldst->type) {
    case TCG_TYPE_I64:
        /* An I64 result on a 32-bit host occupies a register pair;
           handled by the two-register path after the switch. */
        if (TCG_TARGET_REG_BITS == 32) {
            break;
        }
        /* fall through */

    case TCG_TYPE_I32:
        /* Single-register result. */
        mov[0].dst = ldst->datalo_reg;
        mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
        mov[0].dst_type = ldst->type;
        mov[0].src_type = TCG_TYPE_REG;

        /*
         * If load_sign, then we allowed the helper to perform the
         * appropriate sign extension to tcg_target_ulong, and all
         * we need now is a plain move.
         *
         * If they do not, then we expect the relevant extension
         * instruction to be no more expensive than a move, and
         * we thus save the icache etc by only using one of two
         * helper functions.
         */
        if (load_sign || !(mop & MO_SIGN)) {
            if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
                mov[0].src_ext = MO_32;
            } else {
                mov[0].src_ext = MO_64;
            }
        } else {
            /* Perform the sign extension ourselves during the move. */
            mov[0].src_ext = mop & MO_SSIZE;
        }
        tcg_out_movext1(s, mov);
        return;

    case TCG_TYPE_I128:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Returned in a register pair; use the path below. */
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Spill the vector return to the stack, then reload the
               halves as integers via the BY_REF path. */
            tcg_out_st(s, TCG_TYPE_V128,
                       tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
                       TCG_REG_CALL_STACK, ofs_slot0);
            /* fall through */
        case TCG_CALL_RET_BY_REF:
            /* Result is in memory at the top of stack (see
               tcg_out_ld_helper_args); load both 64-bit halves. */
            tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
                       TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
            tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
                       TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
            return;
        default:
            g_assert_not_reached();
        }
        break;

    default:
        g_assert_not_reached();
    }

    /* Two-register result: I64 on 32-bit host, or I128 RET_NORMAL.
       Select output registers by host endianness; movext2 resolves
       any overlap between the pair using the optional scratch. */
    mov[0].dst = ldst->datalo_reg;
    mov[0].src =
        tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
    mov[0].dst_type = TCG_TYPE_REG;
    mov[0].src_type = TCG_TYPE_REG;
    mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;

    mov[1].dst = ldst->datahi_reg;
    mov[1].src =
        tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
    mov[1].dst_type = TCG_TYPE_REG;
    mov[1].src_type = TCG_TYPE_REG;
    mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;

    tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
}
6213 
/*
 * Emit slow-path argument setup for a call to a qemu_st helper.
 * As with loads, the env argument is deferred to
 * tcg_out_helper_load_common_args; here we marshal the guest address
 * and the data to be stored (by value, or by reference for I128 on
 * hosts that pass it that way).
 */
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
                                   const TCGLdstHelperParam *parm)
{
    const TCGHelperInfo *info;
    const TCGCallArgumentLoc *loc;
    TCGMovExtend mov[4];
    TCGType data_type;
    unsigned next_arg, nmov, n;
    MemOp mop = get_memop(ldst->oi);

    /* Select the helper signature by access size; 8/16/32-bit stores
       share the 32-bit helper. */
    switch (mop & MO_SIZE) {
    case MO_8:
    case MO_16:
    case MO_32:
        info = &info_helper_st32_mmu;
        data_type = TCG_TYPE_I32;
        break;
    case MO_64:
        info = &info_helper_st64_mmu;
        data_type = TCG_TYPE_I64;
        break;
    case MO_128:
        info = &info_helper_st128_mmu;
        data_type = TCG_TYPE_I128;
        break;
    default:
        g_assert_not_reached();
    }

    /* Defer env argument. */
    next_arg = 1;
    nmov = 0;

    /* Handle addr argument. */
    loc = &info->in[next_arg];
    if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
        /*
         * 32-bit host with 32-bit guest: zero-extend the guest address
         * to 64-bits for the helper by storing the low part.  Later,
         * after we have processed the register inputs, we will load a
         * zero for the high part.
         */
        tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
                               TCG_TYPE_I32, TCG_TYPE_I32,
                               ldst->addrlo_reg, -1);
        next_arg += 2;
        nmov += 1;
    } else {
        n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
                                   ldst->addrlo_reg, ldst->addrhi_reg);
        next_arg += n;
        nmov += n;
    }

    /* Handle data argument. */
    loc = &info->in[next_arg];
    switch (loc->kind) {
    case TCG_CALL_ARG_NORMAL:
    case TCG_CALL_ARG_EXTEND_U:
    case TCG_CALL_ARG_EXTEND_S:
        /* Data passed by value: queue the moves, then load all
           accumulated address+data moves into their slots at once. */
        n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
                                   ldst->datalo_reg, ldst->datahi_reg);
        next_arg += n;
        nmov += n;
        tcg_out_helper_load_slots(s, nmov, mov, parm);
        break;

    case TCG_CALL_ARG_BY_REF:
        /* I128 passed by reference: store both halves to the reserved
           stack slots (in host-endian order), then pass a pointer. */
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        tcg_debug_assert(data_type == TCG_TYPE_I128);
        tcg_out_st(s, TCG_TYPE_I64,
                   HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
                   TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
        tcg_out_st(s, TCG_TYPE_I64,
                   HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
                   TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));

        tcg_out_helper_load_slots(s, nmov, mov, parm);

        /* Pass the address of the stashed data as the argument. */
        if (arg_slot_reg_p(loc->arg_slot)) {
            tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
                             TCG_REG_CALL_STACK,
                             arg_slot_stk_ofs(loc->ref_slot));
        } else {
            tcg_debug_assert(parm->ntmp != 0);
            tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
                             arg_slot_stk_ofs(loc->ref_slot));
            tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
                       TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
        }
        next_arg += 2;
        break;

    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
        /* Zero extend the address by loading a zero for the high part. */
        loc = &info->in[1 + !HOST_BIG_ENDIAN];
        tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
    }

    /* Load env and any remaining constant arguments. */
    tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
}
6319 
/*
 * Translate the TCGOp stream in @s into host machine code for @tb,
 * writing at tb->tc.ptr.  @pc_start is the guest PC, used only for
 * log filtering.
 *
 * Returns the number of bytes of code generated, or a negative value
 * on (pending) buffer overflow: -1 when the code buffer high-water
 * mark was crossed, -2 when the TB grew past what gen_insn_end_off
 * can represent or relocations could not be resolved.  The caller is
 * expected to restart code generation in either case.
 */
int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
{
    int i, start_words, num_insns;
    TCGOp *op;

    /* Dump the pre-optimization opcode stream if requested. */
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP:\n");
            tcg_dump_ops(s, logfile, false);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

#ifdef CONFIG_DEBUG_TCG
    /* Ensure all labels referenced have been emitted.  */
    {
        TCGLabel *l;
        bool error = false;

        QSIMPLEQ_FOREACH(l, &s->labels, next) {
            if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
                qemu_log_mask(CPU_LOG_TB_OP,
                              "$L%d referenced but not present.\n", l->id);
                error = true;
            }
        }
        assert(!error);
    }
#endif

    /* Do not reuse any EBB that may be allocated within the TB. */
    tcg_temp_ebb_reset_freed(s);

    /* Optimization and liveness passes over the opcode stream. */
    tcg_optimize(s);

    reachable_code_pass(s);
    liveness_pass_0(s);
    liveness_pass_1(s);

    if (s->nb_indirects > 0) {
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                     && qemu_log_in_addr_range(pc_start))) {
            FILE *logfile = qemu_log_trylock();
            if (logfile) {
                fprintf(logfile, "OP before indirect lowering:\n");
                tcg_dump_ops(s, logfile, false);
                fprintf(logfile, "\n");
                qemu_log_unlock(logfile);
            }
        }

        /* Replace indirect temps with direct temps.  */
        if (liveness_pass_2(s)) {
            /* If changes were made, re-run liveness.  */
            liveness_pass_1(s);
        }
    }

    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP after optimization and liveness analysis:\n");
            tcg_dump_ops(s, logfile, true);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

    /* Initialize goto_tb jump offsets. */
    tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;

    tcg_reg_alloc_start(s);

    /*
     * Reset the buffer pointers when restarting after overflow.
     * TODO: Move this into translate-all.c with the rest of the
     * buffer management.  Having only this done here is confusing.
     */
    s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
    s->code_ptr = s->code_buf;
    s->data_gen_ptr = NULL;

    QSIMPLEQ_INIT(&s->ldst_labels);
    s->pool_labels = NULL;

    /* One uint64_t of insn_start data per guest insn per word. */
    start_words = s->insn_start_words;
    s->gen_insn_data =
        tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);

    tcg_out_tb_start(s);

    /* Main code generation loop: one backend emission per TCGOp. */
    num_insns = -1;
    QTAILQ_FOREACH(op, &s->ops, link) {
        TCGOpcode opc = op->opc;

        switch (opc) {
        case INDEX_op_mov_i32:
        case INDEX_op_mov_i64:
        case INDEX_op_mov_vec:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_dup_vec:
            tcg_reg_alloc_dup(s, op);
            break;
        case INDEX_op_insn_start:
            /* Record the end offset of the previous guest insn, and
               the start data for the new one. */
            if (num_insns >= 0) {
                size_t off = tcg_current_code_size(s);
                s->gen_insn_end_off[num_insns] = off;
                /* Assert that we do not overflow our stored offset.  */
                assert(s->gen_insn_end_off[num_insns] == off);
            }
            num_insns++;
            for (i = 0; i < start_words; ++i) {
                s->gen_insn_data[num_insns * start_words + i] =
                    tcg_get_insn_start_param(op, i);
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]));
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op);
            break;
        case INDEX_op_exit_tb:
            tcg_out_exit_tb(s, op->args[0]);
            break;
        case INDEX_op_goto_tb:
            tcg_out_goto_tb(s, op->args[0]);
            break;
        case INDEX_op_dup2_vec:
            if (tcg_reg_alloc_dup2(s, op)) {
                break;
            }
            /* fall through */
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op),
                                              TCGOP_FLAGS(op)));
            /* Note: in order to speed up the code, it would be much
               faster to have specialized register allocator functions for
               some common argument patterns */
            tcg_reg_alloc_op(s, op);
            break;
        }
        /* Test for (pending) buffer overflow.  The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           generating code without having to check during generation.  */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
        /* Test for TB overflow, as seen by gen_insn_end_off.  */
        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
            return -2;
        }
    }
    /* Every guest insn must have produced an insn_start op. */
    tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block */
    i = tcg_out_ldst_finalize(s);
    if (i < 0) {
        return i;
    }
    i = tcg_out_pool_finalize(s);
    if (i < 0) {
        return i;
    }
    if (!tcg_resolve_relocs(s)) {
        return -2;
    }

#ifndef CONFIG_TCG_INTERPRETER
    /* flush instruction cache */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf,
                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
#endif

    return tcg_current_code_size(s);
}
6512 
6513 #ifdef ELF_HOST_MACHINE
6514 /* In order to use this feature, the backend needs to do three things:
6515 
6516    (1) Define ELF_HOST_MACHINE to indicate both what value to
6517        put into the ELF image and to indicate support for the feature.
6518 
6519    (2) Define tcg_register_jit.  This should create a buffer containing
6520        the contents of a .debug_frame section that describes the post-
6521        prologue unwind info for the tcg machine.
6522 
6523    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
6524 */
6525 
6526 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
/* Action requested of the debugger by __jit_debug_register_code. */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;

/* One registered in-memory object file (here, our fake ELF image). */
struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;
    uint64_t symfile_size;
};

/* The descriptor GDB locates by name to find the registration list. */
struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};

/* GDB sets a breakpoint here; calling it signals a registry change. */
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    /* The empty asm keeps the call from being optimized away. */
    asm("");
}

/* Must statically initialize the version, because GDB may check
   the version before we can set it.  */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
6556 
6557 /* End GDB interface.  */
6558 
/*
 * Return the offset of @str within the string table @strtab: a
 * sequence of NUL-terminated strings that begins with an empty string
 * at offset 0.  The string is assumed to be present, so the scan has
 * no failure exit.
 */
static int find_string(const char *strtab, const char *str)
{
    for (const char *cur = strtab + 1; ; cur += strlen(cur) + 1) {
        if (!strcmp(cur, str)) {
            return cur - strtab;
        }
    }
}
6570 
/*
 * Build an in-memory ELF image describing code_gen_buffer (at
 * @buf_ptr, @buf_size bytes) -- with minimal DWARF .debug_info and
 * .debug_abbrev sections plus the backend-supplied .debug_frame -- and
 * register it with GDB through the JIT interface above.
 */
static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    /* Fixed-layout .debug_info compilation unit: one CU DIE covering
       the buffer, one subprogram DIE named "code_gen_buffer". */
    struct __attribute__((packed)) DebugInfo {
        uint32_t  len;
        uint16_t  version;
        uint32_t  abbrev;
        uint8_t   ptr_size;
        uint8_t   cu_die;
        uint16_t  cu_lang;
        uintptr_t cu_low_pc;
        uintptr_t cu_high_pc;
        uint8_t   fn_die;
        char      fn_name[16];
        uintptr_t fn_low_pc;
        uintptr_t fn_high_pc;
        uint8_t   cu_eoc;
    };

    /* The complete fake ELF file, minus the trailing .debug_frame,
       which is appended at sizeof(struct ElfImage). */
    struct ElfImage {
        ElfW(Ehdr) ehdr;
        ElfW(Phdr) phdr;
        ElfW(Shdr) shdr[7];
        ElfW(Sym)  sym[2];
        struct DebugInfo di;
        uint8_t    da[24];
        char       str[80];
    };

    struct ElfImage *img;

    static const struct ElfImage img_template = {
        .ehdr = {
            .e_ident[EI_MAG0] = ELFMAG0,
            .e_ident[EI_MAG1] = ELFMAG1,
            .e_ident[EI_MAG2] = ELFMAG2,
            .e_ident[EI_MAG3] = ELFMAG3,
            .e_ident[EI_CLASS] = ELF_CLASS,
            .e_ident[EI_DATA] = ELF_DATA,
            .e_ident[EI_VERSION] = EV_CURRENT,
            .e_type = ET_EXEC,
            .e_machine = ELF_HOST_MACHINE,
            .e_version = EV_CURRENT,
            .e_phoff = offsetof(struct ElfImage, phdr),
            .e_shoff = offsetof(struct ElfImage, shdr),
            /* NOTE(review): e_ehsize is set from sizeof(Shdr); an Ehdr
               size would be expected here -- confirm intent upstream. */
            .e_ehsize = sizeof(ElfW(Shdr)),
            .e_phentsize = sizeof(ElfW(Phdr)),
            .e_phnum = 1,
            .e_shentsize = sizeof(ElfW(Shdr)),
            .e_shnum = ARRAY_SIZE(img->shdr),
            .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
#ifdef ELF_HOST_FLAGS
            .e_flags = ELF_HOST_FLAGS,
#endif
#ifdef ELF_OSABI
            .e_ident[EI_OSABI] = ELF_OSABI,
#endif
        },
        .phdr = {
            .p_type = PT_LOAD,
            .p_flags = PF_X,
        },
        .shdr = {
            [0] = { .sh_type = SHT_NULL },
            /* Trick: The contents of code_gen_buffer are not present in
               this fake ELF file; that got allocated elsewhere.  Therefore
               we mark .text as SHT_NOBITS (similar to .bss) so that readers
               will not look for contents.  We can record any address.  */
            [1] = { /* .text */
                .sh_type = SHT_NOBITS,
                .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
            },
            [2] = { /* .debug_info */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, di),
                .sh_size = sizeof(struct DebugInfo),
            },
            [3] = { /* .debug_abbrev */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, da),
                .sh_size = sizeof(img->da),
            },
            [4] = { /* .debug_frame */
                .sh_type = SHT_PROGBITS,
                .sh_offset = sizeof(struct ElfImage),
            },
            [5] = { /* .symtab */
                .sh_type = SHT_SYMTAB,
                .sh_offset = offsetof(struct ElfImage, sym),
                .sh_size = sizeof(img->sym),
                .sh_info = 1,
                .sh_link = ARRAY_SIZE(img->shdr) - 1,
                .sh_entsize = sizeof(ElfW(Sym)),
            },
            [6] = { /* .strtab */
                .sh_type = SHT_STRTAB,
                .sh_offset = offsetof(struct ElfImage, str),
                .sh_size = sizeof(img->str),
            }
        },
        .sym = {
            [1] = { /* code_gen_buffer */
                .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
                .st_shndx = 1,
            }
        },
        .di = {
            .len = sizeof(struct DebugInfo) - 4,
            .version = 2,
            .ptr_size = sizeof(void *),
            .cu_die = 1,
            .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
            .fn_die = 2,
            .fn_name = "code_gen_buffer"
        },
        /* Hand-rolled .debug_abbrev matching the two DIEs above. */
        .da = {
            1,          /* abbrev number (the cu) */
            0x11, 1,    /* DW_TAG_compile_unit, has children */
            0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            2,          /* abbrev number (the fn) */
            0x2e, 0,    /* DW_TAG_subprogram, no children */
            0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            0           /* no more abbrev */
        },
        .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
               ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
    };

    /* We only need a single jit entry; statically allocate it.  */
    static struct jit_code_entry one_entry;

    uintptr_t buf = (uintptr_t)buf_ptr;
    size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
    DebugFrameHeader *dfh;

    /* Copy the template and fill in the buffer-specific fields. */
    img = g_malloc(img_size);
    *img = img_template;

    img->phdr.p_vaddr = buf;
    img->phdr.p_paddr = buf;
    img->phdr.p_memsz = buf_size;

    img->shdr[1].sh_name = find_string(img->str, ".text");
    img->shdr[1].sh_addr = buf;
    img->shdr[1].sh_size = buf_size;

    img->shdr[2].sh_name = find_string(img->str, ".debug_info");
    img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");

    img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
    img->shdr[4].sh_size = debug_frame_size;

    img->shdr[5].sh_name = find_string(img->str, ".symtab");
    img->shdr[6].sh_name = find_string(img->str, ".strtab");

    img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
    img->sym[1].st_value = buf;
    img->sym[1].st_size = buf_size;

    img->di.cu_low_pc = buf;
    img->di.cu_high_pc = buf + buf_size;
    img->di.fn_low_pc = buf;
    img->di.fn_high_pc = buf + buf_size;

    /* Append the backend's .debug_frame, patching its FDE to cover
       the buffer. */
    dfh = (DebugFrameHeader *)(img + 1);
    memcpy(dfh, debug_frame, debug_frame_size);
    dfh->fde.func_start = buf;
    dfh->fde.func_len = buf_size;

#ifdef DEBUG_JIT
    /* Enable this block to be able to debug the ELF image file creation.
       One can use readelf, objdump, or other inspection utilities.  */
    {
        g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
        FILE *f = fopen(jit, "w+b");
        if (f) {
            /* NOTE(review): fwrite returns the item count (1), not
               bytes, so this comparison with img_size is vacuous; the
               empty body makes it harmless. */
            if (fwrite(img, img_size, 1, f) != img_size) {
                /* Avoid stupid unused return value warning for fwrite.  */
            }
            fclose(f);
        }
    }
#endif

    /* Publish the image and notify the debugger. */
    one_entry.symfile_addr = img;
    one_entry.symfile_size = img_size;

    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
    __jit_debug_descriptor.relevant_entry = &one_entry;
    __jit_debug_descriptor.first_entry = &one_entry;
    __jit_debug_register_code();
}
6770 #else
6771 /* No support for the feature.  Provide the entry point expected by exec.c,
6772    and implement the internal function we declared earlier.  */
6773 
/* No-op stub: GDB JIT registration unsupported on this host. */
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}
6779 
/* No-op stub: nothing to register without ELF_HOST_MACHINE support. */
void tcg_register_jit(const void *buf, size_t buf_size)
{
}
6783 #endif /* ELF_HOST_MACHINE */
6784 
6785 #if !TCG_TARGET_MAYBE_vec
/* Hosts without vector support must never create vector ops,
   so expansion requests here are a programming error. */
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
6790 #endif
6791