xref: /qemu/tcg/tcg-op-ldst.c (revision 6b8f40c61bbfef1abe77eeb9c716ec642927c12c)
/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "tcg/tcg.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg/tcg-op-common.h"
#include "tcg/tcg-mo.h"
#include "exec/translation-block.h"
#include "exec/plugin-gen.h"
#include "tcg-internal.h"
#include "tcg-has.h"
#include "tcg-target-mo.h"

static void check_max_alignment(unsigned a_bits)
{
    /*
     * The requested alignment cannot overlap the TLB flags.
     * FIXME: Must keep the count up-to-date with "exec/cpu-all.h".
     */
    if (tcg_use_softmmu) {
        tcg_debug_assert(a_bits + 5 <= tcg_ctx->page_bits);
    }
}

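/*
 * Canonicalize the MemOp for a load (st == false) or store (st == true)
 * of an i32 (is64 == false) or i64 (is64 == true) value: check the
 * alignment, drop bits that have no effect (byte swap of a single byte,
 * sign extension of a full-width load or of any store), and reduce the
 * atomicity requirement when not generating parallel code.
 */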
static MemOp tcg_canonicalize_memop(MemOp op, bool is64, bool st)
{
    unsigned a_bits = memop_alignment_bits(op);

    check_max_alignment(a_bits);

    /* Prefer MO_ALIGN+MO_XX over MO_ALIGN_XX+MO_XX */
    if (a_bits == (op & MO_SIZE)) {
        op = (op & ~MO_AMASK) | MO_ALIGN;
    }

    switch (op & MO_SIZE) {
    case MO_8:
        op &= ~MO_BSWAP;
        break;
    case MO_16:
        break;
    case MO_32:
        if (!is64) {
            op &= ~MO_SIGN;
        }
        break;
    case MO_64:
        if (is64) {
            op &= ~MO_SIGN;
            break;
        }
        /* fall through */
    default:
        g_assert_not_reached();
    }
    if (st) {
        op &= ~MO_SIGN;
    }

    /* In serial mode, reduce atomicity. */
    if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
        op &= ~MO_ATOM_MASK;
        op |= MO_ATOM_NONE;
    }

    return op;
}

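/*
 * Emit a qemu_ld/st opcode for value temp @vl, plus @vh when the value
 * occupies two host registers (an i64 on a 32-bit host, or an i128).
 * On 32-bit hosts with 64-bit guest addresses, the address temp is
 * passed as separate low and high halves.
 */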
static void gen_ldst(TCGOpcode opc, TCGType type, TCGTemp *vl, TCGTemp *vh,
                     TCGTemp *addr, MemOpIdx oi)
{
    if (TCG_TARGET_REG_BITS == 64 || tcg_ctx->addr_type == TCG_TYPE_I32) {
        if (vh) {
            tcg_gen_op4(opc, type, temp_arg(vl), temp_arg(vh),
                        temp_arg(addr), oi);
        } else {
            tcg_gen_op3(opc, type, temp_arg(vl), temp_arg(addr), oi);
        }
    } else {
        /* See TCGV_LOW/HIGH. */
        TCGTemp *al = addr + HOST_BIG_ENDIAN;
        TCGTemp *ah = addr + !HOST_BIG_ENDIAN;

        if (vh) {
            tcg_gen_op5(opc, type, temp_arg(vl), temp_arg(vh),
                        temp_arg(al), temp_arg(ah), oi);
        } else {
            tcg_gen_op4(opc, type, temp_arg(vl),
                        temp_arg(al), temp_arg(ah), oi);
        }
    }
}

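/* As gen_ldst, but for a single i64 value, split in half on 32-bit hosts. */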
static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 v, TCGTemp *addr, MemOpIdx oi)
{
    if (TCG_TARGET_REG_BITS == 32) {
        TCGTemp *vl = tcgv_i32_temp(TCGV_LOW(v));
        TCGTemp *vh = tcgv_i32_temp(TCGV_HIGH(v));
        gen_ldst(opc, TCG_TYPE_I64, vl, vh, addr, oi);
    } else {
        gen_ldst(opc, TCG_TYPE_I64, tcgv_i64_temp(v), NULL, addr, oi);
    }
}

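/*
 * Emit a memory barrier for the ordering @type if it is required by the
 * guest memory model and not already provided by the host backend.
 */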
static void tcg_gen_req_mo(TCGBar type)
{
    type &= tcg_ctx->guest_mo;
    type &= ~TCG_TARGET_DEFAULT_MO;
    if (type) {
        tcg_gen_mb(type | TCG_BAR_SC);
    }
}

/* Only required for loads, where value might overlap addr. */
static TCGv_i64 plugin_maybe_preserve_addr(TCGTemp *addr)
{
#ifdef CONFIG_PLUGIN
    if (tcg_ctx->plugin_insn != NULL) {
        /* Save a copy of the vaddr for use after a load.  */
        TCGv_i64 temp = tcg_temp_ebb_new_i64();
        if (tcg_ctx->addr_type == TCG_TYPE_I32) {
            tcg_gen_extu_i32_i64(temp, temp_tcgv_i32(addr));
        } else {
            tcg_gen_mov_i64(temp, temp_tcgv_i64(addr));
        }
        return temp;
    }
#endif
    return NULL;
}

#ifdef CONFIG_PLUGIN
static void
plugin_gen_mem_callbacks(TCGv_i64 copy_addr, TCGTemp *orig_addr, MemOpIdx oi,
                         enum qemu_plugin_mem_rw rw)
{
    if (tcg_ctx->plugin_insn != NULL) {
        qemu_plugin_meminfo_t info = make_plugin_meminfo(oi, rw);

        if (tcg_ctx->addr_type == TCG_TYPE_I32) {
            if (!copy_addr) {
                copy_addr = tcg_temp_ebb_new_i64();
                tcg_gen_extu_i32_i64(copy_addr, temp_tcgv_i32(orig_addr));
            }
            tcg_gen_plugin_mem_cb(copy_addr, info);
            tcg_temp_free_i64(copy_addr);
        } else {
            if (copy_addr) {
                tcg_gen_plugin_mem_cb(copy_addr, info);
                tcg_temp_free_i64(copy_addr);
            } else {
                tcg_gen_plugin_mem_cb(temp_tcgv_i64(orig_addr), info);
            }
        }
    }
}
#endif

static void
plugin_gen_mem_callbacks_i32(TCGv_i32 val,
                             TCGv_i64 copy_addr, TCGTemp *orig_addr,
                             MemOpIdx oi, enum qemu_plugin_mem_rw rw)
{
#ifdef CONFIG_PLUGIN
    if (tcg_ctx->plugin_insn != NULL) {
        tcg_gen_st_i32(val, tcg_env,
                       offsetof(CPUState, neg.plugin_mem_value_low) -
                       sizeof(CPUState) + (HOST_BIG_ENDIAN * 4));
        plugin_gen_mem_callbacks(copy_addr, orig_addr, oi, rw);
    }
#endif
}

static void
plugin_gen_mem_callbacks_i64(TCGv_i64 val,
                             TCGv_i64 copy_addr, TCGTemp *orig_addr,
                             MemOpIdx oi, enum qemu_plugin_mem_rw rw)
{
#ifdef CONFIG_PLUGIN
    if (tcg_ctx->plugin_insn != NULL) {
        tcg_gen_st_i64(val, tcg_env,
                       offsetof(CPUState, neg.plugin_mem_value_low) -
                       sizeof(CPUState));
        plugin_gen_mem_callbacks(copy_addr, orig_addr, oi, rw);
    }
#endif
}

static void
plugin_gen_mem_callbacks_i128(TCGv_i128 val,
                              TCGv_i64 copy_addr, TCGTemp *orig_addr,
                              MemOpIdx oi, enum qemu_plugin_mem_rw rw)
{
#ifdef CONFIG_PLUGIN
    if (tcg_ctx->plugin_insn != NULL) {
        tcg_gen_st_i64(TCGV128_LOW(val), tcg_env,
                       offsetof(CPUState, neg.plugin_mem_value_low) -
                       sizeof(CPUState));
        tcg_gen_st_i64(TCGV128_HIGH(val), tcg_env,
                       offsetof(CPUState, neg.plugin_mem_value_high) -
                       sizeof(CPUState));
        plugin_gen_mem_callbacks(copy_addr, orig_addr, oi, rw);
    }
#endif
}

static void tcg_gen_qemu_ld_i32_int(TCGv_i32 val, TCGTemp *addr,
                                    TCGArg idx, MemOp memop)
{
    MemOp orig_memop;
    MemOpIdx orig_oi, oi;
    TCGv_i64 copy_addr;
    TCGOpcode opc;

    tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
    orig_memop = memop = tcg_canonicalize_memop(memop, 0, 0);
    orig_oi = oi = make_memop_idx(memop, idx);

    if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
        memop &= ~MO_BSWAP;
        /* The bswap primitive benefits from zero-extended input.  */
        if ((memop & MO_SSIZE) == MO_SW) {
            memop &= ~MO_SIGN;
        }
        oi = make_memop_idx(memop, idx);
    }

    copy_addr = plugin_maybe_preserve_addr(addr);
    if (tcg_ctx->addr_type == TCG_TYPE_I32) {
        opc = INDEX_op_qemu_ld_a32_i32;
    } else {
        opc = INDEX_op_qemu_ld_a64_i32;
    }
    gen_ldst(opc, TCG_TYPE_I32, tcgv_i32_temp(val), NULL, addr, oi);
    plugin_gen_mem_callbacks_i32(val, copy_addr, addr, orig_oi,
                                 QEMU_PLUGIN_MEM_R);

    if ((orig_memop ^ memop) & MO_BSWAP) {
        switch (orig_memop & MO_SIZE) {
        case MO_16:
            tcg_gen_bswap16_i32(val, val, (orig_memop & MO_SIGN
                                           ? TCG_BSWAP_IZ | TCG_BSWAP_OS
                                           : TCG_BSWAP_IZ | TCG_BSWAP_OZ));
            break;
        case MO_32:
            tcg_gen_bswap32_i32(val, val);
            break;
        default:
            g_assert_not_reached();
        }
    }
}

void tcg_gen_qemu_ld_i32_chk(TCGv_i32 val, TCGTemp *addr, TCGArg idx,
                             MemOp memop, TCGType addr_type)
{
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);
    tcg_debug_assert((memop & MO_SIZE) <= MO_32);
    tcg_gen_qemu_ld_i32_int(val, addr, idx, memop);
}

static void tcg_gen_qemu_st_i32_int(TCGv_i32 val, TCGTemp *addr,
                                    TCGArg idx, MemOp memop)
{
    TCGv_i32 swap = NULL;
    MemOpIdx orig_oi, oi;
    TCGOpcode opc;

    tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
    memop = tcg_canonicalize_memop(memop, 0, 1);
    orig_oi = oi = make_memop_idx(memop, idx);

    if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
        swap = tcg_temp_ebb_new_i32();
        switch (memop & MO_SIZE) {
        case MO_16:
            tcg_gen_bswap16_i32(swap, val, 0);
            break;
        case MO_32:
            tcg_gen_bswap32_i32(swap, val);
            break;
        default:
            g_assert_not_reached();
        }
        val = swap;
        memop &= ~MO_BSWAP;
        oi = make_memop_idx(memop, idx);
    }

    if (TCG_TARGET_HAS_qemu_st8_i32 && (memop & MO_SIZE) == MO_8) {
        if (tcg_ctx->addr_type == TCG_TYPE_I32) {
            opc = INDEX_op_qemu_st8_a32_i32;
        } else {
            opc = INDEX_op_qemu_st8_a64_i32;
        }
    } else {
        if (tcg_ctx->addr_type == TCG_TYPE_I32) {
            opc = INDEX_op_qemu_st_a32_i32;
        } else {
            opc = INDEX_op_qemu_st_a64_i32;
        }
    }
    gen_ldst(opc, TCG_TYPE_I32, tcgv_i32_temp(val), NULL, addr, oi);
    plugin_gen_mem_callbacks_i32(val, NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W);

    if (swap) {
        tcg_temp_free_i32(swap);
    }
}

void tcg_gen_qemu_st_i32_chk(TCGv_i32 val, TCGTemp *addr, TCGArg idx,
                             MemOp memop, TCGType addr_type)
{
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);
    tcg_debug_assert((memop & MO_SIZE) <= MO_32);
    tcg_gen_qemu_st_i32_int(val, addr, idx, memop);
}

static void tcg_gen_qemu_ld_i64_int(TCGv_i64 val, TCGTemp *addr,
                                    TCGArg idx, MemOp memop)
{
    MemOp orig_memop;
    MemOpIdx orig_oi, oi;
    TCGv_i64 copy_addr;
    TCGOpcode opc;

    if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
        tcg_gen_qemu_ld_i32_int(TCGV_LOW(val), addr, idx, memop);
        if (memop & MO_SIGN) {
            tcg_gen_sari_i32(TCGV_HIGH(val), TCGV_LOW(val), 31);
        } else {
            tcg_gen_movi_i32(TCGV_HIGH(val), 0);
        }
        return;
    }

    tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
    orig_memop = memop = tcg_canonicalize_memop(memop, 1, 0);
    orig_oi = oi = make_memop_idx(memop, idx);

    if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
        memop &= ~MO_BSWAP;
        /* The bswap primitive benefits from zero-extended input.  */
        if ((memop & MO_SIGN) && (memop & MO_SIZE) < MO_64) {
            memop &= ~MO_SIGN;
        }
        oi = make_memop_idx(memop, idx);
    }

    copy_addr = plugin_maybe_preserve_addr(addr);
    if (tcg_ctx->addr_type == TCG_TYPE_I32) {
        opc = INDEX_op_qemu_ld_a32_i64;
    } else {
        opc = INDEX_op_qemu_ld_a64_i64;
    }
    gen_ldst_i64(opc, val, addr, oi);
    plugin_gen_mem_callbacks_i64(val, copy_addr, addr, orig_oi,
                                 QEMU_PLUGIN_MEM_R);

    if ((orig_memop ^ memop) & MO_BSWAP) {
        int flags = (orig_memop & MO_SIGN
                     ? TCG_BSWAP_IZ | TCG_BSWAP_OS
                     : TCG_BSWAP_IZ | TCG_BSWAP_OZ);
        switch (orig_memop & MO_SIZE) {
        case MO_16:
            tcg_gen_bswap16_i64(val, val, flags);
            break;
        case MO_32:
            tcg_gen_bswap32_i64(val, val, flags);
            break;
        case MO_64:
            tcg_gen_bswap64_i64(val, val);
            break;
        default:
            g_assert_not_reached();
        }
    }
}

void tcg_gen_qemu_ld_i64_chk(TCGv_i64 val, TCGTemp *addr, TCGArg idx,
                             MemOp memop, TCGType addr_type)
{
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);
    tcg_debug_assert((memop & MO_SIZE) <= MO_64);
    tcg_gen_qemu_ld_i64_int(val, addr, idx, memop);
}

static void tcg_gen_qemu_st_i64_int(TCGv_i64 val, TCGTemp *addr,
                                    TCGArg idx, MemOp memop)
{
    TCGv_i64 swap = NULL;
    MemOpIdx orig_oi, oi;
    TCGOpcode opc;

    if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
        tcg_gen_qemu_st_i32_int(TCGV_LOW(val), addr, idx, memop);
        return;
    }

    tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
    memop = tcg_canonicalize_memop(memop, 1, 1);
    orig_oi = oi = make_memop_idx(memop, idx);

    if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
        swap = tcg_temp_ebb_new_i64();
        switch (memop & MO_SIZE) {
        case MO_16:
            tcg_gen_bswap16_i64(swap, val, 0);
            break;
        case MO_32:
            tcg_gen_bswap32_i64(swap, val, 0);
            break;
        case MO_64:
            tcg_gen_bswap64_i64(swap, val);
            break;
        default:
            g_assert_not_reached();
        }
        val = swap;
        memop &= ~MO_BSWAP;
        oi = make_memop_idx(memop, idx);
    }

    if (tcg_ctx->addr_type == TCG_TYPE_I32) {
        opc = INDEX_op_qemu_st_a32_i64;
    } else {
        opc = INDEX_op_qemu_st_a64_i64;
    }
    gen_ldst_i64(opc, val, addr, oi);
    plugin_gen_mem_callbacks_i64(val, NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W);

    if (swap) {
        tcg_temp_free_i64(swap);
    }
}

void tcg_gen_qemu_st_i64_chk(TCGv_i64 val, TCGTemp *addr, TCGArg idx,
                             MemOp memop, TCGType addr_type)
{
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);
    tcg_debug_assert((memop & MO_SIZE) <= MO_64);
    tcg_gen_qemu_st_i64_int(val, addr, idx, memop);
}

/*
 * Return true if @mop, without knowledge of the pointer alignment,
 * does not require 16-byte atomicity, and it would be advantageous
 * to avoid a call to a helper function.
 */
static bool use_two_i64_for_i128(MemOp mop)
{
    /* Two softmmu TLB lookups are larger than one function call. */
    if (tcg_use_softmmu) {
        return false;
    }

    /*
     * For user-only, two 64-bit operations may well be smaller than a call.
     * Determine if that would be legal for the requested atomicity.
     */
    switch (mop & MO_ATOM_MASK) {
    case MO_ATOM_NONE:
    case MO_ATOM_IFALIGN_PAIR:
        return true;
    case MO_ATOM_IFALIGN:
    case MO_ATOM_SUBALIGN:
    case MO_ATOM_WITHIN16:
    case MO_ATOM_WITHIN16_PAIR:
        return false;
    default:
        g_assert_not_reached();
    }
}

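/*
 * Split a 16-byte MemOp into two 8-byte MemOps in ret[0] and ret[1],
 * preserving the original alignment and byte-swap requirements as far
 * as the host allows.
 */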
static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
{
    MemOp mop_1 = orig, mop_2;

    /* Reduce the size to 64-bit. */
    mop_1 = (mop_1 & ~MO_SIZE) | MO_64;

    /* Retain the alignment constraints of the original. */
    switch (orig & MO_AMASK) {
    case MO_UNALN:
    case MO_ALIGN_2:
    case MO_ALIGN_4:
        mop_2 = mop_1;
        break;
    case MO_ALIGN_8:
        /* Prefer MO_ALIGN+MO_64 to MO_ALIGN_8+MO_64. */
        mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
        mop_2 = mop_1;
        break;
    case MO_ALIGN:
        /* Second has 8-byte alignment; first has 16-byte alignment. */
        mop_2 = mop_1;
        mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN_16;
        break;
    case MO_ALIGN_16:
    case MO_ALIGN_32:
    case MO_ALIGN_64:
        /* Second has 8-byte alignment; first retains original. */
        mop_2 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
        break;
    default:
        g_assert_not_reached();
    }

    /* Use a byte ordering implemented by the host. */
    if ((orig & MO_BSWAP) && !tcg_target_has_memory_bswap(mop_1)) {
        mop_1 &= ~MO_BSWAP;
        mop_2 &= ~MO_BSWAP;
    }

    ret[0] = mop_1;
    ret[1] = mop_2;
}

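/*
 * Out-of-line helpers always take a 64-bit address; zero-extend a 32-bit
 * guest address into a fresh temporary when needed, and release it again
 * with maybe_free_addr64.
 */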
static TCGv_i64 maybe_extend_addr64(TCGTemp *addr)
{
    if (tcg_ctx->addr_type == TCG_TYPE_I32) {
        TCGv_i64 a64 = tcg_temp_ebb_new_i64();
        tcg_gen_extu_i32_i64(a64, temp_tcgv_i32(addr));
        return a64;
    }
    return temp_tcgv_i64(addr);
}

static void maybe_free_addr64(TCGv_i64 a64)
{
    if (tcg_ctx->addr_type == TCG_TYPE_I32) {
        tcg_temp_free_i64(a64);
    }
}

static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr,
                                     TCGArg idx, MemOp memop)
{
    MemOpIdx orig_oi;
    TCGv_i64 ext_addr = NULL;
    TCGOpcode opc;

    check_max_alignment(memop_alignment_bits(memop));
    tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);

    /* In serial mode, reduce atomicity. */
    if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
        memop &= ~MO_ATOM_MASK;
        memop |= MO_ATOM_NONE;
    }
    orig_oi = make_memop_idx(memop, idx);

    /* TODO: For now, force 32-bit hosts to use the helper. */
    if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
        TCGv_i64 lo, hi;
        bool need_bswap = false;
        MemOpIdx oi = orig_oi;

        if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
            lo = TCGV128_HIGH(val);
            hi = TCGV128_LOW(val);
            oi = make_memop_idx(memop & ~MO_BSWAP, idx);
            need_bswap = true;
        } else {
            lo = TCGV128_LOW(val);
            hi = TCGV128_HIGH(val);
        }

        if (tcg_ctx->addr_type == TCG_TYPE_I32) {
            opc = INDEX_op_qemu_ld_a32_i128;
        } else {
            opc = INDEX_op_qemu_ld_a64_i128;
        }
        gen_ldst(opc, TCG_TYPE_I128, tcgv_i64_temp(lo),
                 tcgv_i64_temp(hi), addr, oi);

        if (need_bswap) {
            tcg_gen_bswap64_i64(lo, lo);
            tcg_gen_bswap64_i64(hi, hi);
        }
    } else if (use_two_i64_for_i128(memop)) {
        MemOp mop[2];
        TCGTemp *addr_p8;
        TCGv_i64 x, y;
        bool need_bswap;

        canonicalize_memop_i128_as_i64(mop, memop);
        need_bswap = (mop[0] ^ memop) & MO_BSWAP;

        if (tcg_ctx->addr_type == TCG_TYPE_I32) {
            opc = INDEX_op_qemu_ld_a32_i64;
        } else {
            opc = INDEX_op_qemu_ld_a64_i64;
        }

        /*
         * Since there are no global TCGv_i128, there is no visible state
         * changed if the second load faults.  Load directly into the two
         * subwords.
         */
        if ((memop & MO_BSWAP) == MO_LE) {
            x = TCGV128_LOW(val);
            y = TCGV128_HIGH(val);
        } else {
            x = TCGV128_HIGH(val);
            y = TCGV128_LOW(val);
        }

        gen_ldst_i64(opc, x, addr, make_memop_idx(mop[0], idx));

        if (need_bswap) {
            tcg_gen_bswap64_i64(x, x);
        }

        if (tcg_ctx->addr_type == TCG_TYPE_I32) {
            TCGv_i32 t = tcg_temp_ebb_new_i32();
            tcg_gen_addi_i32(t, temp_tcgv_i32(addr), 8);
            addr_p8 = tcgv_i32_temp(t);
        } else {
            TCGv_i64 t = tcg_temp_ebb_new_i64();
            tcg_gen_addi_i64(t, temp_tcgv_i64(addr), 8);
            addr_p8 = tcgv_i64_temp(t);
        }

        gen_ldst_i64(opc, y, addr_p8, make_memop_idx(mop[1], idx));
        tcg_temp_free_internal(addr_p8);

        if (need_bswap) {
            tcg_gen_bswap64_i64(y, y);
        }
    } else {
        if (tcg_ctx->addr_type == TCG_TYPE_I32) {
            ext_addr = tcg_temp_ebb_new_i64();
            tcg_gen_extu_i32_i64(ext_addr, temp_tcgv_i32(addr));
            addr = tcgv_i64_temp(ext_addr);
        }
        gen_helper_ld_i128(val, tcg_env, temp_tcgv_i64(addr),
                           tcg_constant_i32(orig_oi));
    }

    plugin_gen_mem_callbacks_i128(val, ext_addr, addr, orig_oi,
                                  QEMU_PLUGIN_MEM_R);
}

void tcg_gen_qemu_ld_i128_chk(TCGv_i128 val, TCGTemp *addr, TCGArg idx,
                              MemOp memop, TCGType addr_type)
{
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);
    tcg_debug_assert((memop & MO_SIZE) == MO_128);
    tcg_debug_assert((memop & MO_SIGN) == 0);
    tcg_gen_qemu_ld_i128_int(val, addr, idx, memop);
}

static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr,
                                     TCGArg idx, MemOp memop)
{
    MemOpIdx orig_oi;
    TCGv_i64 ext_addr = NULL;
    TCGOpcode opc;

    check_max_alignment(memop_alignment_bits(memop));
    tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);

    /* In serial mode, reduce atomicity. */
    if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
        memop &= ~MO_ATOM_MASK;
        memop |= MO_ATOM_NONE;
    }
    orig_oi = make_memop_idx(memop, idx);

    /* TODO: For now, force 32-bit hosts to use the helper. */
    if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
        TCGv_i64 lo, hi;
        MemOpIdx oi = orig_oi;
        bool need_bswap = false;

        if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
            lo = tcg_temp_ebb_new_i64();
            hi = tcg_temp_ebb_new_i64();
            tcg_gen_bswap64_i64(lo, TCGV128_HIGH(val));
            tcg_gen_bswap64_i64(hi, TCGV128_LOW(val));
            oi = make_memop_idx(memop & ~MO_BSWAP, idx);
            need_bswap = true;
        } else {
            lo = TCGV128_LOW(val);
            hi = TCGV128_HIGH(val);
        }

        if (tcg_ctx->addr_type == TCG_TYPE_I32) {
            opc = INDEX_op_qemu_st_a32_i128;
        } else {
            opc = INDEX_op_qemu_st_a64_i128;
        }
        gen_ldst(opc, TCG_TYPE_I128, tcgv_i64_temp(lo),
                 tcgv_i64_temp(hi), addr, oi);

        if (need_bswap) {
            tcg_temp_free_i64(lo);
            tcg_temp_free_i64(hi);
        }
    } else if (use_two_i64_for_i128(memop)) {
        MemOp mop[2];
        TCGTemp *addr_p8;
        TCGv_i64 x, y, b = NULL;

        canonicalize_memop_i128_as_i64(mop, memop);

        if (tcg_ctx->addr_type == TCG_TYPE_I32) {
            opc = INDEX_op_qemu_st_a32_i64;
        } else {
            opc = INDEX_op_qemu_st_a64_i64;
        }

        if ((memop & MO_BSWAP) == MO_LE) {
            x = TCGV128_LOW(val);
            y = TCGV128_HIGH(val);
        } else {
            x = TCGV128_HIGH(val);
            y = TCGV128_LOW(val);
        }

        if ((mop[0] ^ memop) & MO_BSWAP) {
            b = tcg_temp_ebb_new_i64();
            tcg_gen_bswap64_i64(b, x);
            x = b;
        }

        gen_ldst_i64(opc, x, addr, make_memop_idx(mop[0], idx));

        if (tcg_ctx->addr_type == TCG_TYPE_I32) {
            TCGv_i32 t = tcg_temp_ebb_new_i32();
            tcg_gen_addi_i32(t, temp_tcgv_i32(addr), 8);
            addr_p8 = tcgv_i32_temp(t);
        } else {
            TCGv_i64 t = tcg_temp_ebb_new_i64();
            tcg_gen_addi_i64(t, temp_tcgv_i64(addr), 8);
            addr_p8 = tcgv_i64_temp(t);
        }

        if (b) {
            tcg_gen_bswap64_i64(b, y);
            gen_ldst_i64(opc, b, addr_p8, make_memop_idx(mop[1], idx));
            tcg_temp_free_i64(b);
        } else {
            gen_ldst_i64(opc, y, addr_p8, make_memop_idx(mop[1], idx));
        }
        tcg_temp_free_internal(addr_p8);
    } else {
        if (tcg_ctx->addr_type == TCG_TYPE_I32) {
            ext_addr = tcg_temp_ebb_new_i64();
            tcg_gen_extu_i32_i64(ext_addr, temp_tcgv_i32(addr));
            addr = tcgv_i64_temp(ext_addr);
        }
        gen_helper_st_i128(tcg_env, temp_tcgv_i64(addr), val,
                           tcg_constant_i32(orig_oi));
    }

    plugin_gen_mem_callbacks_i128(val, ext_addr, addr, orig_oi,
                                  QEMU_PLUGIN_MEM_W);
}

void tcg_gen_qemu_st_i128_chk(TCGv_i128 val, TCGTemp *addr, TCGArg idx,
                              MemOp memop, TCGType addr_type)
{
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);
    tcg_debug_assert((memop & MO_SIZE) == MO_128);
    tcg_debug_assert((memop & MO_SIGN) == 0);
    tcg_gen_qemu_st_i128_int(val, addr, idx, memop);
}

void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc)
{
    switch (opc & MO_SSIZE) {
    case MO_SB:
        tcg_gen_ext8s_i32(ret, val);
        break;
    case MO_UB:
        tcg_gen_ext8u_i32(ret, val);
        break;
    case MO_SW:
        tcg_gen_ext16s_i32(ret, val);
        break;
    case MO_UW:
        tcg_gen_ext16u_i32(ret, val);
        break;
    case MO_UL:
    case MO_SL:
        tcg_gen_mov_i32(ret, val);
        break;
    default:
        g_assert_not_reached();
    }
}

void tcg_gen_ext_i64(TCGv_i64 ret, TCGv_i64 val, MemOp opc)
{
    switch (opc & MO_SSIZE) {
    case MO_SB:
        tcg_gen_ext8s_i64(ret, val);
        break;
    case MO_UB:
        tcg_gen_ext8u_i64(ret, val);
        break;
    case MO_SW:
        tcg_gen_ext16s_i64(ret, val);
        break;
    case MO_UW:
        tcg_gen_ext16u_i64(ret, val);
        break;
    case MO_SL:
        tcg_gen_ext32s_i64(ret, val);
        break;
    case MO_UL:
        tcg_gen_ext32u_i64(ret, val);
        break;
    case MO_UQ:
    case MO_SQ:
        tcg_gen_mov_i64(ret, val);
        break;
    default:
        g_assert_not_reached();
    }
}

typedef void (*gen_atomic_cx_i32)(TCGv_i32, TCGv_env, TCGv_i64,
                                  TCGv_i32, TCGv_i32, TCGv_i32);
typedef void (*gen_atomic_cx_i64)(TCGv_i64, TCGv_env, TCGv_i64,
                                  TCGv_i64, TCGv_i64, TCGv_i32);
typedef void (*gen_atomic_cx_i128)(TCGv_i128, TCGv_env, TCGv_i64,
                                   TCGv_i128, TCGv_i128, TCGv_i32);
typedef void (*gen_atomic_op_i32)(TCGv_i32, TCGv_env, TCGv_i64,
                                  TCGv_i32, TCGv_i32);
typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv_i64,
                                  TCGv_i64, TCGv_i32);

#ifdef CONFIG_ATOMIC64
# define WITH_ATOMIC64(X) X,
#else
# define WITH_ATOMIC64(X)
#endif
#if HAVE_CMPXCHG128
# define WITH_ATOMIC128(X) X,
#else
# define WITH_ATOMIC128(X)
#endif

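/* Lookup table of cmpxchg helpers, indexed by MO_SIZE | MO_BSWAP. */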
static void * const table_cmpxchg[(MO_SIZE | MO_BSWAP) + 1] = {
    [MO_8] = gen_helper_atomic_cmpxchgb,
    [MO_16 | MO_LE] = gen_helper_atomic_cmpxchgw_le,
    [MO_16 | MO_BE] = gen_helper_atomic_cmpxchgw_be,
    [MO_32 | MO_LE] = gen_helper_atomic_cmpxchgl_le,
    [MO_32 | MO_BE] = gen_helper_atomic_cmpxchgl_be,
    WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_cmpxchgq_le)
    WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_cmpxchgq_be)
    WITH_ATOMIC128([MO_128 | MO_LE] = gen_helper_atomic_cmpxchgo_le)
    WITH_ATOMIC128([MO_128 | MO_BE] = gen_helper_atomic_cmpxchgo_be)
};

static void tcg_gen_nonatomic_cmpxchg_i32_int(TCGv_i32 retv, TCGTemp *addr,
                                              TCGv_i32 cmpv, TCGv_i32 newv,
                                              TCGArg idx, MemOp memop)
{
    TCGv_i32 t1 = tcg_temp_ebb_new_i32();
    TCGv_i32 t2 = tcg_temp_ebb_new_i32();

    tcg_gen_ext_i32(t2, cmpv, memop & MO_SIZE);

    tcg_gen_qemu_ld_i32_int(t1, addr, idx, memop & ~MO_SIGN);
    tcg_gen_movcond_i32(TCG_COND_EQ, t2, t1, t2, newv, t1);
    tcg_gen_qemu_st_i32_int(t2, addr, idx, memop);
    tcg_temp_free_i32(t2);

    if (memop & MO_SIGN) {
        tcg_gen_ext_i32(retv, t1, memop);
    } else {
        tcg_gen_mov_i32(retv, t1);
    }
    tcg_temp_free_i32(t1);
}

void tcg_gen_nonatomic_cmpxchg_i32_chk(TCGv_i32 retv, TCGTemp *addr,
                                       TCGv_i32 cmpv, TCGv_i32 newv,
                                       TCGArg idx, MemOp memop,
                                       TCGType addr_type)
{
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);
    tcg_debug_assert((memop & MO_SIZE) <= MO_32);
    tcg_gen_nonatomic_cmpxchg_i32_int(retv, addr, cmpv, newv, idx, memop);
}

static void tcg_gen_atomic_cmpxchg_i32_int(TCGv_i32 retv, TCGTemp *addr,
                                           TCGv_i32 cmpv, TCGv_i32 newv,
                                           TCGArg idx, MemOp memop)
{
    gen_atomic_cx_i32 gen;
    TCGv_i64 a64;
    MemOpIdx oi;

    if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
        tcg_gen_nonatomic_cmpxchg_i32_int(retv, addr, cmpv, newv, idx, memop);
        return;
    }

    memop = tcg_canonicalize_memop(memop, 0, 0);
    gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
    tcg_debug_assert(gen != NULL);

    oi = make_memop_idx(memop & ~MO_SIGN, idx);
    a64 = maybe_extend_addr64(addr);
    gen(retv, tcg_env, a64, cmpv, newv, tcg_constant_i32(oi));
    maybe_free_addr64(a64);

    if (memop & MO_SIGN) {
        tcg_gen_ext_i32(retv, retv, memop);
    }
}

void tcg_gen_atomic_cmpxchg_i32_chk(TCGv_i32 retv, TCGTemp *addr,
                                    TCGv_i32 cmpv, TCGv_i32 newv,
                                    TCGArg idx, MemOp memop,
                                    TCGType addr_type)
{
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);
    tcg_debug_assert((memop & MO_SIZE) <= MO_32);
    tcg_gen_atomic_cmpxchg_i32_int(retv, addr, cmpv, newv, idx, memop);
}

static void tcg_gen_nonatomic_cmpxchg_i64_int(TCGv_i64 retv, TCGTemp *addr,
                                              TCGv_i64 cmpv, TCGv_i64 newv,
                                              TCGArg idx, MemOp memop)
{
    TCGv_i64 t1, t2;

    if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
        tcg_gen_nonatomic_cmpxchg_i32_int(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
                                          TCGV_LOW(newv), idx, memop);
        if (memop & MO_SIGN) {
            tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31);
        } else {
            tcg_gen_movi_i32(TCGV_HIGH(retv), 0);
        }
        return;
    }

    t1 = tcg_temp_ebb_new_i64();
    t2 = tcg_temp_ebb_new_i64();

    tcg_gen_ext_i64(t2, cmpv, memop & MO_SIZE);

    tcg_gen_qemu_ld_i64_int(t1, addr, idx, memop & ~MO_SIGN);
    tcg_gen_movcond_i64(TCG_COND_EQ, t2, t1, t2, newv, t1);
    tcg_gen_qemu_st_i64_int(t2, addr, idx, memop);
    tcg_temp_free_i64(t2);

    if (memop & MO_SIGN) {
        tcg_gen_ext_i64(retv, t1, memop);
    } else {
        tcg_gen_mov_i64(retv, t1);
    }
    tcg_temp_free_i64(t1);
}

void tcg_gen_nonatomic_cmpxchg_i64_chk(TCGv_i64 retv, TCGTemp *addr,
                                       TCGv_i64 cmpv, TCGv_i64 newv,
                                       TCGArg idx, MemOp memop,
                                       TCGType addr_type)
{
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);
    tcg_debug_assert((memop & MO_SIZE) <= MO_64);
    tcg_gen_nonatomic_cmpxchg_i64_int(retv, addr, cmpv, newv, idx, memop);
}

static void tcg_gen_atomic_cmpxchg_i64_int(TCGv_i64 retv, TCGTemp *addr,
                                           TCGv_i64 cmpv, TCGv_i64 newv,
                                           TCGArg idx, MemOp memop)
{
    if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
        tcg_gen_nonatomic_cmpxchg_i64_int(retv, addr, cmpv, newv, idx, memop);
        return;
    }

    if ((memop & MO_SIZE) == MO_64) {
        gen_atomic_cx_i64 gen;

        memop = tcg_canonicalize_memop(memop, 1, 0);
        gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
        if (gen) {
            MemOpIdx oi = make_memop_idx(memop, idx);
            TCGv_i64 a64 = maybe_extend_addr64(addr);
            gen(retv, tcg_env, a64, cmpv, newv, tcg_constant_i32(oi));
            maybe_free_addr64(a64);
            return;
        }

        gen_helper_exit_atomic(tcg_env);

        /*
         * Produce a result for a well-formed opcode stream.  This satisfies
         * liveness for set before used, which happens before this dead code
         * is removed.
         */
        tcg_gen_movi_i64(retv, 0);
        return;
    }

    if (TCG_TARGET_REG_BITS == 32) {
        tcg_gen_atomic_cmpxchg_i32_int(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
                                       TCGV_LOW(newv), idx, memop);
        if (memop & MO_SIGN) {
            tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31);
        } else {
            tcg_gen_movi_i32(TCGV_HIGH(retv), 0);
        }
    } else {
        TCGv_i32 c32 = tcg_temp_ebb_new_i32();
        TCGv_i32 n32 = tcg_temp_ebb_new_i32();
        TCGv_i32 r32 = tcg_temp_ebb_new_i32();

        tcg_gen_extrl_i64_i32(c32, cmpv);
        tcg_gen_extrl_i64_i32(n32, newv);
        tcg_gen_atomic_cmpxchg_i32_int(r32, addr, c32, n32,
                                       idx, memop & ~MO_SIGN);
        tcg_temp_free_i32(c32);
        tcg_temp_free_i32(n32);

        tcg_gen_extu_i32_i64(retv, r32);
        tcg_temp_free_i32(r32);

        if (memop & MO_SIGN) {
            tcg_gen_ext_i64(retv, retv, memop);
        }
    }
}

void tcg_gen_atomic_cmpxchg_i64_chk(TCGv_i64 retv, TCGTemp *addr,
                                    TCGv_i64 cmpv, TCGv_i64 newv,
                                    TCGArg idx, MemOp memop, TCGType addr_type)
{
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);
    tcg_debug_assert((memop & MO_SIZE) <= MO_64);
    tcg_gen_atomic_cmpxchg_i64_int(retv, addr, cmpv, newv, idx, memop);
}

static void tcg_gen_nonatomic_cmpxchg_i128_int(TCGv_i128 retv, TCGTemp *addr,
                                               TCGv_i128 cmpv, TCGv_i128 newv,
                                               TCGArg idx, MemOp memop)
{
    if (TCG_TARGET_REG_BITS == 32) {
        /* Inline expansion below is simply too large for 32-bit hosts. */
        MemOpIdx oi = make_memop_idx(memop, idx);
        TCGv_i64 a64 = maybe_extend_addr64(addr);

        gen_helper_nonatomic_cmpxchgo(retv, tcg_env, a64, cmpv, newv,
                                      tcg_constant_i32(oi));
        maybe_free_addr64(a64);
    } else {
        TCGv_i128 oldv = tcg_temp_ebb_new_i128();
        TCGv_i128 tmpv = tcg_temp_ebb_new_i128();
        TCGv_i64 t0 = tcg_temp_ebb_new_i64();
        TCGv_i64 t1 = tcg_temp_ebb_new_i64();
        TCGv_i64 z = tcg_constant_i64(0);

        tcg_gen_qemu_ld_i128_int(oldv, addr, idx, memop);

        /* Compare i128 */
        tcg_gen_xor_i64(t0, TCGV128_LOW(oldv), TCGV128_LOW(cmpv));
        tcg_gen_xor_i64(t1, TCGV128_HIGH(oldv), TCGV128_HIGH(cmpv));
        tcg_gen_or_i64(t0, t0, t1);

        /* tmpv = equal ? newv : oldv */
        tcg_gen_movcond_i64(TCG_COND_EQ, TCGV128_LOW(tmpv), t0, z,
                            TCGV128_LOW(newv), TCGV128_LOW(oldv));
        tcg_gen_movcond_i64(TCG_COND_EQ, TCGV128_HIGH(tmpv), t0, z,
                            TCGV128_HIGH(newv), TCGV128_HIGH(oldv));

        /* Unconditional writeback. */
        tcg_gen_qemu_st_i128_int(tmpv, addr, idx, memop);
        tcg_gen_mov_i128(retv, oldv);

        tcg_temp_free_i64(t0);
        tcg_temp_free_i64(t1);
        tcg_temp_free_i128(tmpv);
        tcg_temp_free_i128(oldv);
    }
}

void tcg_gen_nonatomic_cmpxchg_i128_chk(TCGv_i128 retv, TCGTemp *addr,
                                        TCGv_i128 cmpv, TCGv_i128 newv,
                                        TCGArg idx, MemOp memop,
                                        TCGType addr_type)
{
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);
    tcg_debug_assert((memop & (MO_SIZE | MO_SIGN)) == MO_128);
    tcg_gen_nonatomic_cmpxchg_i128_int(retv, addr, cmpv, newv, idx, memop);
}

static void tcg_gen_atomic_cmpxchg_i128_int(TCGv_i128 retv, TCGTemp *addr,
                                            TCGv_i128 cmpv, TCGv_i128 newv,
                                            TCGArg idx, MemOp memop)
{
    gen_atomic_cx_i128 gen;

    if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
        tcg_gen_nonatomic_cmpxchg_i128_int(retv, addr, cmpv, newv, idx, memop);
        return;
    }

    gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
    if (gen) {
        MemOpIdx oi = make_memop_idx(memop, idx);
        TCGv_i64 a64 = maybe_extend_addr64(addr);
        gen(retv, tcg_env, a64, cmpv, newv, tcg_constant_i32(oi));
        maybe_free_addr64(a64);
        return;
    }

    gen_helper_exit_atomic(tcg_env);

    /*
     * Produce a result for a well-formed opcode stream.  This satisfies
     * liveness for set before used, which happens before this dead code
     * is removed.
     */
    tcg_gen_movi_i64(TCGV128_LOW(retv), 0);
    tcg_gen_movi_i64(TCGV128_HIGH(retv), 0);
}

void tcg_gen_atomic_cmpxchg_i128_chk(TCGv_i128 retv, TCGTemp *addr,
                                     TCGv_i128 cmpv, TCGv_i128 newv,
                                     TCGArg idx, MemOp memop,
                                     TCGType addr_type)
{
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);
    tcg_debug_assert((memop & (MO_SIZE | MO_SIGN)) == MO_128);
    tcg_gen_atomic_cmpxchg_i128_int(retv, addr, cmpv, newv, idx, memop);
}

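/*
 * Expand an atomic read-modify-write operation as a plain load, operation
 * and store.  Only used when the TB is not generated for parallel
 * execution, so no other vCPU can observe the intermediate state.
 */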
static void do_nonatomic_op_i32(TCGv_i32 ret, TCGTemp *addr, TCGv_i32 val,
                                TCGArg idx, MemOp memop, bool new_val,
                                void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
{
    TCGv_i32 t1 = tcg_temp_ebb_new_i32();
    TCGv_i32 t2 = tcg_temp_ebb_new_i32();

    memop = tcg_canonicalize_memop(memop, 0, 0);

    tcg_gen_qemu_ld_i32_int(t1, addr, idx, memop);
    tcg_gen_ext_i32(t2, val, memop);
    gen(t2, t1, t2);
    tcg_gen_qemu_st_i32_int(t2, addr, idx, memop);

    tcg_gen_ext_i32(ret, (new_val ? t2 : t1), memop);
    tcg_temp_free_i32(t1);
    tcg_temp_free_i32(t2);
}

static void do_atomic_op_i32(TCGv_i32 ret, TCGTemp *addr, TCGv_i32 val,
                             TCGArg idx, MemOp memop, void * const table[])
{
    gen_atomic_op_i32 gen;
    TCGv_i64 a64;
    MemOpIdx oi;

    memop = tcg_canonicalize_memop(memop, 0, 0);

    gen = table[memop & (MO_SIZE | MO_BSWAP)];
    tcg_debug_assert(gen != NULL);

    oi = make_memop_idx(memop & ~MO_SIGN, idx);
    a64 = maybe_extend_addr64(addr);
    gen(ret, tcg_env, a64, val, tcg_constant_i32(oi));
    maybe_free_addr64(a64);

    if (memop & MO_SIGN) {
        tcg_gen_ext_i32(ret, ret, memop);
    }
}

static void do_nonatomic_op_i64(TCGv_i64 ret, TCGTemp *addr, TCGv_i64 val,
                                TCGArg idx, MemOp memop, bool new_val,
                                void (*gen)(TCGv_i64, TCGv_i64, TCGv_i64))
{
    TCGv_i64 t1 = tcg_temp_ebb_new_i64();
    TCGv_i64 t2 = tcg_temp_ebb_new_i64();

    memop = tcg_canonicalize_memop(memop, 1, 0);

    tcg_gen_qemu_ld_i64_int(t1, addr, idx, memop);
    tcg_gen_ext_i64(t2, val, memop);
    gen(t2, t1, t2);
    tcg_gen_qemu_st_i64_int(t2, addr, idx, memop);

    tcg_gen_ext_i64(ret, (new_val ? t2 : t1), memop);
    tcg_temp_free_i64(t1);
    tcg_temp_free_i64(t2);
}

static void do_atomic_op_i64(TCGv_i64 ret, TCGTemp *addr, TCGv_i64 val,
                             TCGArg idx, MemOp memop, void * const table[])
{
    memop = tcg_canonicalize_memop(memop, 1, 0);

    if ((memop & MO_SIZE) == MO_64) {
        gen_atomic_op_i64 gen = table[memop & (MO_SIZE | MO_BSWAP)];

        if (gen) {
            MemOpIdx oi = make_memop_idx(memop & ~MO_SIGN, idx);
            TCGv_i64 a64 = maybe_extend_addr64(addr);
            gen(ret, tcg_env, a64, val, tcg_constant_i32(oi));
            maybe_free_addr64(a64);
            return;
        }

        gen_helper_exit_atomic(tcg_env);
        /*
         * Produce a result, so that we have a well-formed opcode stream
         * with respect to uses of the result in the (dead) code following.
         */
        tcg_gen_movi_i64(ret, 0);
    } else {
        TCGv_i32 v32 = tcg_temp_ebb_new_i32();
        TCGv_i32 r32 = tcg_temp_ebb_new_i32();

        tcg_gen_extrl_i64_i32(v32, val);
        do_atomic_op_i32(r32, addr, v32, idx, memop & ~MO_SIGN, table);
        tcg_temp_free_i32(v32);

        tcg_gen_extu_i32_i64(ret, r32);
        tcg_temp_free_i32(r32);

        if (memop & MO_SIGN) {
            tcg_gen_ext_i64(ret, ret, memop);
        }
    }
}

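/*
 * Define the helper lookup table and the public _i32/_i64 entry points
 * for one atomic read-modify-write operation.  NEW selects whether the
 * result is the memory value before (0) or after (1) the operation.
 */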
#define GEN_ATOMIC_HELPER(NAME, OP, NEW)                                \
static void * const table_##NAME[(MO_SIZE | MO_BSWAP) + 1] = {          \
    [MO_8] = gen_helper_atomic_##NAME##b,                               \
    [MO_16 | MO_LE] = gen_helper_atomic_##NAME##w_le,                   \
    [MO_16 | MO_BE] = gen_helper_atomic_##NAME##w_be,                   \
    [MO_32 | MO_LE] = gen_helper_atomic_##NAME##l_le,                   \
    [MO_32 | MO_BE] = gen_helper_atomic_##NAME##l_be,                   \
    WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_##NAME##q_le)     \
    WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_##NAME##q_be)     \
};                                                                      \
void tcg_gen_atomic_##NAME##_i32_chk(TCGv_i32 ret, TCGTemp *addr,       \
                                     TCGv_i32 val, TCGArg idx,          \
                                     MemOp memop, TCGType addr_type)    \
{                                                                       \
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);                  \
    tcg_debug_assert((memop & MO_SIZE) <= MO_32);                       \
    if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) {                        \
        do_atomic_op_i32(ret, addr, val, idx, memop, table_##NAME);     \
    } else {                                                            \
        do_nonatomic_op_i32(ret, addr, val, idx, memop, NEW,            \
                            tcg_gen_##OP##_i32);                        \
    }                                                                   \
}                                                                       \
void tcg_gen_atomic_##NAME##_i64_chk(TCGv_i64 ret, TCGTemp *addr,       \
                                     TCGv_i64 val, TCGArg idx,          \
                                     MemOp memop, TCGType addr_type)    \
{                                                                       \
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);                  \
    tcg_debug_assert((memop & MO_SIZE) <= MO_64);                       \
    if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) {                        \
        do_atomic_op_i64(ret, addr, val, idx, memop, table_##NAME);     \
    } else {                                                            \
        do_nonatomic_op_i64(ret, addr, val, idx, memop, NEW,            \
                            tcg_gen_##OP##_i64);                        \
    }                                                                   \
}

GEN_ATOMIC_HELPER(fetch_add, add, 0)
GEN_ATOMIC_HELPER(fetch_and, and, 0)
GEN_ATOMIC_HELPER(fetch_or, or, 0)
GEN_ATOMIC_HELPER(fetch_xor, xor, 0)
GEN_ATOMIC_HELPER(fetch_smin, smin, 0)
GEN_ATOMIC_HELPER(fetch_umin, umin, 0)
GEN_ATOMIC_HELPER(fetch_smax, smax, 0)
GEN_ATOMIC_HELPER(fetch_umax, umax, 0)

GEN_ATOMIC_HELPER(add_fetch, add, 1)
GEN_ATOMIC_HELPER(and_fetch, and, 1)
GEN_ATOMIC_HELPER(or_fetch, or, 1)
GEN_ATOMIC_HELPER(xor_fetch, xor, 1)
GEN_ATOMIC_HELPER(smin_fetch, smin, 1)
GEN_ATOMIC_HELPER(umin_fetch, umin, 1)
GEN_ATOMIC_HELPER(smax_fetch, smax, 1)
GEN_ATOMIC_HELPER(umax_fetch, umax, 1)

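/*
 * For xchg, the "operation" ignores the previously loaded value and
 * simply stores the new value.
 */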
static void tcg_gen_mov2_i32(TCGv_i32 r, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_mov_i32(r, b);
}

static void tcg_gen_mov2_i64(TCGv_i64 r, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_mov_i64(r, b);
}

GEN_ATOMIC_HELPER(xchg, mov2, 0)

#undef GEN_ATOMIC_HELPER