/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "exec/exec-all.h"
#include "tcg/tcg.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg/tcg-op-common.h"
#include "tcg/tcg-mo.h"
#include "exec/plugin-gen.h"
#include "tcg-internal.h"


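/*
 * Canonicalize the MemOp for a 32-bit (is64 == 0) or 64-bit (is64 == 1)
 * access: fold a natural-alignment MO_ALIGN_<size> into plain MO_ALIGN,
 * drop MO_BSWAP for byte accesses, and drop MO_SIGN where the extension
 * is not observable (full-width loads and all stores).
 */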
static inline MemOp tcg_canonicalize_memop(MemOp op, bool is64, bool st)
{
    /* Trigger the asserts within as early as possible.  */
    unsigned a_bits = get_alignment_bits(op);

    /* Prefer MO_ALIGN+MO_XX over MO_ALIGN_XX+MO_XX */
    if (a_bits == (op & MO_SIZE)) {
        op = (op & ~MO_AMASK) | MO_ALIGN;
    }

    switch (op & MO_SIZE) {
    case MO_8:
        op &= ~MO_BSWAP;
        break;
    case MO_16:
        break;
    case MO_32:
        if (!is64) {
            op &= ~MO_SIGN;
        }
        break;
    case MO_64:
        if (is64) {
            op &= ~MO_SIGN;
            break;
        }
        /* fall through */
    default:
        g_assert_not_reached();
    }
    if (st) {
        op &= ~MO_SIGN;
    }
    return op;
}

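/*
 * Emit a qemu_ld/st opcode, taking the value and address as raw TCGTemps
 * so the same path serves 32-bit, 64-bit and 128-bit values.  On 32-bit
 * hosts a 64-bit guest address is passed as two halves (see TCGV_LOW/HIGH).
 */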
static void gen_ldst(TCGOpcode opc, TCGTemp *vl, TCGTemp *vh,
                     TCGTemp *addr, MemOpIdx oi)
{
    if (TCG_TARGET_REG_BITS == 64 || tcg_ctx->addr_type == TCG_TYPE_I32) {
        if (vh) {
            tcg_gen_op4(opc, temp_arg(vl), temp_arg(vh), temp_arg(addr), oi);
        } else {
            tcg_gen_op3(opc, temp_arg(vl), temp_arg(addr), oi);
        }
    } else {
        /* See TCGV_LOW/HIGH. */
        TCGTemp *al = addr + HOST_BIG_ENDIAN;
        TCGTemp *ah = addr + !HOST_BIG_ENDIAN;

        if (vh) {
            tcg_gen_op5(opc, temp_arg(vl), temp_arg(vh),
                        temp_arg(al), temp_arg(ah), oi);
        } else {
            tcg_gen_op4(opc, temp_arg(vl), temp_arg(al), temp_arg(ah), oi);
        }
    }
}

static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 v, TCGTemp *addr, MemOpIdx oi)
{
    if (TCG_TARGET_REG_BITS == 32) {
        TCGTemp *vl = tcgv_i32_temp(TCGV_LOW(v));
        TCGTemp *vh = tcgv_i32_temp(TCGV_HIGH(v));
        gen_ldst(opc, vl, vh, addr, oi);
    } else {
        gen_ldst(opc, tcgv_i64_temp(v), NULL, addr, oi);
    }
}

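/*
 * Emit a barrier for the ordering required by the guest memory model,
 * less whatever the host already guarantees (TCG_TARGET_DEFAULT_MO).
 */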
static void tcg_gen_req_mo(TCGBar type)
{
    type &= tcg_ctx->guest_mo;
    type &= ~TCG_TARGET_DEFAULT_MO;
    if (type) {
        tcg_gen_mb(type | TCG_BAR_SC);
    }
}

/* Only required for loads, where value might overlap addr. */
static TCGv_i64 plugin_maybe_preserve_addr(TCGTemp *addr)
{
#ifdef CONFIG_PLUGIN
    if (tcg_ctx->plugin_insn != NULL) {
        /* Save a copy of the vaddr for use after a load.  */
        TCGv_i64 temp = tcg_temp_ebb_new_i64();
        if (tcg_ctx->addr_type == TCG_TYPE_I32) {
            tcg_gen_extu_i32_i64(temp, temp_tcgv_i32(addr));
        } else {
            tcg_gen_mov_i64(temp, temp_tcgv_i64(addr));
        }
        return temp;
    }
#endif
    return NULL;
}

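/*
 * Emit the plugin callback for a memory access.  A 32-bit guest address
 * is zero-extended to 64 bits, reusing @copy_addr if the caller already
 * preserved the address across a load.
 */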
static void
plugin_gen_mem_callbacks(TCGv_i64 copy_addr, TCGTemp *orig_addr, MemOpIdx oi,
                         enum qemu_plugin_mem_rw rw)
{
#ifdef CONFIG_PLUGIN
    if (tcg_ctx->plugin_insn != NULL) {
        qemu_plugin_meminfo_t info = make_plugin_meminfo(oi, rw);

        if (tcg_ctx->addr_type == TCG_TYPE_I32) {
            if (!copy_addr) {
                copy_addr = tcg_temp_ebb_new_i64();
                tcg_gen_extu_i32_i64(copy_addr, temp_tcgv_i32(orig_addr));
            }
            plugin_gen_empty_mem_callback(copy_addr, info);
            tcg_temp_free_i64(copy_addr);
        } else {
            if (copy_addr) {
                plugin_gen_empty_mem_callback(copy_addr, info);
                tcg_temp_free_i64(copy_addr);
            } else {
                plugin_gen_empty_mem_callback(temp_tcgv_i64(orig_addr), info);
            }
        }
    }
#endif
}

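/*
 * Expand a 32-bit guest load: emit the required barrier, select the
 * opcode for the current address type, and fall back to a separate
 * bswap when the host cannot byte-swap within the memory operation.
 */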
static void tcg_gen_qemu_ld_i32_int(TCGv_i32 val, TCGTemp *addr,
                                    TCGArg idx, MemOp memop)
{
    MemOp orig_memop;
    MemOpIdx orig_oi, oi;
    TCGv_i64 copy_addr;
    TCGOpcode opc;

    tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
    orig_memop = memop = tcg_canonicalize_memop(memop, 0, 0);
    orig_oi = oi = make_memop_idx(memop, idx);

    if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
        memop &= ~MO_BSWAP;
        /* The bswap primitive benefits from zero-extended input.  */
        if ((memop & MO_SSIZE) == MO_SW) {
            memop &= ~MO_SIGN;
        }
        oi = make_memop_idx(memop, idx);
    }

    copy_addr = plugin_maybe_preserve_addr(addr);
    if (tcg_ctx->addr_type == TCG_TYPE_I32) {
        opc = INDEX_op_qemu_ld_a32_i32;
    } else {
        opc = INDEX_op_qemu_ld_a64_i32;
    }
    gen_ldst(opc, tcgv_i32_temp(val), NULL, addr, oi);
    plugin_gen_mem_callbacks(copy_addr, addr, orig_oi, QEMU_PLUGIN_MEM_R);

    if ((orig_memop ^ memop) & MO_BSWAP) {
        switch (orig_memop & MO_SIZE) {
        case MO_16:
            tcg_gen_bswap16_i32(val, val, (orig_memop & MO_SIGN
                                           ? TCG_BSWAP_IZ | TCG_BSWAP_OS
                                           : TCG_BSWAP_IZ | TCG_BSWAP_OZ));
            break;
        case MO_32:
            tcg_gen_bswap32_i32(val, val);
            break;
        default:
            g_assert_not_reached();
        }
    }
}

void tcg_gen_qemu_ld_i32_chk(TCGv_i32 val, TCGTemp *addr, TCGArg idx,
                             MemOp memop, TCGType addr_type)
{
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);
    tcg_debug_assert((memop & MO_SIZE) <= MO_32);
    tcg_gen_qemu_ld_i32_int(val, addr, idx, memop);
}

static void tcg_gen_qemu_st_i32_int(TCGv_i32 val, TCGTemp *addr,
                                    TCGArg idx, MemOp memop)
{
    TCGv_i32 swap = NULL;
    MemOpIdx orig_oi, oi;
    TCGOpcode opc;

    tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
    memop = tcg_canonicalize_memop(memop, 0, 1);
    orig_oi = oi = make_memop_idx(memop, idx);

    if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
        swap = tcg_temp_ebb_new_i32();
        switch (memop & MO_SIZE) {
        case MO_16:
            tcg_gen_bswap16_i32(swap, val, 0);
            break;
        case MO_32:
            tcg_gen_bswap32_i32(swap, val);
            break;
        default:
            g_assert_not_reached();
        }
        val = swap;
        memop &= ~MO_BSWAP;
        oi = make_memop_idx(memop, idx);
    }

    if (TCG_TARGET_HAS_qemu_st8_i32 && (memop & MO_SIZE) == MO_8) {
        if (tcg_ctx->addr_type == TCG_TYPE_I32) {
            opc = INDEX_op_qemu_st8_a32_i32;
        } else {
            opc = INDEX_op_qemu_st8_a64_i32;
        }
    } else {
        if (tcg_ctx->addr_type == TCG_TYPE_I32) {
            opc = INDEX_op_qemu_st_a32_i32;
        } else {
            opc = INDEX_op_qemu_st_a64_i32;
        }
    }
    gen_ldst(opc, tcgv_i32_temp(val), NULL, addr, oi);
    plugin_gen_mem_callbacks(NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W);

    if (swap) {
        tcg_temp_free_i32(swap);
    }
}

void tcg_gen_qemu_st_i32_chk(TCGv_i32 val, TCGTemp *addr, TCGArg idx,
                             MemOp memop, TCGType addr_type)
{
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);
    tcg_debug_assert((memop & MO_SIZE) <= MO_32);
    tcg_gen_qemu_st_i32_int(val, addr, idx, memop);
}

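/*
 * Expand a 64-bit guest load.  On a 32-bit host, an access smaller than
 * 64 bits is routed through the 32-bit path and then extended into the
 * high half of the result.
 */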
static void tcg_gen_qemu_ld_i64_int(TCGv_i64 val, TCGTemp *addr,
                                    TCGArg idx, MemOp memop)
{
    MemOp orig_memop;
    MemOpIdx orig_oi, oi;
    TCGv_i64 copy_addr;
    TCGOpcode opc;

    if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
        tcg_gen_qemu_ld_i32_int(TCGV_LOW(val), addr, idx, memop);
        if (memop & MO_SIGN) {
            tcg_gen_sari_i32(TCGV_HIGH(val), TCGV_LOW(val), 31);
        } else {
            tcg_gen_movi_i32(TCGV_HIGH(val), 0);
        }
        return;
    }

    tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
    orig_memop = memop = tcg_canonicalize_memop(memop, 1, 0);
    orig_oi = oi = make_memop_idx(memop, idx);

    if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
        memop &= ~MO_BSWAP;
        /* The bswap primitive benefits from zero-extended input.  */
        if ((memop & MO_SIGN) && (memop & MO_SIZE) < MO_64) {
            memop &= ~MO_SIGN;
        }
        oi = make_memop_idx(memop, idx);
    }

    copy_addr = plugin_maybe_preserve_addr(addr);
    if (tcg_ctx->addr_type == TCG_TYPE_I32) {
        opc = INDEX_op_qemu_ld_a32_i64;
    } else {
        opc = INDEX_op_qemu_ld_a64_i64;
    }
    gen_ldst_i64(opc, val, addr, oi);
    plugin_gen_mem_callbacks(copy_addr, addr, orig_oi, QEMU_PLUGIN_MEM_R);

    if ((orig_memop ^ memop) & MO_BSWAP) {
        int flags = (orig_memop & MO_SIGN
                     ? TCG_BSWAP_IZ | TCG_BSWAP_OS
                     : TCG_BSWAP_IZ | TCG_BSWAP_OZ);
        switch (orig_memop & MO_SIZE) {
        case MO_16:
            tcg_gen_bswap16_i64(val, val, flags);
            break;
        case MO_32:
            tcg_gen_bswap32_i64(val, val, flags);
            break;
        case MO_64:
            tcg_gen_bswap64_i64(val, val);
            break;
        default:
            g_assert_not_reached();
        }
    }
}

void tcg_gen_qemu_ld_i64_chk(TCGv_i64 val, TCGTemp *addr, TCGArg idx,
                             MemOp memop, TCGType addr_type)
{
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);
    tcg_debug_assert((memop & MO_SIZE) <= MO_64);
    tcg_gen_qemu_ld_i64_int(val, addr, idx, memop);
}

static void tcg_gen_qemu_st_i64_int(TCGv_i64 val, TCGTemp *addr,
                                    TCGArg idx, MemOp memop)
{
    TCGv_i64 swap = NULL;
    MemOpIdx orig_oi, oi;
    TCGOpcode opc;

    if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
        tcg_gen_qemu_st_i32_int(TCGV_LOW(val), addr, idx, memop);
        return;
    }

    tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
    memop = tcg_canonicalize_memop(memop, 1, 1);
    orig_oi = oi = make_memop_idx(memop, idx);

    if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
        swap = tcg_temp_ebb_new_i64();
        switch (memop & MO_SIZE) {
        case MO_16:
            tcg_gen_bswap16_i64(swap, val, 0);
            break;
        case MO_32:
            tcg_gen_bswap32_i64(swap, val, 0);
            break;
        case MO_64:
            tcg_gen_bswap64_i64(swap, val);
            break;
        default:
            g_assert_not_reached();
        }
        val = swap;
        memop &= ~MO_BSWAP;
        oi = make_memop_idx(memop, idx);
    }

    if (tcg_ctx->addr_type == TCG_TYPE_I32) {
        opc = INDEX_op_qemu_st_a32_i64;
    } else {
        opc = INDEX_op_qemu_st_a64_i64;
    }
    gen_ldst_i64(opc, val, addr, oi);
    plugin_gen_mem_callbacks(NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W);

    if (swap) {
        tcg_temp_free_i64(swap);
    }
}

void tcg_gen_qemu_st_i64_chk(TCGv_i64 val, TCGTemp *addr, TCGArg idx,
                             MemOp memop, TCGType addr_type)
{
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);
    tcg_debug_assert((memop & MO_SIZE) <= MO_64);
    tcg_gen_qemu_st_i64_int(val, addr, idx, memop);
}

/*
 * Return true if @mop, without knowledge of the pointer alignment,
 * does not require 16-byte atomicity, and it would be advantageous
 * to avoid a call to a helper function.
 */
static bool use_two_i64_for_i128(MemOp mop)
{
#ifdef CONFIG_SOFTMMU
    /* Two softmmu tlb lookups is larger than one function call. */
    return false;
#else
    /*
     * For user-only, two 64-bit operations may well be smaller than a call.
     * Determine if that would be legal for the requested atomicity.
     */
    switch (mop & MO_ATOM_MASK) {
    case MO_ATOM_NONE:
    case MO_ATOM_IFALIGN_PAIR:
        return true;
    case MO_ATOM_IFALIGN:
    case MO_ATOM_SUBALIGN:
    case MO_ATOM_WITHIN16:
    case MO_ATOM_WITHIN16_PAIR:
        /* In a serialized context, no atomicity is required. */
        return !(tcg_ctx->gen_tb->cflags & CF_PARALLEL);
    default:
        g_assert_not_reached();
    }
#endif
}

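/*
 * Split a 128-bit MemOp into two 64-bit MemOps (ret[0] for the access at
 * the lower address, ret[1] for the access at addr+8), preserving the
 * alignment and byte-swap constraints as far as the smaller size allows.
 */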
static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
{
    MemOp mop_1 = orig, mop_2;

    /* Reduce the size to 64-bit. */
    mop_1 = (mop_1 & ~MO_SIZE) | MO_64;

    /* Retain the alignment constraints of the original. */
    switch (orig & MO_AMASK) {
    case MO_UNALN:
    case MO_ALIGN_2:
    case MO_ALIGN_4:
        mop_2 = mop_1;
        break;
    case MO_ALIGN_8:
        /* Prefer MO_ALIGN+MO_64 to MO_ALIGN_8+MO_64. */
        mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
        mop_2 = mop_1;
        break;
    case MO_ALIGN:
        /* Second has 8-byte alignment; first has 16-byte alignment. */
        mop_2 = mop_1;
        mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN_16;
        break;
    case MO_ALIGN_16:
    case MO_ALIGN_32:
    case MO_ALIGN_64:
        /* Second has 8-byte alignment; first retains original. */
        mop_2 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
        break;
    default:
        g_assert_not_reached();
    }

    /* Use a byte ordering implemented by the host. */
    if ((orig & MO_BSWAP) && !tcg_target_has_memory_bswap(mop_1)) {
        mop_1 &= ~MO_BSWAP;
        mop_2 &= ~MO_BSWAP;
    }

    ret[0] = mop_1;
    ret[1] = mop_2;
}

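/*
 * Helpers always take a 64-bit address: widen a 32-bit guest address
 * into a new temporary, released again by maybe_free_addr64().
 */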
static TCGv_i64 maybe_extend_addr64(TCGTemp *addr)
{
    if (tcg_ctx->addr_type == TCG_TYPE_I32) {
        TCGv_i64 a64 = tcg_temp_ebb_new_i64();
        tcg_gen_extu_i32_i64(a64, temp_tcgv_i32(addr));
        return a64;
    }
    return temp_tcgv_i64(addr);
}

static void maybe_free_addr64(TCGv_i64 a64)
{
    if (tcg_ctx->addr_type == TCG_TYPE_I32) {
        tcg_temp_free_i64(a64);
    }
}

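/*
 * Expand a 128-bit guest load one of three ways: a native i128 opcode
 * if the backend provides one, a pair of i64 loads when that is legal
 * for the requested atomicity, or a call to the out-of-line helper.
 */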
static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr,
                                     TCGArg idx, MemOp memop)
{
    const MemOpIdx orig_oi = make_memop_idx(memop, idx);
    TCGv_i64 ext_addr = NULL;
    TCGOpcode opc;

    tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);

    /* TODO: For now, force 32-bit hosts to use the helper. */
    if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
        TCGv_i64 lo, hi;
        bool need_bswap = false;
        MemOpIdx oi = orig_oi;

        if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
            lo = TCGV128_HIGH(val);
            hi = TCGV128_LOW(val);
            oi = make_memop_idx(memop & ~MO_BSWAP, idx);
            need_bswap = true;
        } else {
            lo = TCGV128_LOW(val);
            hi = TCGV128_HIGH(val);
        }

        if (tcg_ctx->addr_type == TCG_TYPE_I32) {
            opc = INDEX_op_qemu_ld_a32_i128;
        } else {
            opc = INDEX_op_qemu_ld_a64_i128;
        }
        gen_ldst(opc, tcgv_i64_temp(lo), tcgv_i64_temp(hi), addr, oi);

        if (need_bswap) {
            tcg_gen_bswap64_i64(lo, lo);
            tcg_gen_bswap64_i64(hi, hi);
        }
    } else if (use_two_i64_for_i128(memop)) {
        MemOp mop[2];
        TCGTemp *addr_p8;
        TCGv_i64 x, y;
        bool need_bswap;

        canonicalize_memop_i128_as_i64(mop, memop);
        need_bswap = (mop[0] ^ memop) & MO_BSWAP;

        if (tcg_ctx->addr_type == TCG_TYPE_I32) {
            opc = INDEX_op_qemu_ld_a32_i64;
        } else {
            opc = INDEX_op_qemu_ld_a64_i64;
        }

        /*
         * Since there are no global TCGv_i128, there is no visible state
         * changed if the second load faults.  Load directly into the two
         * subwords.
         */
        if ((memop & MO_BSWAP) == MO_LE) {
            x = TCGV128_LOW(val);
            y = TCGV128_HIGH(val);
        } else {
            x = TCGV128_HIGH(val);
            y = TCGV128_LOW(val);
        }

        gen_ldst_i64(opc, x, addr, make_memop_idx(mop[0], idx));

        if (need_bswap) {
            tcg_gen_bswap64_i64(x, x);
        }

        if (tcg_ctx->addr_type == TCG_TYPE_I32) {
            TCGv_i32 t = tcg_temp_ebb_new_i32();
            tcg_gen_addi_i32(t, temp_tcgv_i32(addr), 8);
            addr_p8 = tcgv_i32_temp(t);
        } else {
            TCGv_i64 t = tcg_temp_ebb_new_i64();
            tcg_gen_addi_i64(t, temp_tcgv_i64(addr), 8);
            addr_p8 = tcgv_i64_temp(t);
        }

        gen_ldst_i64(opc, y, addr_p8, make_memop_idx(mop[1], idx));
        tcg_temp_free_internal(addr_p8);

        if (need_bswap) {
            tcg_gen_bswap64_i64(y, y);
        }
    } else {
        if (tcg_ctx->addr_type == TCG_TYPE_I32) {
            ext_addr = tcg_temp_ebb_new_i64();
            tcg_gen_extu_i32_i64(ext_addr, temp_tcgv_i32(addr));
            addr = tcgv_i64_temp(ext_addr);
        }
        gen_helper_ld_i128(val, cpu_env, temp_tcgv_i64(addr),
                           tcg_constant_i32(orig_oi));
    }

    plugin_gen_mem_callbacks(ext_addr, addr, orig_oi, QEMU_PLUGIN_MEM_R);
}

void tcg_gen_qemu_ld_i128_chk(TCGv_i128 val, TCGTemp *addr, TCGArg idx,
                              MemOp memop, TCGType addr_type)
{
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);
    tcg_debug_assert((memop & MO_SIZE) == MO_128);
    tcg_debug_assert((memop & MO_SIGN) == 0);
    tcg_gen_qemu_ld_i128_int(val, addr, idx, memop);
}

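/*
 * Expand a 128-bit guest store, mirroring the load expansion above:
 * native i128 opcode, two i64 stores, or the out-of-line helper.
 */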
static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr,
                                     TCGArg idx, MemOp memop)
{
    const MemOpIdx orig_oi = make_memop_idx(memop, idx);
    TCGv_i64 ext_addr = NULL;
    TCGOpcode opc;

    tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);

    /* TODO: For now, force 32-bit hosts to use the helper. */

    if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
        TCGv_i64 lo, hi;
        MemOpIdx oi = orig_oi;
        bool need_bswap = false;

        if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
            lo = tcg_temp_ebb_new_i64();
            hi = tcg_temp_ebb_new_i64();
            tcg_gen_bswap64_i64(lo, TCGV128_HIGH(val));
            tcg_gen_bswap64_i64(hi, TCGV128_LOW(val));
            oi = make_memop_idx(memop & ~MO_BSWAP, idx);
            need_bswap = true;
        } else {
            lo = TCGV128_LOW(val);
            hi = TCGV128_HIGH(val);
        }

        if (tcg_ctx->addr_type == TCG_TYPE_I32) {
            opc = INDEX_op_qemu_st_a32_i128;
        } else {
            opc = INDEX_op_qemu_st_a64_i128;
        }
        gen_ldst(opc, tcgv_i64_temp(lo), tcgv_i64_temp(hi), addr, oi);

        if (need_bswap) {
            tcg_temp_free_i64(lo);
            tcg_temp_free_i64(hi);
        }
    } else if (use_two_i64_for_i128(memop)) {
        MemOp mop[2];
        TCGTemp *addr_p8;
        TCGv_i64 x, y, b = NULL;

        canonicalize_memop_i128_as_i64(mop, memop);

        if (tcg_ctx->addr_type == TCG_TYPE_I32) {
            opc = INDEX_op_qemu_st_a32_i64;
        } else {
            opc = INDEX_op_qemu_st_a64_i64;
        }

        if ((memop & MO_BSWAP) == MO_LE) {
            x = TCGV128_LOW(val);
            y = TCGV128_HIGH(val);
        } else {
            x = TCGV128_HIGH(val);
            y = TCGV128_LOW(val);
        }

        if ((mop[0] ^ memop) & MO_BSWAP) {
            b = tcg_temp_ebb_new_i64();
            tcg_gen_bswap64_i64(b, x);
            x = b;
        }

        gen_ldst_i64(opc, x, addr, make_memop_idx(mop[0], idx));

        if (tcg_ctx->addr_type == TCG_TYPE_I32) {
            TCGv_i32 t = tcg_temp_ebb_new_i32();
            tcg_gen_addi_i32(t, temp_tcgv_i32(addr), 8);
            addr_p8 = tcgv_i32_temp(t);
        } else {
            TCGv_i64 t = tcg_temp_ebb_new_i64();
            tcg_gen_addi_i64(t, temp_tcgv_i64(addr), 8);
            addr_p8 = tcgv_i64_temp(t);
        }

        if (b) {
            tcg_gen_bswap64_i64(b, y);
            gen_ldst_i64(opc, b, addr_p8, make_memop_idx(mop[1], idx));
            tcg_temp_free_i64(b);
        } else {
            gen_ldst_i64(opc, y, addr_p8, make_memop_idx(mop[1], idx));
        }
        tcg_temp_free_internal(addr_p8);
    } else {
        if (tcg_ctx->addr_type == TCG_TYPE_I32) {
            ext_addr = tcg_temp_ebb_new_i64();
            tcg_gen_extu_i32_i64(ext_addr, temp_tcgv_i32(addr));
            addr = tcgv_i64_temp(ext_addr);
        }
        gen_helper_st_i128(cpu_env, temp_tcgv_i64(addr), val,
                           tcg_constant_i32(orig_oi));
    }

    plugin_gen_mem_callbacks(ext_addr, addr, orig_oi, QEMU_PLUGIN_MEM_W);
}

void tcg_gen_qemu_st_i128_chk(TCGv_i128 val, TCGTemp *addr, TCGArg idx,
                              MemOp memop, TCGType addr_type)
{
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);
    tcg_debug_assert((memop & MO_SIZE) == MO_128);
    tcg_debug_assert((memop & MO_SIGN) == 0);
    tcg_gen_qemu_st_i128_int(val, addr, idx, memop);
}

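/*
 * Sign- or zero-extend @val to the size and signedness given by MO_SSIZE
 * of @opc, used by the cmpxchg and atomic operation expansions below.
 */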
static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc)
{
    switch (opc & MO_SSIZE) {
    case MO_SB:
        tcg_gen_ext8s_i32(ret, val);
        break;
    case MO_UB:
        tcg_gen_ext8u_i32(ret, val);
        break;
    case MO_SW:
        tcg_gen_ext16s_i32(ret, val);
        break;
    case MO_UW:
        tcg_gen_ext16u_i32(ret, val);
        break;
    default:
        tcg_gen_mov_i32(ret, val);
        break;
    }
}

static void tcg_gen_ext_i64(TCGv_i64 ret, TCGv_i64 val, MemOp opc)
{
    switch (opc & MO_SSIZE) {
    case MO_SB:
        tcg_gen_ext8s_i64(ret, val);
        break;
    case MO_UB:
        tcg_gen_ext8u_i64(ret, val);
        break;
    case MO_SW:
        tcg_gen_ext16s_i64(ret, val);
        break;
    case MO_UW:
        tcg_gen_ext16u_i64(ret, val);
        break;
    case MO_SL:
        tcg_gen_ext32s_i64(ret, val);
        break;
    case MO_UL:
        tcg_gen_ext32u_i64(ret, val);
        break;
    default:
        tcg_gen_mov_i64(ret, val);
        break;
    }
}

typedef void (*gen_atomic_cx_i32)(TCGv_i32, TCGv_env, TCGv_i64,
                                  TCGv_i32, TCGv_i32, TCGv_i32);
typedef void (*gen_atomic_cx_i64)(TCGv_i64, TCGv_env, TCGv_i64,
                                  TCGv_i64, TCGv_i64, TCGv_i32);
typedef void (*gen_atomic_cx_i128)(TCGv_i128, TCGv_env, TCGv_i64,
                                   TCGv_i128, TCGv_i128, TCGv_i32);
typedef void (*gen_atomic_op_i32)(TCGv_i32, TCGv_env, TCGv_i64,
                                  TCGv_i32, TCGv_i32);
typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv_i64,
                                  TCGv_i64, TCGv_i32);

#ifdef CONFIG_ATOMIC64
# define WITH_ATOMIC64(X) X,
#else
# define WITH_ATOMIC64(X)
#endif
#ifdef CONFIG_CMPXCHG128
# define WITH_ATOMIC128(X) X,
#else
# define WITH_ATOMIC128(X)
#endif

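/* Helper table for cmpxchg, indexed by access size and endianness. */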
static void * const table_cmpxchg[(MO_SIZE | MO_BSWAP) + 1] = {
    [MO_8] = gen_helper_atomic_cmpxchgb,
    [MO_16 | MO_LE] = gen_helper_atomic_cmpxchgw_le,
    [MO_16 | MO_BE] = gen_helper_atomic_cmpxchgw_be,
    [MO_32 | MO_LE] = gen_helper_atomic_cmpxchgl_le,
    [MO_32 | MO_BE] = gen_helper_atomic_cmpxchgl_be,
    WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_cmpxchgq_le)
    WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_cmpxchgq_be)
    WITH_ATOMIC128([MO_128 | MO_LE] = gen_helper_atomic_cmpxchgo_le)
    WITH_ATOMIC128([MO_128 | MO_BE] = gen_helper_atomic_cmpxchgo_be)
};

static void tcg_gen_nonatomic_cmpxchg_i32_int(TCGv_i32 retv, TCGTemp *addr,
                                              TCGv_i32 cmpv, TCGv_i32 newv,
                                              TCGArg idx, MemOp memop)
{
    TCGv_i32 t1 = tcg_temp_ebb_new_i32();
    TCGv_i32 t2 = tcg_temp_ebb_new_i32();

    tcg_gen_ext_i32(t2, cmpv, memop & MO_SIZE);

    tcg_gen_qemu_ld_i32_int(t1, addr, idx, memop & ~MO_SIGN);
    tcg_gen_movcond_i32(TCG_COND_EQ, t2, t1, t2, newv, t1);
    tcg_gen_qemu_st_i32_int(t2, addr, idx, memop);
    tcg_temp_free_i32(t2);

    if (memop & MO_SIGN) {
        tcg_gen_ext_i32(retv, t1, memop);
    } else {
        tcg_gen_mov_i32(retv, t1);
    }
    tcg_temp_free_i32(t1);
}

void tcg_gen_nonatomic_cmpxchg_i32_chk(TCGv_i32 retv, TCGTemp *addr,
                                       TCGv_i32 cmpv, TCGv_i32 newv,
                                       TCGArg idx, MemOp memop,
                                       TCGType addr_type)
{
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);
    tcg_debug_assert((memop & MO_SIZE) <= MO_32);
    tcg_gen_nonatomic_cmpxchg_i32_int(retv, addr, cmpv, newv, idx, memop);
}

static void tcg_gen_atomic_cmpxchg_i32_int(TCGv_i32 retv, TCGTemp *addr,
                                           TCGv_i32 cmpv, TCGv_i32 newv,
                                           TCGArg idx, MemOp memop)
{
    gen_atomic_cx_i32 gen;
    TCGv_i64 a64;
    MemOpIdx oi;

    if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
        tcg_gen_nonatomic_cmpxchg_i32_int(retv, addr, cmpv, newv, idx, memop);
        return;
    }

    memop = tcg_canonicalize_memop(memop, 0, 0);
    gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
    tcg_debug_assert(gen != NULL);

    oi = make_memop_idx(memop & ~MO_SIGN, idx);
    a64 = maybe_extend_addr64(addr);
    gen(retv, cpu_env, a64, cmpv, newv, tcg_constant_i32(oi));
    maybe_free_addr64(a64);

    if (memop & MO_SIGN) {
        tcg_gen_ext_i32(retv, retv, memop);
    }
}

void tcg_gen_atomic_cmpxchg_i32_chk(TCGv_i32 retv, TCGTemp *addr,
                                    TCGv_i32 cmpv, TCGv_i32 newv,
                                    TCGArg idx, MemOp memop,
                                    TCGType addr_type)
{
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);
    tcg_debug_assert((memop & MO_SIZE) <= MO_32);
    tcg_gen_atomic_cmpxchg_i32_int(retv, addr, cmpv, newv, idx, memop);
}

static void tcg_gen_nonatomic_cmpxchg_i64_int(TCGv_i64 retv, TCGTemp *addr,
                                              TCGv_i64 cmpv, TCGv_i64 newv,
                                              TCGArg idx, MemOp memop)
{
    TCGv_i64 t1, t2;

    if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
        tcg_gen_nonatomic_cmpxchg_i32_int(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
                                          TCGV_LOW(newv), idx, memop);
        if (memop & MO_SIGN) {
            tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31);
        } else {
            tcg_gen_movi_i32(TCGV_HIGH(retv), 0);
        }
        return;
    }

    t1 = tcg_temp_ebb_new_i64();
    t2 = tcg_temp_ebb_new_i64();

    tcg_gen_ext_i64(t2, cmpv, memop & MO_SIZE);

    tcg_gen_qemu_ld_i64_int(t1, addr, idx, memop & ~MO_SIGN);
    tcg_gen_movcond_i64(TCG_COND_EQ, t2, t1, t2, newv, t1);
    tcg_gen_qemu_st_i64_int(t2, addr, idx, memop);
    tcg_temp_free_i64(t2);

    if (memop & MO_SIGN) {
        tcg_gen_ext_i64(retv, t1, memop);
    } else {
        tcg_gen_mov_i64(retv, t1);
    }
    tcg_temp_free_i64(t1);
}

void tcg_gen_nonatomic_cmpxchg_i64_chk(TCGv_i64 retv, TCGTemp *addr,
                                       TCGv_i64 cmpv, TCGv_i64 newv,
                                       TCGArg idx, MemOp memop,
                                       TCGType addr_type)
{
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);
    tcg_debug_assert((memop & MO_SIZE) <= MO_64);
    tcg_gen_nonatomic_cmpxchg_i64_int(retv, addr, cmpv, newv, idx, memop);
}

static void tcg_gen_atomic_cmpxchg_i64_int(TCGv_i64 retv, TCGTemp *addr,
                                           TCGv_i64 cmpv, TCGv_i64 newv,
                                           TCGArg idx, MemOp memop)
{
    if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
        tcg_gen_nonatomic_cmpxchg_i64_int(retv, addr, cmpv, newv, idx, memop);
        return;
    }

    if ((memop & MO_SIZE) == MO_64) {
        gen_atomic_cx_i64 gen;

        memop = tcg_canonicalize_memop(memop, 1, 0);
        gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
        if (gen) {
            MemOpIdx oi = make_memop_idx(memop, idx);
            TCGv_i64 a64 = maybe_extend_addr64(addr);
            gen(retv, cpu_env, a64, cmpv, newv, tcg_constant_i32(oi));
            maybe_free_addr64(a64);
            return;
        }

        gen_helper_exit_atomic(cpu_env);

        /*
         * Produce a result for a well-formed opcode stream.  This satisfies
         * liveness for set before used, which happens before this dead code
         * is removed.
         */
        tcg_gen_movi_i64(retv, 0);
        return;
    }

    if (TCG_TARGET_REG_BITS == 32) {
        tcg_gen_atomic_cmpxchg_i32_int(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
                                       TCGV_LOW(newv), idx, memop);
        if (memop & MO_SIGN) {
            tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31);
        } else {
            tcg_gen_movi_i32(TCGV_HIGH(retv), 0);
        }
    } else {
        TCGv_i32 c32 = tcg_temp_ebb_new_i32();
        TCGv_i32 n32 = tcg_temp_ebb_new_i32();
        TCGv_i32 r32 = tcg_temp_ebb_new_i32();

        tcg_gen_extrl_i64_i32(c32, cmpv);
        tcg_gen_extrl_i64_i32(n32, newv);
        tcg_gen_atomic_cmpxchg_i32_int(r32, addr, c32, n32,
                                       idx, memop & ~MO_SIGN);
        tcg_temp_free_i32(c32);
        tcg_temp_free_i32(n32);

        tcg_gen_extu_i32_i64(retv, r32);
        tcg_temp_free_i32(r32);

        if (memop & MO_SIGN) {
            tcg_gen_ext_i64(retv, retv, memop);
        }
    }
}

void tcg_gen_atomic_cmpxchg_i64_chk(TCGv_i64 retv, TCGTemp *addr,
                                    TCGv_i64 cmpv, TCGv_i64 newv,
                                    TCGArg idx, MemOp memop, TCGType addr_type)
{
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);
    tcg_debug_assert((memop & MO_SIZE) <= MO_64);
    tcg_gen_atomic_cmpxchg_i64_int(retv, addr, cmpv, newv, idx, memop);
}

static void tcg_gen_nonatomic_cmpxchg_i128_int(TCGv_i128 retv, TCGTemp *addr,
                                               TCGv_i128 cmpv, TCGv_i128 newv,
                                               TCGArg idx, MemOp memop)
{
    if (TCG_TARGET_REG_BITS == 32) {
        /* Inline expansion below is simply too large for 32-bit hosts. */
        MemOpIdx oi = make_memop_idx(memop, idx);
        TCGv_i64 a64 = maybe_extend_addr64(addr);

        gen_helper_nonatomic_cmpxchgo(retv, cpu_env, a64, cmpv, newv,
                                      tcg_constant_i32(oi));
        maybe_free_addr64(a64);
    } else {
        TCGv_i128 oldv = tcg_temp_ebb_new_i128();
        TCGv_i128 tmpv = tcg_temp_ebb_new_i128();
        TCGv_i64 t0 = tcg_temp_ebb_new_i64();
        TCGv_i64 t1 = tcg_temp_ebb_new_i64();
        TCGv_i64 z = tcg_constant_i64(0);

        tcg_gen_qemu_ld_i128_int(oldv, addr, idx, memop);

        /* Compare i128 */
        tcg_gen_xor_i64(t0, TCGV128_LOW(oldv), TCGV128_LOW(cmpv));
        tcg_gen_xor_i64(t1, TCGV128_HIGH(oldv), TCGV128_HIGH(cmpv));
        tcg_gen_or_i64(t0, t0, t1);

        /* tmpv = equal ? newv : oldv */
        tcg_gen_movcond_i64(TCG_COND_EQ, TCGV128_LOW(tmpv), t0, z,
                            TCGV128_LOW(newv), TCGV128_LOW(oldv));
        tcg_gen_movcond_i64(TCG_COND_EQ, TCGV128_HIGH(tmpv), t0, z,
                            TCGV128_HIGH(newv), TCGV128_HIGH(oldv));

        /* Unconditional writeback. */
        tcg_gen_qemu_st_i128_int(tmpv, addr, idx, memop);
        tcg_gen_mov_i128(retv, oldv);

        tcg_temp_free_i64(t0);
        tcg_temp_free_i64(t1);
        tcg_temp_free_i128(tmpv);
        tcg_temp_free_i128(oldv);
    }
}

void tcg_gen_nonatomic_cmpxchg_i128_chk(TCGv_i128 retv, TCGTemp *addr,
                                        TCGv_i128 cmpv, TCGv_i128 newv,
                                        TCGArg idx, MemOp memop,
                                        TCGType addr_type)
{
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);
    tcg_debug_assert((memop & (MO_SIZE | MO_SIGN)) == MO_128);
    tcg_gen_nonatomic_cmpxchg_i128_int(retv, addr, cmpv, newv, idx, memop);
}

static void tcg_gen_atomic_cmpxchg_i128_int(TCGv_i128 retv, TCGTemp *addr,
                                            TCGv_i128 cmpv, TCGv_i128 newv,
                                            TCGArg idx, MemOp memop)
{
    gen_atomic_cx_i128 gen;

    if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
        tcg_gen_nonatomic_cmpxchg_i128_int(retv, addr, cmpv, newv, idx, memop);
        return;
    }

    gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
    if (gen) {
        MemOpIdx oi = make_memop_idx(memop, idx);
        TCGv_i64 a64 = maybe_extend_addr64(addr);
        gen(retv, cpu_env, a64, cmpv, newv, tcg_constant_i32(oi));
        maybe_free_addr64(a64);
        return;
    }

    gen_helper_exit_atomic(cpu_env);

    /*
     * Produce a result for a well-formed opcode stream.  This satisfies
     * liveness for set before used, which happens before this dead code
     * is removed.
     */
    tcg_gen_movi_i64(TCGV128_LOW(retv), 0);
    tcg_gen_movi_i64(TCGV128_HIGH(retv), 0);
}

void tcg_gen_atomic_cmpxchg_i128_chk(TCGv_i128 retv, TCGTemp *addr,
                                     TCGv_i128 cmpv, TCGv_i128 newv,
                                     TCGArg idx, MemOp memop,
                                     TCGType addr_type)
{
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);
    tcg_debug_assert((memop & (MO_SIZE | MO_SIGN)) == MO_128);
    tcg_gen_atomic_cmpxchg_i128_int(retv, addr, cmpv, newv, idx, memop);
}

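/*
 * Expand an atomic read-modify-write operation as a plain load, the
 * operation, and a store, for use when the TB runs without CF_PARALLEL;
 * @new_val selects whether the new or the old value is returned.
 */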
static void do_nonatomic_op_i32(TCGv_i32 ret, TCGTemp *addr, TCGv_i32 val,
                                TCGArg idx, MemOp memop, bool new_val,
                                void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
{
    TCGv_i32 t1 = tcg_temp_ebb_new_i32();
    TCGv_i32 t2 = tcg_temp_ebb_new_i32();

    memop = tcg_canonicalize_memop(memop, 0, 0);

    tcg_gen_qemu_ld_i32_int(t1, addr, idx, memop);
    tcg_gen_ext_i32(t2, val, memop);
    gen(t2, t1, t2);
    tcg_gen_qemu_st_i32_int(t2, addr, idx, memop);

    tcg_gen_ext_i32(ret, (new_val ? t2 : t1), memop);
    tcg_temp_free_i32(t1);
    tcg_temp_free_i32(t2);
}

static void do_atomic_op_i32(TCGv_i32 ret, TCGTemp *addr, TCGv_i32 val,
                             TCGArg idx, MemOp memop, void * const table[])
{
    gen_atomic_op_i32 gen;
    TCGv_i64 a64;
    MemOpIdx oi;

    memop = tcg_canonicalize_memop(memop, 0, 0);

    gen = table[memop & (MO_SIZE | MO_BSWAP)];
    tcg_debug_assert(gen != NULL);

    oi = make_memop_idx(memop & ~MO_SIGN, idx);
    a64 = maybe_extend_addr64(addr);
    gen(ret, cpu_env, a64, val, tcg_constant_i32(oi));
    maybe_free_addr64(a64);

    if (memop & MO_SIGN) {
        tcg_gen_ext_i32(ret, ret, memop);
    }
}

static void do_nonatomic_op_i64(TCGv_i64 ret, TCGTemp *addr, TCGv_i64 val,
                                TCGArg idx, MemOp memop, bool new_val,
                                void (*gen)(TCGv_i64, TCGv_i64, TCGv_i64))
{
    TCGv_i64 t1 = tcg_temp_ebb_new_i64();
    TCGv_i64 t2 = tcg_temp_ebb_new_i64();

    memop = tcg_canonicalize_memop(memop, 1, 0);

    tcg_gen_qemu_ld_i64_int(t1, addr, idx, memop);
    tcg_gen_ext_i64(t2, val, memop);
    gen(t2, t1, t2);
    tcg_gen_qemu_st_i64_int(t2, addr, idx, memop);

    tcg_gen_ext_i64(ret, (new_val ? t2 : t1), memop);
    tcg_temp_free_i64(t1);
    tcg_temp_free_i64(t2);
}

static void do_atomic_op_i64(TCGv_i64 ret, TCGTemp *addr, TCGv_i64 val,
                             TCGArg idx, MemOp memop, void * const table[])
{
    memop = tcg_canonicalize_memop(memop, 1, 0);

    if ((memop & MO_SIZE) == MO_64) {
        gen_atomic_op_i64 gen = table[memop & (MO_SIZE | MO_BSWAP)];

        if (gen) {
            MemOpIdx oi = make_memop_idx(memop & ~MO_SIGN, idx);
            TCGv_i64 a64 = maybe_extend_addr64(addr);
            gen(ret, cpu_env, a64, val, tcg_constant_i32(oi));
            maybe_free_addr64(a64);
            return;
        }

        gen_helper_exit_atomic(cpu_env);
        /* Produce a result, so that we have a well-formed opcode stream
           with respect to uses of the result in the (dead) code following.  */
        tcg_gen_movi_i64(ret, 0);
    } else {
        TCGv_i32 v32 = tcg_temp_ebb_new_i32();
        TCGv_i32 r32 = tcg_temp_ebb_new_i32();

        tcg_gen_extrl_i64_i32(v32, val);
        do_atomic_op_i32(r32, addr, v32, idx, memop & ~MO_SIGN, table);
        tcg_temp_free_i32(v32);

        tcg_gen_extu_i32_i64(ret, r32);
        tcg_temp_free_i32(r32);

        if (memop & MO_SIGN) {
            tcg_gen_ext_i64(ret, ret, memop);
        }
    }
}

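/*
 * Instantiate, per operation, the helper table and the i32/i64 front
 * ends that pick the atomic or nonatomic expansion based on CF_PARALLEL.
 */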
#define GEN_ATOMIC_HELPER(NAME, OP, NEW)                                \
static void * const table_##NAME[(MO_SIZE | MO_BSWAP) + 1] = {          \
    [MO_8] = gen_helper_atomic_##NAME##b,                               \
    [MO_16 | MO_LE] = gen_helper_atomic_##NAME##w_le,                   \
    [MO_16 | MO_BE] = gen_helper_atomic_##NAME##w_be,                   \
    [MO_32 | MO_LE] = gen_helper_atomic_##NAME##l_le,                   \
    [MO_32 | MO_BE] = gen_helper_atomic_##NAME##l_be,                   \
    WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_##NAME##q_le)     \
    WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_##NAME##q_be)     \
};                                                                      \
void tcg_gen_atomic_##NAME##_i32_chk(TCGv_i32 ret, TCGTemp *addr,       \
                                     TCGv_i32 val, TCGArg idx,          \
                                     MemOp memop, TCGType addr_type)    \
{                                                                       \
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);                  \
    tcg_debug_assert((memop & MO_SIZE) <= MO_32);                       \
    if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) {                        \
        do_atomic_op_i32(ret, addr, val, idx, memop, table_##NAME);     \
    } else {                                                            \
        do_nonatomic_op_i32(ret, addr, val, idx, memop, NEW,            \
                            tcg_gen_##OP##_i32);                        \
    }                                                                   \
}                                                                       \
void tcg_gen_atomic_##NAME##_i64_chk(TCGv_i64 ret, TCGTemp *addr,       \
                                     TCGv_i64 val, TCGArg idx,          \
                                     MemOp memop, TCGType addr_type)    \
{                                                                       \
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);                  \
    tcg_debug_assert((memop & MO_SIZE) <= MO_64);                       \
    if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) {                        \
        do_atomic_op_i64(ret, addr, val, idx, memop, table_##NAME);     \
    } else {                                                            \
        do_nonatomic_op_i64(ret, addr, val, idx, memop, NEW,            \
                            tcg_gen_##OP##_i64);                        \
    }                                                                   \
}

GEN_ATOMIC_HELPER(fetch_add, add, 0)
GEN_ATOMIC_HELPER(fetch_and, and, 0)
GEN_ATOMIC_HELPER(fetch_or, or, 0)
GEN_ATOMIC_HELPER(fetch_xor, xor, 0)
GEN_ATOMIC_HELPER(fetch_smin, smin, 0)
GEN_ATOMIC_HELPER(fetch_umin, umin, 0)
GEN_ATOMIC_HELPER(fetch_smax, smax, 0)
GEN_ATOMIC_HELPER(fetch_umax, umax, 0)

GEN_ATOMIC_HELPER(add_fetch, add, 1)
GEN_ATOMIC_HELPER(and_fetch, and, 1)
GEN_ATOMIC_HELPER(or_fetch, or, 1)
GEN_ATOMIC_HELPER(xor_fetch, xor, 1)
GEN_ATOMIC_HELPER(smin_fetch, smin, 1)
GEN_ATOMIC_HELPER(umin_fetch, umin, 1)
GEN_ATOMIC_HELPER(smax_fetch, smax, 1)
GEN_ATOMIC_HELPER(umax_fetch, umax, 1)

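/* For xchg, the "operation" simply selects the new value. */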
static void tcg_gen_mov2_i32(TCGv_i32 r, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_mov_i32(r, b);
}

static void tcg_gen_mov2_i64(TCGv_i64 r, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_mov_i64(r, b);
}

GEN_ATOMIC_HELPER(xchg, mov2, 0)

#undef GEN_ATOMIC_HELPER