/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "tcg/tcg.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg/tcg-op-common.h"
#include "tcg/tcg-mo.h"
#include "exec/translation-block.h"
#include "exec/plugin-gen.h"
#include "tcg-internal.h"
#include "tcg-has.h"
#include "tcg-target-mo.h"

static void check_max_alignment(unsigned a_bits)
{
    /*
     * The requested alignment cannot overlap the TLB flags.
     * FIXME: Must keep the count up-to-date with "exec/tlb-flags.h".
     */
    if (tcg_use_softmmu) {
        tcg_debug_assert(a_bits + 5 <= tcg_ctx->page_bits);
    }
}

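/*
 * Canonicalize @op for use by the qemu_ld/st expansions below.  As an
 * illustrative example: a 4-byte load that requires 4-byte alignment
 * has alignment equal to size, so the memop is simplified as
 *
 *     (MO_32 | MO_ALIGN_4)  ->  (MO_32 | MO_ALIGN)
 *
 * Byte swapping is meaningless for single bytes, sign extension is
 * dropped where it cannot be observed, and in serial (!CF_PARALLEL)
 * mode no atomicity is required.
 */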
static MemOp tcg_canonicalize_memop(MemOp op, bool is64, bool st)
{
    unsigned a_bits = memop_alignment_bits(op);

    check_max_alignment(a_bits);

    /* Prefer MO_ALIGN+MO_XX over MO_ALIGN_XX+MO_XX */
    if (a_bits == (op & MO_SIZE)) {
        op = (op & ~MO_AMASK) | MO_ALIGN;
    }

    switch (op & MO_SIZE) {
    case MO_8:
        op &= ~MO_BSWAP;
        break;
    case MO_16:
        break;
    case MO_32:
        if (!is64) {
            op &= ~MO_SIGN;
        }
        break;
    case MO_64:
        if (is64) {
            op &= ~MO_SIGN;
            break;
        }
        /* fall through */
    default:
        g_assert_not_reached();
    }
    if (st) {
        op &= ~MO_SIGN;
    }

    /* In serial mode, reduce atomicity. */
    if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
        op &= ~MO_ATOM_MASK;
        op |= MO_ATOM_NONE;
    }

    return op;
}

static void gen_ldst(TCGOpcode opc, TCGType type, TCGTemp *vl, TCGTemp *vh,
                     TCGTemp *addr, MemOpIdx oi)
{
    TCGOp *op;

    if (vh) {
        op = tcg_gen_op4(opc, type, temp_arg(vl), temp_arg(vh),
                         temp_arg(addr), oi);
    } else {
        op = tcg_gen_op3(opc, type, temp_arg(vl), temp_arg(addr), oi);
    }
    TCGOP_FLAGS(op) = get_memop(oi) & MO_SIZE;
}

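/*
 * On a 32-bit host a 64-bit value has no single temp; it lives in a
 * low/high pair of 32-bit temps, which gen_ldst accepts as (vl, vh).
 */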
static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 v, TCGTemp *addr, MemOpIdx oi)
{
    if (TCG_TARGET_REG_BITS == 32) {
        TCGTemp *vl = tcgv_i32_temp(TCGV_LOW(v));
        TCGTemp *vh = tcgv_i32_temp(TCGV_HIGH(v));
        gen_ldst(opc, TCG_TYPE_I64, vl, vh, addr, oi);
    } else {
        gen_ldst(opc, TCG_TYPE_I64, tcgv_i64_temp(v), NULL, addr, oi);
    }
}

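/*
 * Emit a barrier only for those ordering requirements of the guest
 * that the host does not already provide.  Illustrative example: a
 * strongly ordered guest on a weakly ordered host leaves bits set and
 * generates an mb; the reverse combination generates nothing.
 */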
static void tcg_gen_req_mo(TCGBar type)
{
    type &= tcg_ctx->guest_mo;
    type &= ~TCG_TARGET_DEFAULT_MO;
    if (type) {
        tcg_gen_mb(type | TCG_BAR_SC);
    }
}

/* Only required for loads, where value might overlap addr. */
static TCGv_i64 plugin_maybe_preserve_addr(TCGTemp *addr)
{
#ifdef CONFIG_PLUGIN
    if (tcg_ctx->plugin_insn != NULL) {
        /* Save a copy of the vaddr for use after a load.  */
        TCGv_i64 temp = tcg_temp_ebb_new_i64();
        if (tcg_ctx->addr_type == TCG_TYPE_I32) {
            tcg_gen_extu_i32_i64(temp, temp_tcgv_i32(addr));
        } else {
            tcg_gen_mov_i64(temp, temp_tcgv_i64(addr));
        }
        return temp;
    }
#endif
    return NULL;
}

#ifdef CONFIG_PLUGIN
static void
plugin_gen_mem_callbacks(TCGv_i64 copy_addr, TCGTemp *orig_addr, MemOpIdx oi,
                         enum qemu_plugin_mem_rw rw)
{
    if (tcg_ctx->plugin_insn != NULL) {
        qemu_plugin_meminfo_t info = make_plugin_meminfo(oi, rw);

        if (tcg_ctx->addr_type == TCG_TYPE_I32) {
            if (!copy_addr) {
                copy_addr = tcg_temp_ebb_new_i64();
                tcg_gen_extu_i32_i64(copy_addr, temp_tcgv_i32(orig_addr));
            }
            tcg_gen_plugin_mem_cb(copy_addr, info);
            tcg_temp_free_i64(copy_addr);
        } else {
            if (copy_addr) {
                tcg_gen_plugin_mem_cb(copy_addr, info);
                tcg_temp_free_i64(copy_addr);
            } else {
                tcg_gen_plugin_mem_cb(temp_tcgv_i64(orig_addr), info);
            }
        }
    }
}
#endif

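/*
 * Plugin memory values live in CPUState.neg, which is laid out
 * immediately before the architectural env that tcg_env points at;
 * hence the negative "offsetof(CPUState, ...) - sizeof(CPUState)"
 * offsets below.  For a 32-bit value, HOST_BIG_ENDIAN * 4 selects the
 * low half of the 64-bit plugin_mem_value_low slot.
 */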
static void
plugin_gen_mem_callbacks_i32(TCGv_i32 val,
                             TCGv_i64 copy_addr, TCGTemp *orig_addr,
                             MemOpIdx oi, enum qemu_plugin_mem_rw rw)
{
#ifdef CONFIG_PLUGIN
    if (tcg_ctx->plugin_insn != NULL) {
        tcg_gen_st_i32(val, tcg_env,
                       offsetof(CPUState, neg.plugin_mem_value_low) -
                       sizeof(CPUState) + (HOST_BIG_ENDIAN * 4));
        plugin_gen_mem_callbacks(copy_addr, orig_addr, oi, rw);
    }
#endif
}

static void
plugin_gen_mem_callbacks_i64(TCGv_i64 val,
                             TCGv_i64 copy_addr, TCGTemp *orig_addr,
                             MemOpIdx oi, enum qemu_plugin_mem_rw rw)
{
#ifdef CONFIG_PLUGIN
    if (tcg_ctx->plugin_insn != NULL) {
        tcg_gen_st_i64(val, tcg_env,
                       offsetof(CPUState, neg.plugin_mem_value_low) -
                       sizeof(CPUState));
        plugin_gen_mem_callbacks(copy_addr, orig_addr, oi, rw);
    }
#endif
}

static void
plugin_gen_mem_callbacks_i128(TCGv_i128 val,
                              TCGv_i64 copy_addr, TCGTemp *orig_addr,
                              MemOpIdx oi, enum qemu_plugin_mem_rw rw)
{
#ifdef CONFIG_PLUGIN
    if (tcg_ctx->plugin_insn != NULL) {
        tcg_gen_st_i64(TCGV128_LOW(val), tcg_env,
                       offsetof(CPUState, neg.plugin_mem_value_low) -
                       sizeof(CPUState));
        tcg_gen_st_i64(TCGV128_HIGH(val), tcg_env,
                       offsetof(CPUState, neg.plugin_mem_value_high) -
                       sizeof(CPUState));
        plugin_gen_mem_callbacks(copy_addr, orig_addr, oi, rw);
    }
#endif
}

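/*
 * If the host cannot byte-swap as part of the memory operation, load
 * in host order and swap afterwards.  Illustrative example: a
 * big-endian MO_SW load on such a host becomes an unsigned MO_UW load
 * followed by bswap16 with TCG_BSWAP_OS, which re-applies the sign
 * extension.
 */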
static void tcg_gen_qemu_ld_i32_int(TCGv_i32 val, TCGTemp *addr,
                                    TCGArg idx, MemOp memop)
{
    MemOp orig_memop;
    MemOpIdx orig_oi, oi;
    TCGv_i64 copy_addr;

    tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
    orig_memop = memop = tcg_canonicalize_memop(memop, 0, 0);
    orig_oi = oi = make_memop_idx(memop, idx);

    if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
        memop &= ~MO_BSWAP;
        /* The bswap primitive benefits from zero-extended input.  */
        if ((memop & MO_SSIZE) == MO_SW) {
            memop &= ~MO_SIGN;
        }
        oi = make_memop_idx(memop, idx);
    }

    copy_addr = plugin_maybe_preserve_addr(addr);
    gen_ldst(INDEX_op_qemu_ld_i32, TCG_TYPE_I32,
             tcgv_i32_temp(val), NULL, addr, oi);
    plugin_gen_mem_callbacks_i32(val, copy_addr, addr, orig_oi,
                                 QEMU_PLUGIN_MEM_R);

    if ((orig_memop ^ memop) & MO_BSWAP) {
        switch (orig_memop & MO_SIZE) {
        case MO_16:
            tcg_gen_bswap16_i32(val, val, (orig_memop & MO_SIGN
                                           ? TCG_BSWAP_IZ | TCG_BSWAP_OS
                                           : TCG_BSWAP_IZ | TCG_BSWAP_OZ));
            break;
        case MO_32:
            tcg_gen_bswap32_i32(val, val);
            break;
        default:
            g_assert_not_reached();
        }
    }
}

void tcg_gen_qemu_ld_i32_chk(TCGv_i32 val, TCGTemp *addr, TCGArg idx,
                             MemOp memop, TCGType addr_type)
{
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);
    tcg_debug_assert((memop & MO_SIZE) <= MO_32);
    tcg_gen_qemu_ld_i32_int(val, addr, idx, memop);
}

static void tcg_gen_qemu_st_i32_int(TCGv_i32 val, TCGTemp *addr,
                                    TCGArg idx, MemOp memop)
{
    TCGv_i32 swap = NULL;
    MemOpIdx orig_oi, oi;

    tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
    memop = tcg_canonicalize_memop(memop, 0, 1);
    orig_oi = oi = make_memop_idx(memop, idx);

    if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
        swap = tcg_temp_ebb_new_i32();
        switch (memop & MO_SIZE) {
        case MO_16:
            tcg_gen_bswap16_i32(swap, val, 0);
            break;
        case MO_32:
            tcg_gen_bswap32_i32(swap, val);
            break;
        default:
            g_assert_not_reached();
        }
        val = swap;
        memop &= ~MO_BSWAP;
        oi = make_memop_idx(memop, idx);
    }

    gen_ldst(INDEX_op_qemu_st_i32, TCG_TYPE_I32,
             tcgv_i32_temp(val), NULL, addr, oi);
    plugin_gen_mem_callbacks_i32(val, NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W);

    if (swap) {
        tcg_temp_free_i32(swap);
    }
}

void tcg_gen_qemu_st_i32_chk(TCGv_i32 val, TCGTemp *addr, TCGArg idx,
                             MemOp memop, TCGType addr_type)
{
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);
    tcg_debug_assert((memop & MO_SIZE) <= MO_32);
    tcg_gen_qemu_st_i32_int(val, addr, idx, memop);
}

static void tcg_gen_qemu_ld_i64_int(TCGv_i64 val, TCGTemp *addr,
                                    TCGArg idx, MemOp memop)
{
    MemOp orig_memop;
    MemOpIdx orig_oi, oi;
    TCGv_i64 copy_addr;

    if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
        tcg_gen_qemu_ld_i32_int(TCGV_LOW(val), addr, idx, memop);
        if (memop & MO_SIGN) {
            tcg_gen_sari_i32(TCGV_HIGH(val), TCGV_LOW(val), 31);
        } else {
            tcg_gen_movi_i32(TCGV_HIGH(val), 0);
        }
        return;
    }

    tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
    orig_memop = memop = tcg_canonicalize_memop(memop, 1, 0);
    orig_oi = oi = make_memop_idx(memop, idx);

    if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
        memop &= ~MO_BSWAP;
        /* The bswap primitive benefits from zero-extended input.  */
        if ((memop & MO_SIGN) && (memop & MO_SIZE) < MO_64) {
            memop &= ~MO_SIGN;
        }
        oi = make_memop_idx(memop, idx);
    }

    copy_addr = plugin_maybe_preserve_addr(addr);
    gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, oi);
    plugin_gen_mem_callbacks_i64(val, copy_addr, addr, orig_oi,
                                 QEMU_PLUGIN_MEM_R);

    if ((orig_memop ^ memop) & MO_BSWAP) {
        int flags = (orig_memop & MO_SIGN
                     ? TCG_BSWAP_IZ | TCG_BSWAP_OS
                     : TCG_BSWAP_IZ | TCG_BSWAP_OZ);
        switch (orig_memop & MO_SIZE) {
        case MO_16:
            tcg_gen_bswap16_i64(val, val, flags);
            break;
        case MO_32:
            tcg_gen_bswap32_i64(val, val, flags);
            break;
        case MO_64:
            tcg_gen_bswap64_i64(val, val);
            break;
        default:
            g_assert_not_reached();
        }
    }
}

void tcg_gen_qemu_ld_i64_chk(TCGv_i64 val, TCGTemp *addr, TCGArg idx,
                             MemOp memop, TCGType addr_type)
{
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);
    tcg_debug_assert((memop & MO_SIZE) <= MO_64);
    tcg_gen_qemu_ld_i64_int(val, addr, idx, memop);
}

static void tcg_gen_qemu_st_i64_int(TCGv_i64 val, TCGTemp *addr,
                                    TCGArg idx, MemOp memop)
{
    TCGv_i64 swap = NULL;
    MemOpIdx orig_oi, oi;

    if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
        tcg_gen_qemu_st_i32_int(TCGV_LOW(val), addr, idx, memop);
        return;
    }

    tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
    memop = tcg_canonicalize_memop(memop, 1, 1);
    orig_oi = oi = make_memop_idx(memop, idx);

    if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
        swap = tcg_temp_ebb_new_i64();
        switch (memop & MO_SIZE) {
        case MO_16:
            tcg_gen_bswap16_i64(swap, val, 0);
            break;
        case MO_32:
            tcg_gen_bswap32_i64(swap, val, 0);
            break;
        case MO_64:
            tcg_gen_bswap64_i64(swap, val);
            break;
        default:
            g_assert_not_reached();
        }
        val = swap;
        memop &= ~MO_BSWAP;
        oi = make_memop_idx(memop, idx);
    }

    gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, oi);
    plugin_gen_mem_callbacks_i64(val, NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W);

    if (swap) {
        tcg_temp_free_i64(swap);
    }
}

void tcg_gen_qemu_st_i64_chk(TCGv_i64 val, TCGTemp *addr, TCGArg idx,
                             MemOp memop, TCGType addr_type)
{
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);
    tcg_debug_assert((memop & MO_SIZE) <= MO_64);
    tcg_gen_qemu_st_i64_int(val, addr, idx, memop);
}

/*
 * Return true if @mop, without knowledge of the pointer alignment,
 * does not require 16-byte atomicity, and it would be advantageous
 * to avoid a call to a helper function.
 */
static bool use_two_i64_for_i128(MemOp mop)
{
    /* Two softmmu tlb lookups are larger than one function call. */
    if (tcg_use_softmmu) {
        return false;
    }

    /*
     * For user-only, two 64-bit operations may well be smaller than a call.
     * Determine if that would be legal for the requested atomicity.
     */
    switch (mop & MO_ATOM_MASK) {
    case MO_ATOM_NONE:
    case MO_ATOM_IFALIGN_PAIR:
        return true;
    case MO_ATOM_IFALIGN:
    case MO_ATOM_SUBALIGN:
    case MO_ATOM_WITHIN16:
    case MO_ATOM_WITHIN16_PAIR:
        return false;
    default:
        g_assert_not_reached();
    }
}

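/*
 * Illustrative example of the splitting below: an aligned 16-byte
 * access (MO_128 | MO_ALIGN) becomes two 8-byte accesses, the first
 * carrying the full 16-byte alignment and the second the natural
 * 8-byte alignment:
 *
 *     mop[0] = MO_64 | MO_ALIGN_16
 *     mop[1] = MO_64 | MO_ALIGN
 */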
static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
{
    MemOp mop_1 = orig, mop_2;

    /* Reduce the size to 64-bit. */
    mop_1 = (mop_1 & ~MO_SIZE) | MO_64;

    /* Retain the alignment constraints of the original. */
    switch (orig & MO_AMASK) {
    case MO_UNALN:
    case MO_ALIGN_2:
    case MO_ALIGN_4:
        mop_2 = mop_1;
        break;
    case MO_ALIGN_8:
        /* Prefer MO_ALIGN+MO_64 to MO_ALIGN_8+MO_64. */
        mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
        mop_2 = mop_1;
        break;
    case MO_ALIGN:
        /* Second has 8-byte alignment; first has 16-byte alignment. */
        mop_2 = mop_1;
        mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN_16;
        break;
    case MO_ALIGN_16:
    case MO_ALIGN_32:
    case MO_ALIGN_64:
        /* Second has 8-byte alignment; first retains original. */
        mop_2 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
        break;
    default:
        g_assert_not_reached();
    }

    /* Use a byte ordering implemented by the host. */
    if ((orig & MO_BSWAP) && !tcg_target_has_memory_bswap(mop_1)) {
        mop_1 &= ~MO_BSWAP;
        mop_2 &= ~MO_BSWAP;
    }

    ret[0] = mop_1;
    ret[1] = mop_2;
}

static TCGv_i64 maybe_extend_addr64(TCGTemp *addr)
{
    if (tcg_ctx->addr_type == TCG_TYPE_I32) {
        TCGv_i64 a64 = tcg_temp_ebb_new_i64();
        tcg_gen_extu_i32_i64(a64, temp_tcgv_i32(addr));
        return a64;
    }
    return temp_tcgv_i64(addr);
}

static void maybe_free_addr64(TCGv_i64 a64)
{
    if (tcg_ctx->addr_type == TCG_TYPE_I32) {
        tcg_temp_free_i64(a64);
    }
}

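/*
 * A 128-bit load is emitted one of three ways, in order of preference:
 * a single host i128 operation, a pair of i64 operations when that
 * satisfies the required atomicity (see use_two_i64_for_i128), or a
 * call to the out-of-line ld_i128 helper.
 */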
static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr,
                                     TCGArg idx, MemOp memop)
{
    MemOpIdx orig_oi;
    TCGv_i64 ext_addr = NULL;

    check_max_alignment(memop_alignment_bits(memop));
    tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);

    /* In serial mode, reduce atomicity. */
    if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
        memop &= ~MO_ATOM_MASK;
        memop |= MO_ATOM_NONE;
    }
    orig_oi = make_memop_idx(memop, idx);

    /* TODO: For now, force 32-bit hosts to use the helper. */
    if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
        TCGv_i64 lo, hi;
        bool need_bswap = false;
        MemOpIdx oi = orig_oi;

        if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
            lo = TCGV128_HIGH(val);
            hi = TCGV128_LOW(val);
            oi = make_memop_idx(memop & ~MO_BSWAP, idx);
            need_bswap = true;
        } else {
            lo = TCGV128_LOW(val);
            hi = TCGV128_HIGH(val);
        }

        gen_ldst(INDEX_op_qemu_ld_i128, TCG_TYPE_I128, tcgv_i64_temp(lo),
                 tcgv_i64_temp(hi), addr, oi);

        if (need_bswap) {
            tcg_gen_bswap64_i64(lo, lo);
            tcg_gen_bswap64_i64(hi, hi);
        }
    } else if (use_two_i64_for_i128(memop)) {
        MemOp mop[2];
        TCGTemp *addr_p8;
        TCGv_i64 x, y;
        bool need_bswap;

        canonicalize_memop_i128_as_i64(mop, memop);
        need_bswap = (mop[0] ^ memop) & MO_BSWAP;

        /*
         * Since there are no global TCGv_i128, there is no visible state
         * changed if the second load faults.  Load directly into the two
         * subwords.
         */
        if ((memop & MO_BSWAP) == MO_LE) {
            x = TCGV128_LOW(val);
            y = TCGV128_HIGH(val);
        } else {
            x = TCGV128_HIGH(val);
            y = TCGV128_LOW(val);
        }

        gen_ldst_i64(INDEX_op_qemu_ld_i64, x, addr,
                     make_memop_idx(mop[0], idx));

        if (need_bswap) {
            tcg_gen_bswap64_i64(x, x);
        }

        if (tcg_ctx->addr_type == TCG_TYPE_I32) {
            TCGv_i32 t = tcg_temp_ebb_new_i32();
            tcg_gen_addi_i32(t, temp_tcgv_i32(addr), 8);
            addr_p8 = tcgv_i32_temp(t);
        } else {
            TCGv_i64 t = tcg_temp_ebb_new_i64();
            tcg_gen_addi_i64(t, temp_tcgv_i64(addr), 8);
            addr_p8 = tcgv_i64_temp(t);
        }

        gen_ldst_i64(INDEX_op_qemu_ld_i64, y, addr_p8,
                     make_memop_idx(mop[1], idx));
        tcg_temp_free_internal(addr_p8);

        if (need_bswap) {
            tcg_gen_bswap64_i64(y, y);
        }
    } else {
        if (tcg_ctx->addr_type == TCG_TYPE_I32) {
            ext_addr = tcg_temp_ebb_new_i64();
            tcg_gen_extu_i32_i64(ext_addr, temp_tcgv_i32(addr));
            addr = tcgv_i64_temp(ext_addr);
        }
        gen_helper_ld_i128(val, tcg_env, temp_tcgv_i64(addr),
                           tcg_constant_i32(orig_oi));
    }

    plugin_gen_mem_callbacks_i128(val, ext_addr, addr, orig_oi,
                                  QEMU_PLUGIN_MEM_R);
}

void tcg_gen_qemu_ld_i128_chk(TCGv_i128 val, TCGTemp *addr, TCGArg idx,
                              MemOp memop, TCGType addr_type)
{
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);
    tcg_debug_assert((memop & MO_SIZE) == MO_128);
    tcg_debug_assert((memop & MO_SIGN) == 0);
    tcg_gen_qemu_ld_i128_int(val, addr, idx, memop);
}

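/*
 * 128-bit stores mirror the load expansion above.  For the two-i64
 * case any byte swap must go through a scratch temp before the store,
 * because the value being stored must not itself be modified.
 */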
static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr,
                                     TCGArg idx, MemOp memop)
{
    MemOpIdx orig_oi;
    TCGv_i64 ext_addr = NULL;

    check_max_alignment(memop_alignment_bits(memop));
    tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);

    /* In serial mode, reduce atomicity. */
    if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
        memop &= ~MO_ATOM_MASK;
        memop |= MO_ATOM_NONE;
    }
    orig_oi = make_memop_idx(memop, idx);

    /* TODO: For now, force 32-bit hosts to use the helper. */
    if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
        TCGv_i64 lo, hi;
        MemOpIdx oi = orig_oi;
        bool need_bswap = false;

        if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
            lo = tcg_temp_ebb_new_i64();
            hi = tcg_temp_ebb_new_i64();
            tcg_gen_bswap64_i64(lo, TCGV128_HIGH(val));
            tcg_gen_bswap64_i64(hi, TCGV128_LOW(val));
            oi = make_memop_idx(memop & ~MO_BSWAP, idx);
            need_bswap = true;
        } else {
            lo = TCGV128_LOW(val);
            hi = TCGV128_HIGH(val);
        }

        gen_ldst(INDEX_op_qemu_st_i128, TCG_TYPE_I128,
                 tcgv_i64_temp(lo), tcgv_i64_temp(hi), addr, oi);

        if (need_bswap) {
            tcg_temp_free_i64(lo);
            tcg_temp_free_i64(hi);
        }
    } else if (use_two_i64_for_i128(memop)) {
        MemOp mop[2];
        TCGTemp *addr_p8;
        TCGv_i64 x, y, b = NULL;

        canonicalize_memop_i128_as_i64(mop, memop);

        if ((memop & MO_BSWAP) == MO_LE) {
            x = TCGV128_LOW(val);
            y = TCGV128_HIGH(val);
        } else {
            x = TCGV128_HIGH(val);
            y = TCGV128_LOW(val);
        }

        if ((mop[0] ^ memop) & MO_BSWAP) {
            b = tcg_temp_ebb_new_i64();
            tcg_gen_bswap64_i64(b, x);
            x = b;
        }

        gen_ldst_i64(INDEX_op_qemu_st_i64, x, addr,
                     make_memop_idx(mop[0], idx));

        if (tcg_ctx->addr_type == TCG_TYPE_I32) {
            TCGv_i32 t = tcg_temp_ebb_new_i32();
            tcg_gen_addi_i32(t, temp_tcgv_i32(addr), 8);
            addr_p8 = tcgv_i32_temp(t);
        } else {
            TCGv_i64 t = tcg_temp_ebb_new_i64();
            tcg_gen_addi_i64(t, temp_tcgv_i64(addr), 8);
            addr_p8 = tcgv_i64_temp(t);
        }

        if (b) {
            tcg_gen_bswap64_i64(b, y);
            gen_ldst_i64(INDEX_op_qemu_st_i64, b, addr_p8,
                         make_memop_idx(mop[1], idx));
            tcg_temp_free_i64(b);
        } else {
            gen_ldst_i64(INDEX_op_qemu_st_i64, y, addr_p8,
                         make_memop_idx(mop[1], idx));
        }
        tcg_temp_free_internal(addr_p8);
    } else {
        if (tcg_ctx->addr_type == TCG_TYPE_I32) {
            ext_addr = tcg_temp_ebb_new_i64();
            tcg_gen_extu_i32_i64(ext_addr, temp_tcgv_i32(addr));
            addr = tcgv_i64_temp(ext_addr);
        }
        gen_helper_st_i128(tcg_env, temp_tcgv_i64(addr), val,
                           tcg_constant_i32(orig_oi));
    }

    plugin_gen_mem_callbacks_i128(val, ext_addr, addr, orig_oi,
                                  QEMU_PLUGIN_MEM_W);
}

void tcg_gen_qemu_st_i128_chk(TCGv_i128 val, TCGTemp *addr, TCGArg idx,
                              MemOp memop, TCGType addr_type)
{
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);
    tcg_debug_assert((memop & MO_SIZE) == MO_128);
    tcg_debug_assert((memop & MO_SIGN) == 0);
    tcg_gen_qemu_st_i128_int(val, addr, idx, memop);
}

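/*
 * Extend @val into @ret according to the size and signedness of @opc;
 * a full-width MemOp reduces to a plain move.
 */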
void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc)
{
    switch (opc & MO_SSIZE) {
    case MO_SB:
        tcg_gen_ext8s_i32(ret, val);
        break;
    case MO_UB:
        tcg_gen_ext8u_i32(ret, val);
        break;
    case MO_SW:
        tcg_gen_ext16s_i32(ret, val);
        break;
    case MO_UW:
        tcg_gen_ext16u_i32(ret, val);
        break;
    case MO_UL:
    case MO_SL:
        tcg_gen_mov_i32(ret, val);
        break;
    default:
        g_assert_not_reached();
    }
}

void tcg_gen_ext_i64(TCGv_i64 ret, TCGv_i64 val, MemOp opc)
{
    switch (opc & MO_SSIZE) {
    case MO_SB:
        tcg_gen_ext8s_i64(ret, val);
        break;
    case MO_UB:
        tcg_gen_ext8u_i64(ret, val);
        break;
    case MO_SW:
        tcg_gen_ext16s_i64(ret, val);
        break;
    case MO_UW:
        tcg_gen_ext16u_i64(ret, val);
        break;
    case MO_SL:
        tcg_gen_ext32s_i64(ret, val);
        break;
    case MO_UL:
        tcg_gen_ext32u_i64(ret, val);
        break;
    case MO_UQ:
    case MO_SQ:
        tcg_gen_mov_i64(ret, val);
        break;
    default:
        g_assert_not_reached();
    }
}

typedef void (*gen_atomic_cx_i32)(TCGv_i32, TCGv_env, TCGv_i64,
                                  TCGv_i32, TCGv_i32, TCGv_i32);
typedef void (*gen_atomic_cx_i64)(TCGv_i64, TCGv_env, TCGv_i64,
                                  TCGv_i64, TCGv_i64, TCGv_i32);
typedef void (*gen_atomic_cx_i128)(TCGv_i128, TCGv_env, TCGv_i64,
                                   TCGv_i128, TCGv_i128, TCGv_i32);
typedef void (*gen_atomic_op_i32)(TCGv_i32, TCGv_env, TCGv_i64,
                                  TCGv_i32, TCGv_i32);
typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv_i64,
                                  TCGv_i64, TCGv_i32);

#ifdef CONFIG_ATOMIC64
# define WITH_ATOMIC64(X) X,
#else
# define WITH_ATOMIC64(X)
#endif
#if HAVE_CMPXCHG128
# define WITH_ATOMIC128(X) X,
#else
# define WITH_ATOMIC128(X)
#endif

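/*
 * Helper tables are indexed by (MO_SIZE | MO_BSWAP), so each width has
 * _le/_be variants.  Entries beyond the host's atomic capabilities are
 * left NULL; callers either assert or fall back to exit_atomic.
 */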
static void * const table_cmpxchg[(MO_SIZE | MO_BSWAP) + 1] = {
    [MO_8] = gen_helper_atomic_cmpxchgb,
    [MO_16 | MO_LE] = gen_helper_atomic_cmpxchgw_le,
    [MO_16 | MO_BE] = gen_helper_atomic_cmpxchgw_be,
    [MO_32 | MO_LE] = gen_helper_atomic_cmpxchgl_le,
    [MO_32 | MO_BE] = gen_helper_atomic_cmpxchgl_be,
    WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_cmpxchgq_le)
    WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_cmpxchgq_be)
    WITH_ATOMIC128([MO_128 | MO_LE] = gen_helper_atomic_cmpxchgo_le)
    WITH_ATOMIC128([MO_128 | MO_BE] = gen_helper_atomic_cmpxchgo_be)
};

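/*
 * Non-atomic cmpxchg is open-coded with a movcond, sketched as:
 *
 *     t1 = load(addr)
 *     t2 = (t1 == ext(cmpv)) ? newv : t1
 *     store(addr, t2)                     -- unconditional writeback
 *     retv = t1, extended per memop
 */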
static void tcg_gen_nonatomic_cmpxchg_i32_int(TCGv_i32 retv, TCGTemp *addr,
                                              TCGv_i32 cmpv, TCGv_i32 newv,
                                              TCGArg idx, MemOp memop)
{
    TCGv_i32 t1 = tcg_temp_ebb_new_i32();
    TCGv_i32 t2 = tcg_temp_ebb_new_i32();

    tcg_gen_ext_i32(t2, cmpv, memop & MO_SIZE);

    tcg_gen_qemu_ld_i32_int(t1, addr, idx, memop & ~MO_SIGN);
    tcg_gen_movcond_i32(TCG_COND_EQ, t2, t1, t2, newv, t1);
    tcg_gen_qemu_st_i32_int(t2, addr, idx, memop);
    tcg_temp_free_i32(t2);

    if (memop & MO_SIGN) {
        tcg_gen_ext_i32(retv, t1, memop);
    } else {
        tcg_gen_mov_i32(retv, t1);
    }
    tcg_temp_free_i32(t1);
}

void tcg_gen_nonatomic_cmpxchg_i32_chk(TCGv_i32 retv, TCGTemp *addr,
                                       TCGv_i32 cmpv, TCGv_i32 newv,
                                       TCGArg idx, MemOp memop,
                                       TCGType addr_type)
{
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);
    tcg_debug_assert((memop & MO_SIZE) <= MO_32);
    tcg_gen_nonatomic_cmpxchg_i32_int(retv, addr, cmpv, newv, idx, memop);
}

static void tcg_gen_atomic_cmpxchg_i32_int(TCGv_i32 retv, TCGTemp *addr,
                                           TCGv_i32 cmpv, TCGv_i32 newv,
                                           TCGArg idx, MemOp memop)
{
    gen_atomic_cx_i32 gen;
    TCGv_i64 a64;
    MemOpIdx oi;

    if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
        tcg_gen_nonatomic_cmpxchg_i32_int(retv, addr, cmpv, newv, idx, memop);
        return;
    }

    memop = tcg_canonicalize_memop(memop, 0, 0);
    gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
    tcg_debug_assert(gen != NULL);

    oi = make_memop_idx(memop & ~MO_SIGN, idx);
    a64 = maybe_extend_addr64(addr);
    gen(retv, tcg_env, a64, cmpv, newv, tcg_constant_i32(oi));
    maybe_free_addr64(a64);

    if (memop & MO_SIGN) {
        tcg_gen_ext_i32(retv, retv, memop);
    }
}

void tcg_gen_atomic_cmpxchg_i32_chk(TCGv_i32 retv, TCGTemp *addr,
                                    TCGv_i32 cmpv, TCGv_i32 newv,
                                    TCGArg idx, MemOp memop,
                                    TCGType addr_type)
{
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);
    tcg_debug_assert((memop & MO_SIZE) <= MO_32);
    tcg_gen_atomic_cmpxchg_i32_int(retv, addr, cmpv, newv, idx, memop);
}

static void tcg_gen_nonatomic_cmpxchg_i64_int(TCGv_i64 retv, TCGTemp *addr,
                                              TCGv_i64 cmpv, TCGv_i64 newv,
                                              TCGArg idx, MemOp memop)
{
    TCGv_i64 t1, t2;

    if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
        tcg_gen_nonatomic_cmpxchg_i32_int(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
                                          TCGV_LOW(newv), idx, memop);
        if (memop & MO_SIGN) {
            tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31);
        } else {
            tcg_gen_movi_i32(TCGV_HIGH(retv), 0);
        }
        return;
    }

    t1 = tcg_temp_ebb_new_i64();
    t2 = tcg_temp_ebb_new_i64();

    tcg_gen_ext_i64(t2, cmpv, memop & MO_SIZE);

    tcg_gen_qemu_ld_i64_int(t1, addr, idx, memop & ~MO_SIGN);
    tcg_gen_movcond_i64(TCG_COND_EQ, t2, t1, t2, newv, t1);
    tcg_gen_qemu_st_i64_int(t2, addr, idx, memop);
    tcg_temp_free_i64(t2);

    if (memop & MO_SIGN) {
        tcg_gen_ext_i64(retv, t1, memop);
    } else {
        tcg_gen_mov_i64(retv, t1);
    }
    tcg_temp_free_i64(t1);
}

void tcg_gen_nonatomic_cmpxchg_i64_chk(TCGv_i64 retv, TCGTemp *addr,
                                       TCGv_i64 cmpv, TCGv_i64 newv,
                                       TCGArg idx, MemOp memop,
                                       TCGType addr_type)
{
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);
    tcg_debug_assert((memop & MO_SIZE) <= MO_64);
    tcg_gen_nonatomic_cmpxchg_i64_int(retv, addr, cmpv, newv, idx, memop);
}

static void tcg_gen_atomic_cmpxchg_i64_int(TCGv_i64 retv, TCGTemp *addr,
                                           TCGv_i64 cmpv, TCGv_i64 newv,
                                           TCGArg idx, MemOp memop)
{
    if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
        tcg_gen_nonatomic_cmpxchg_i64_int(retv, addr, cmpv, newv, idx, memop);
        return;
    }

    if ((memop & MO_SIZE) == MO_64) {
        gen_atomic_cx_i64 gen;

        memop = tcg_canonicalize_memop(memop, 1, 0);
        gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
        if (gen) {
            MemOpIdx oi = make_memop_idx(memop, idx);
            TCGv_i64 a64 = maybe_extend_addr64(addr);
            gen(retv, tcg_env, a64, cmpv, newv, tcg_constant_i32(oi));
            maybe_free_addr64(a64);
            return;
        }

        gen_helper_exit_atomic(tcg_env);

        /*
         * Produce a result for a well-formed opcode stream.  This satisfies
         * liveness for set before used, which happens before this dead code
         * is removed.
         */
        tcg_gen_movi_i64(retv, 0);
        return;
    }

    if (TCG_TARGET_REG_BITS == 32) {
        tcg_gen_atomic_cmpxchg_i32_int(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
                                       TCGV_LOW(newv), idx, memop);
        if (memop & MO_SIGN) {
            tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31);
        } else {
            tcg_gen_movi_i32(TCGV_HIGH(retv), 0);
        }
    } else {
        TCGv_i32 c32 = tcg_temp_ebb_new_i32();
        TCGv_i32 n32 = tcg_temp_ebb_new_i32();
        TCGv_i32 r32 = tcg_temp_ebb_new_i32();

        tcg_gen_extrl_i64_i32(c32, cmpv);
        tcg_gen_extrl_i64_i32(n32, newv);
        tcg_gen_atomic_cmpxchg_i32_int(r32, addr, c32, n32,
                                       idx, memop & ~MO_SIGN);
        tcg_temp_free_i32(c32);
        tcg_temp_free_i32(n32);

        tcg_gen_extu_i32_i64(retv, r32);
        tcg_temp_free_i32(r32);

        if (memop & MO_SIGN) {
            tcg_gen_ext_i64(retv, retv, memop);
        }
    }
}

void tcg_gen_atomic_cmpxchg_i64_chk(TCGv_i64 retv, TCGTemp *addr,
                                    TCGv_i64 cmpv, TCGv_i64 newv,
                                    TCGArg idx, MemOp memop, TCGType addr_type)
{
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);
    tcg_debug_assert((memop & MO_SIZE) <= MO_64);
    tcg_gen_atomic_cmpxchg_i64_int(retv, addr, cmpv, newv, idx, memop);
}

static void tcg_gen_nonatomic_cmpxchg_i128_int(TCGv_i128 retv, TCGTemp *addr,
                                               TCGv_i128 cmpv, TCGv_i128 newv,
                                               TCGArg idx, MemOp memop)
{
    if (TCG_TARGET_REG_BITS == 32) {
        /* Inline expansion below is simply too large for 32-bit hosts. */
        MemOpIdx oi = make_memop_idx(memop, idx);
        TCGv_i64 a64 = maybe_extend_addr64(addr);

        gen_helper_nonatomic_cmpxchgo(retv, tcg_env, a64, cmpv, newv,
                                      tcg_constant_i32(oi));
        maybe_free_addr64(a64);
    } else {
        TCGv_i128 oldv = tcg_temp_ebb_new_i128();
        TCGv_i128 tmpv = tcg_temp_ebb_new_i128();
        TCGv_i64 t0 = tcg_temp_ebb_new_i64();
        TCGv_i64 t1 = tcg_temp_ebb_new_i64();
        TCGv_i64 z = tcg_constant_i64(0);

        tcg_gen_qemu_ld_i128_int(oldv, addr, idx, memop);

        /* Compare i128 */
        tcg_gen_xor_i64(t0, TCGV128_LOW(oldv), TCGV128_LOW(cmpv));
        tcg_gen_xor_i64(t1, TCGV128_HIGH(oldv), TCGV128_HIGH(cmpv));
        tcg_gen_or_i64(t0, t0, t1);

        /* tmpv = equal ? newv : oldv */
        tcg_gen_movcond_i64(TCG_COND_EQ, TCGV128_LOW(tmpv), t0, z,
                            TCGV128_LOW(newv), TCGV128_LOW(oldv));
        tcg_gen_movcond_i64(TCG_COND_EQ, TCGV128_HIGH(tmpv), t0, z,
                            TCGV128_HIGH(newv), TCGV128_HIGH(oldv));

        /* Unconditional writeback. */
        tcg_gen_qemu_st_i128_int(tmpv, addr, idx, memop);
        tcg_gen_mov_i128(retv, oldv);

        tcg_temp_free_i64(t0);
        tcg_temp_free_i64(t1);
        tcg_temp_free_i128(tmpv);
        tcg_temp_free_i128(oldv);
    }
}

void tcg_gen_nonatomic_cmpxchg_i128_chk(TCGv_i128 retv, TCGTemp *addr,
                                        TCGv_i128 cmpv, TCGv_i128 newv,
                                        TCGArg idx, MemOp memop,
                                        TCGType addr_type)
{
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);
    tcg_debug_assert((memop & (MO_SIZE | MO_SIGN)) == MO_128);
    tcg_gen_nonatomic_cmpxchg_i128_int(retv, addr, cmpv, newv, idx, memop);
}

static void tcg_gen_atomic_cmpxchg_i128_int(TCGv_i128 retv, TCGTemp *addr,
                                            TCGv_i128 cmpv, TCGv_i128 newv,
                                            TCGArg idx, MemOp memop)
{
    gen_atomic_cx_i128 gen;

    if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
        tcg_gen_nonatomic_cmpxchg_i128_int(retv, addr, cmpv, newv, idx, memop);
        return;
    }

    gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
    if (gen) {
        MemOpIdx oi = make_memop_idx(memop, idx);
        TCGv_i64 a64 = maybe_extend_addr64(addr);
        gen(retv, tcg_env, a64, cmpv, newv, tcg_constant_i32(oi));
        maybe_free_addr64(a64);
        return;
    }

    gen_helper_exit_atomic(tcg_env);

    /*
     * Produce a result for a well-formed opcode stream.  This satisfies
     * liveness for set before used, which happens before this dead code
     * is removed.
     */
    tcg_gen_movi_i64(TCGV128_LOW(retv), 0);
    tcg_gen_movi_i64(TCGV128_HIGH(retv), 0);
}

void tcg_gen_atomic_cmpxchg_i128_chk(TCGv_i128 retv, TCGTemp *addr,
                                     TCGv_i128 cmpv, TCGv_i128 newv,
                                     TCGArg idx, MemOp memop,
                                     TCGType addr_type)
{
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);
    tcg_debug_assert((memop & (MO_SIZE | MO_SIGN)) == MO_128);
    tcg_gen_atomic_cmpxchg_i128_int(retv, addr, cmpv, newv, idx, memop);
}

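/*
 * Read-modify-write expansions shared by all GEN_ATOMIC_HELPER ops:
 * @new_val selects whether the result is the updated value (e.g.
 * add_fetch) or the original memory value (e.g. fetch_add).
 */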
static void do_nonatomic_op_i32(TCGv_i32 ret, TCGTemp *addr, TCGv_i32 val,
                                TCGArg idx, MemOp memop, bool new_val,
                                void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
{
    TCGv_i32 t1 = tcg_temp_ebb_new_i32();
    TCGv_i32 t2 = tcg_temp_ebb_new_i32();

    memop = tcg_canonicalize_memop(memop, 0, 0);

    tcg_gen_qemu_ld_i32_int(t1, addr, idx, memop);
    tcg_gen_ext_i32(t2, val, memop);
    gen(t2, t1, t2);
    tcg_gen_qemu_st_i32_int(t2, addr, idx, memop);

    tcg_gen_ext_i32(ret, (new_val ? t2 : t1), memop);
    tcg_temp_free_i32(t1);
    tcg_temp_free_i32(t2);
}

static void do_atomic_op_i32(TCGv_i32 ret, TCGTemp *addr, TCGv_i32 val,
                             TCGArg idx, MemOp memop, void * const table[])
{
    gen_atomic_op_i32 gen;
    TCGv_i64 a64;
    MemOpIdx oi;

    memop = tcg_canonicalize_memop(memop, 0, 0);

    gen = table[memop & (MO_SIZE | MO_BSWAP)];
    tcg_debug_assert(gen != NULL);

    oi = make_memop_idx(memop & ~MO_SIGN, idx);
    a64 = maybe_extend_addr64(addr);
    gen(ret, tcg_env, a64, val, tcg_constant_i32(oi));
    maybe_free_addr64(a64);

    if (memop & MO_SIGN) {
        tcg_gen_ext_i32(ret, ret, memop);
    }
}

static void do_nonatomic_op_i64(TCGv_i64 ret, TCGTemp *addr, TCGv_i64 val,
                                TCGArg idx, MemOp memop, bool new_val,
                                void (*gen)(TCGv_i64, TCGv_i64, TCGv_i64))
{
    TCGv_i64 t1 = tcg_temp_ebb_new_i64();
    TCGv_i64 t2 = tcg_temp_ebb_new_i64();

    memop = tcg_canonicalize_memop(memop, 1, 0);

    tcg_gen_qemu_ld_i64_int(t1, addr, idx, memop);
    tcg_gen_ext_i64(t2, val, memop);
    gen(t2, t1, t2);
    tcg_gen_qemu_st_i64_int(t2, addr, idx, memop);

    tcg_gen_ext_i64(ret, (new_val ? t2 : t1), memop);
    tcg_temp_free_i64(t1);
    tcg_temp_free_i64(t2);
}

static void do_atomic_op_i64(TCGv_i64 ret, TCGTemp *addr, TCGv_i64 val,
                             TCGArg idx, MemOp memop, void * const table[])
{
    memop = tcg_canonicalize_memop(memop, 1, 0);

    if ((memop & MO_SIZE) == MO_64) {
        gen_atomic_op_i64 gen = table[memop & (MO_SIZE | MO_BSWAP)];

        if (gen) {
            MemOpIdx oi = make_memop_idx(memop & ~MO_SIGN, idx);
            TCGv_i64 a64 = maybe_extend_addr64(addr);
            gen(ret, tcg_env, a64, val, tcg_constant_i32(oi));
            maybe_free_addr64(a64);
            return;
        }

        gen_helper_exit_atomic(tcg_env);
        /*
         * Produce a result, so that we have a well-formed opcode stream
         * with respect to uses of the result in the (dead) code following.
         */
        tcg_gen_movi_i64(ret, 0);
    } else {
        TCGv_i32 v32 = tcg_temp_ebb_new_i32();
        TCGv_i32 r32 = tcg_temp_ebb_new_i32();

        tcg_gen_extrl_i64_i32(v32, val);
        do_atomic_op_i32(r32, addr, v32, idx, memop & ~MO_SIGN, table);
        tcg_temp_free_i32(v32);

        tcg_gen_extu_i32_i64(ret, r32);
        tcg_temp_free_i32(r32);

        if (memop & MO_SIGN) {
            tcg_gen_ext_i64(ret, ret, memop);
        }
    }
}

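/*
 * For each operation, generate both the helper table and the _i32/_i64
 * entry points, which dispatch on CF_PARALLEL at translation time.
 */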
#define GEN_ATOMIC_HELPER(NAME, OP, NEW)                                \
static void * const table_##NAME[(MO_SIZE | MO_BSWAP) + 1] = {          \
    [MO_8] = gen_helper_atomic_##NAME##b,                               \
    [MO_16 | MO_LE] = gen_helper_atomic_##NAME##w_le,                   \
    [MO_16 | MO_BE] = gen_helper_atomic_##NAME##w_be,                   \
    [MO_32 | MO_LE] = gen_helper_atomic_##NAME##l_le,                   \
    [MO_32 | MO_BE] = gen_helper_atomic_##NAME##l_be,                   \
    WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_##NAME##q_le)     \
    WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_##NAME##q_be)     \
};                                                                      \
void tcg_gen_atomic_##NAME##_i32_chk(TCGv_i32 ret, TCGTemp *addr,       \
                                     TCGv_i32 val, TCGArg idx,          \
                                     MemOp memop, TCGType addr_type)    \
{                                                                       \
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);                  \
    tcg_debug_assert((memop & MO_SIZE) <= MO_32);                       \
    if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) {                        \
        do_atomic_op_i32(ret, addr, val, idx, memop, table_##NAME);     \
    } else {                                                            \
        do_nonatomic_op_i32(ret, addr, val, idx, memop, NEW,            \
                            tcg_gen_##OP##_i32);                        \
    }                                                                   \
}                                                                       \
void tcg_gen_atomic_##NAME##_i64_chk(TCGv_i64 ret, TCGTemp *addr,       \
                                     TCGv_i64 val, TCGArg idx,          \
                                     MemOp memop, TCGType addr_type)    \
{                                                                       \
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);                  \
    tcg_debug_assert((memop & MO_SIZE) <= MO_64);                       \
    if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) {                        \
        do_atomic_op_i64(ret, addr, val, idx, memop, table_##NAME);     \
    } else {                                                            \
        do_nonatomic_op_i64(ret, addr, val, idx, memop, NEW,            \
                            tcg_gen_##OP##_i64);                        \
    }                                                                   \
}

GEN_ATOMIC_HELPER(fetch_add, add, 0)
GEN_ATOMIC_HELPER(fetch_and, and, 0)
GEN_ATOMIC_HELPER(fetch_or, or, 0)
GEN_ATOMIC_HELPER(fetch_xor, xor, 0)
GEN_ATOMIC_HELPER(fetch_smin, smin, 0)
GEN_ATOMIC_HELPER(fetch_umin, umin, 0)
GEN_ATOMIC_HELPER(fetch_smax, smax, 0)
GEN_ATOMIC_HELPER(fetch_umax, umax, 0)

GEN_ATOMIC_HELPER(add_fetch, add, 1)
GEN_ATOMIC_HELPER(and_fetch, and, 1)
GEN_ATOMIC_HELPER(or_fetch, or, 1)
GEN_ATOMIC_HELPER(xor_fetch, xor, 1)
GEN_ATOMIC_HELPER(smin_fetch, smin, 1)
GEN_ATOMIC_HELPER(umin_fetch, umin, 1)
GEN_ATOMIC_HELPER(smax_fetch, smax, 1)
GEN_ATOMIC_HELPER(umax_fetch, umax, 1)

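/*
 * xchg reuses the fetch-style expansion with an operator that simply
 * returns its second operand: the old value is fetched and @val stored.
 */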
1252 
1253 static void tcg_gen_mov2_i32(TCGv_i32 r, TCGv_i32 a, TCGv_i32 b)
1254 {
1255     tcg_gen_mov_i32(r, b);
1256 }
1257 
1258 static void tcg_gen_mov2_i64(TCGv_i64 r, TCGv_i64 a, TCGv_i64 b)
1259 {
1260     tcg_gen_mov_i64(r, b);
1261 }
1262 
1263 GEN_ATOMIC_HELPER(xchg, mov2, 0)
1264 
1265 #undef GEN_ATOMIC_HELPER
1266