xref: /qemu/tcg/tcg-op-ldst.c (revision 93280b67381148d6b8b25f54f32901f868987c84)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 #include "tcg/tcg.h"
27 #include "tcg/tcg-temp-internal.h"
28 #include "tcg/tcg-op-common.h"
29 #include "tcg/tcg-mo.h"
30 #include "exec/translation-block.h"
31 #include "exec/plugin-gen.h"
32 #include "tcg-internal.h"
33 #include "tcg-has.h"
34 
35 static void check_max_alignment(unsigned a_bits)
36 {
37     /*
38      * The requested alignment cannot overlap the TLB flags (the 5 below
39      * counts those flag bits; FIXME: keep in sync with "exec/cpu-all.h").
40      */
41     if (tcg_use_softmmu) {
42         tcg_debug_assert(a_bits + 5 <= tcg_ctx->page_bits);
43     }
44 }
45 
46 static MemOp tcg_canonicalize_memop(MemOp op, bool is64, bool st)
47 {
48     unsigned a_bits = memop_alignment_bits(op);
49 
50     check_max_alignment(a_bits);
51 
52     /* Prefer MO_ALIGN+MO_XX over MO_ALIGN_XX+MO_XX */
53     if (a_bits == (op & MO_SIZE)) {
54         op = (op & ~MO_AMASK) | MO_ALIGN;
55     }
56 
57     switch (op & MO_SIZE) {
58     case MO_8:
59         op &= ~MO_BSWAP;
60         break;
61     case MO_16:
62         break;
63     case MO_32:
64         if (!is64) {
65             op &= ~MO_SIGN;
66         }
67         break;
68     case MO_64:
69         if (is64) {
70             op &= ~MO_SIGN;
71             break;
72         }
73         /* fall through */
74     default:
75         g_assert_not_reached();
76     }
77     if (st) {
78         op &= ~MO_SIGN;
79     }
80 
81     /* In serial mode, reduce atomicity. */
82     if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
83         op &= ~MO_ATOM_MASK;
84         op |= MO_ATOM_NONE;
85     }
86 
87     return op;
88 }
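
/*
 * Example, following the checks above: a 16-bit load that requests 2-byte
 * alignment, MO_16 | MO_ALIGN_2, canonicalizes to MO_16 | MO_ALIGN, since
 * that is simply the natural alignment of the access.  Stores additionally
 * drop MO_SIGN, and outside of CF_PARALLEL the atomicity requirement is
 * relaxed to MO_ATOM_NONE.
 */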
89 
90 static void gen_ldst(TCGOpcode opc, TCGType type, TCGTemp *vl, TCGTemp *vh,
91                      TCGTemp *addr, MemOpIdx oi)
92 {
93     if (TCG_TARGET_REG_BITS == 64 || tcg_ctx->addr_type == TCG_TYPE_I32) {
94         if (vh) {
95             tcg_gen_op4(opc, type, temp_arg(vl), temp_arg(vh),
96                         temp_arg(addr), oi);
97         } else {
98             tcg_gen_op3(opc, type, temp_arg(vl), temp_arg(addr), oi);
99         }
100     } else {
101         /* See TCGV_LOW/HIGH. */
102         TCGTemp *al = addr + HOST_BIG_ENDIAN;
103         TCGTemp *ah = addr + !HOST_BIG_ENDIAN;
104 
105         if (vh) {
106             tcg_gen_op5(opc, type, temp_arg(vl), temp_arg(vh),
107                         temp_arg(al), temp_arg(ah), oi);
108         } else {
109             tcg_gen_op4(opc, type, temp_arg(vl),
110                         temp_arg(al), temp_arg(ah), oi);
111         }
112     }
113 }
114 
115 static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 v, TCGTemp *addr, MemOpIdx oi)
116 {
117     if (TCG_TARGET_REG_BITS == 32) {
118         TCGTemp *vl = tcgv_i32_temp(TCGV_LOW(v));
119         TCGTemp *vh = tcgv_i32_temp(TCGV_HIGH(v));
120         gen_ldst(opc, TCG_TYPE_I64, vl, vh, addr, oi);
121     } else {
122         gen_ldst(opc, TCG_TYPE_I64, tcgv_i64_temp(v), NULL, addr, oi);
123     }
124 }
125 
126 static void tcg_gen_req_mo(TCGBar type)
127 {
128     type &= tcg_ctx->guest_mo;
129     type &= ~TCG_TARGET_DEFAULT_MO;
130     if (type) {
131         tcg_gen_mb(type | TCG_BAR_SC);
132     }
133 }
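
/*
 * Only barriers that the guest memory model requires, and that the host does
 * not already provide for free, are emitted.  For example, a strongly ordered
 * guest such as x86 translated on a weakly ordered Arm host typically gets a
 * barrier ahead of the memory access, while on an x86 host most of these
 * collapse to nothing.
 */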
134 
135 /* Only required for loads, where value might overlap addr. */
136 static TCGv_i64 plugin_maybe_preserve_addr(TCGTemp *addr)
137 {
138 #ifdef CONFIG_PLUGIN
139     if (tcg_ctx->plugin_insn != NULL) {
140         /* Save a copy of the vaddr for use after a load.  */
141         TCGv_i64 temp = tcg_temp_ebb_new_i64();
142         if (tcg_ctx->addr_type == TCG_TYPE_I32) {
143             tcg_gen_extu_i32_i64(temp, temp_tcgv_i32(addr));
144         } else {
145             tcg_gen_mov_i64(temp, temp_tcgv_i64(addr));
146         }
147         return temp;
148     }
149 #endif
150     return NULL;
151 }
152 
153 #ifdef CONFIG_PLUGIN
154 static void
155 plugin_gen_mem_callbacks(TCGv_i64 copy_addr, TCGTemp *orig_addr, MemOpIdx oi,
156                          enum qemu_plugin_mem_rw rw)
157 {
158     if (tcg_ctx->plugin_insn != NULL) {
159         qemu_plugin_meminfo_t info = make_plugin_meminfo(oi, rw);
160 
161         if (tcg_ctx->addr_type == TCG_TYPE_I32) {
162             if (!copy_addr) {
163                 copy_addr = tcg_temp_ebb_new_i64();
164                 tcg_gen_extu_i32_i64(copy_addr, temp_tcgv_i32(orig_addr));
165             }
166             tcg_gen_plugin_mem_cb(copy_addr, info);
167             tcg_temp_free_i64(copy_addr);
168         } else {
169             if (copy_addr) {
170                 tcg_gen_plugin_mem_cb(copy_addr, info);
171                 tcg_temp_free_i64(copy_addr);
172             } else {
173                 tcg_gen_plugin_mem_cb(temp_tcgv_i64(orig_addr), info);
174             }
175         }
176     }
177 }
178 #endif
179 
180 static void
181 plugin_gen_mem_callbacks_i32(TCGv_i32 val,
182                              TCGv_i64 copy_addr, TCGTemp *orig_addr,
183                              MemOpIdx oi, enum qemu_plugin_mem_rw rw)
184 {
185 #ifdef CONFIG_PLUGIN
186     if (tcg_ctx->plugin_insn != NULL) {
187         tcg_gen_st_i32(val, tcg_env,
188                        offsetof(CPUState, neg.plugin_mem_value_low) -
189                        sizeof(CPUState) + (HOST_BIG_ENDIAN * 4));
190         plugin_gen_mem_callbacks(copy_addr, orig_addr, oi, rw);
191     }
192 #endif
193 }
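
/*
 * Layout note, assuming the usual CPUState/CPUArchState arrangement: tcg_env
 * points at CPUArchState, which sits immediately after CPUState, so
 * "offsetof(CPUState, neg.X) - sizeof(CPUState)" is a negative offset that
 * reaches back into CPUState.neg.  The HOST_BIG_ENDIAN * 4 term selects the
 * half of the 64-bit plugin_mem_value_low slot that holds a 32-bit value.
 */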
194 
195 static void
196 plugin_gen_mem_callbacks_i64(TCGv_i64 val,
197                              TCGv_i64 copy_addr, TCGTemp *orig_addr,
198                              MemOpIdx oi, enum qemu_plugin_mem_rw rw)
199 {
200 #ifdef CONFIG_PLUGIN
201     if (tcg_ctx->plugin_insn != NULL) {
202         tcg_gen_st_i64(val, tcg_env,
203                        offsetof(CPUState, neg.plugin_mem_value_low) -
204                        sizeof(CPUState));
205         plugin_gen_mem_callbacks(copy_addr, orig_addr, oi, rw);
206     }
207 #endif
208 }
209 
210 static void
211 plugin_gen_mem_callbacks_i128(TCGv_i128 val,
212                              TCGv_i64 copy_addr, TCGTemp *orig_addr,
213                              MemOpIdx oi, enum qemu_plugin_mem_rw rw)
214 {
215 #ifdef CONFIG_PLUGIN
216     if (tcg_ctx->plugin_insn != NULL) {
217         tcg_gen_st_i64(TCGV128_LOW(val), tcg_env,
218                        offsetof(CPUState, neg.plugin_mem_value_low) -
219                        sizeof(CPUState));
220         tcg_gen_st_i64(TCGV128_HIGH(val), tcg_env,
221                        offsetof(CPUState, neg.plugin_mem_value_high) -
222                        sizeof(CPUState));
223         plugin_gen_mem_callbacks(copy_addr, orig_addr, oi, rw);
224     }
225 #endif
226 }
227 
228 static void tcg_gen_qemu_ld_i32_int(TCGv_i32 val, TCGTemp *addr,
229                                     TCGArg idx, MemOp memop)
230 {
231     MemOp orig_memop;
232     MemOpIdx orig_oi, oi;
233     TCGv_i64 copy_addr;
234     TCGOpcode opc;
235 
236     tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
237     orig_memop = memop = tcg_canonicalize_memop(memop, 0, 0);
238     orig_oi = oi = make_memop_idx(memop, idx);
239 
240     if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
241         memop &= ~MO_BSWAP;
242         /* The bswap primitive benefits from zero-extended input.  */
243         if ((memop & MO_SSIZE) == MO_SW) {
244             memop &= ~MO_SIGN;
245         }
246         oi = make_memop_idx(memop, idx);
247     }
248 
249     copy_addr = plugin_maybe_preserve_addr(addr);
250     if (tcg_ctx->addr_type == TCG_TYPE_I32) {
251         opc = INDEX_op_qemu_ld_a32_i32;
252     } else {
253         opc = INDEX_op_qemu_ld_a64_i32;
254     }
255     gen_ldst(opc, TCG_TYPE_I32, tcgv_i32_temp(val), NULL, addr, oi);
256     plugin_gen_mem_callbacks_i32(val, copy_addr, addr, orig_oi,
257                                  QEMU_PLUGIN_MEM_R);
258 
259     if ((orig_memop ^ memop) & MO_BSWAP) {
260         switch (orig_memop & MO_SIZE) {
261         case MO_16:
262             tcg_gen_bswap16_i32(val, val, (orig_memop & MO_SIGN
263                                            ? TCG_BSWAP_IZ | TCG_BSWAP_OS
264                                            : TCG_BSWAP_IZ | TCG_BSWAP_OZ));
265             break;
266         case MO_32:
267             tcg_gen_bswap32_i32(val, val);
268             break;
269         default:
270             g_assert_not_reached();
271         }
272     }
273 }
274 
275 void tcg_gen_qemu_ld_i32_chk(TCGv_i32 val, TCGTemp *addr, TCGArg idx,
276                              MemOp memop, TCGType addr_type)
277 {
278     tcg_debug_assert(addr_type == tcg_ctx->addr_type);
279     tcg_debug_assert((memop & MO_SIZE) <= MO_32);
280     tcg_gen_qemu_ld_i32_int(val, addr, idx, memop);
281 }
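
/*
 * Usage sketch, assuming the tcg_gen_qemu_ld_i32() wrapper from tcg-op.h,
 * which supplies the guest address type: a front end emitting an aligned,
 * target-endian 32-bit load would write something like
 *
 *     tcg_gen_qemu_ld_i32(dest, addr, mem_idx, MO_TEUL | MO_ALIGN);
 *
 * where mem_idx is the front end's current MMU index.
 */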
282 
283 static void tcg_gen_qemu_st_i32_int(TCGv_i32 val, TCGTemp *addr,
284                                     TCGArg idx, MemOp memop)
285 {
286     TCGv_i32 swap = NULL;
287     MemOpIdx orig_oi, oi;
288     TCGOpcode opc;
289 
290     tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
291     memop = tcg_canonicalize_memop(memop, 0, 1);
292     orig_oi = oi = make_memop_idx(memop, idx);
293 
294     if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
295         swap = tcg_temp_ebb_new_i32();
296         switch (memop & MO_SIZE) {
297         case MO_16:
298             tcg_gen_bswap16_i32(swap, val, 0);
299             break;
300         case MO_32:
301             tcg_gen_bswap32_i32(swap, val);
302             break;
303         default:
304             g_assert_not_reached();
305         }
306         val = swap;
307         memop &= ~MO_BSWAP;
308         oi = make_memop_idx(memop, idx);
309     }
310 
311     if (TCG_TARGET_HAS_qemu_st8_i32 && (memop & MO_SIZE) == MO_8) {
312         if (tcg_ctx->addr_type == TCG_TYPE_I32) {
313             opc = INDEX_op_qemu_st8_a32_i32;
314         } else {
315             opc = INDEX_op_qemu_st8_a64_i32;
316         }
317     } else {
318         if (tcg_ctx->addr_type == TCG_TYPE_I32) {
319             opc = INDEX_op_qemu_st_a32_i32;
320         } else {
321             opc = INDEX_op_qemu_st_a64_i32;
322         }
323     }
324     gen_ldst(opc, TCG_TYPE_I32, tcgv_i32_temp(val), NULL, addr, oi);
325     plugin_gen_mem_callbacks_i32(val, NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W);
326 
327     if (swap) {
328         tcg_temp_free_i32(swap);
329     }
330 }
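
/*
 * When the backend cannot fold the byte swap into the store, the value is
 * swapped into a scratch temp first and a swap-free memop is emitted.  The
 * separate qemu_st8 opcode caters to hosts whose byte stores require
 * byte-addressable registers (32-bit x86 is the assumed user).
 */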
331 
332 void tcg_gen_qemu_st_i32_chk(TCGv_i32 val, TCGTemp *addr, TCGArg idx,
333                              MemOp memop, TCGType addr_type)
334 {
335     tcg_debug_assert(addr_type == tcg_ctx->addr_type);
336     tcg_debug_assert((memop & MO_SIZE) <= MO_32);
337     tcg_gen_qemu_st_i32_int(val, addr, idx, memop);
338 }
339 
340 static void tcg_gen_qemu_ld_i64_int(TCGv_i64 val, TCGTemp *addr,
341                                     TCGArg idx, MemOp memop)
342 {
343     MemOp orig_memop;
344     MemOpIdx orig_oi, oi;
345     TCGv_i64 copy_addr;
346     TCGOpcode opc;
347 
348     if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
349         tcg_gen_qemu_ld_i32_int(TCGV_LOW(val), addr, idx, memop);
350         if (memop & MO_SIGN) {
351             tcg_gen_sari_i32(TCGV_HIGH(val), TCGV_LOW(val), 31);
352         } else {
353             tcg_gen_movi_i32(TCGV_HIGH(val), 0);
354         }
355         return;
356     }
357 
358     tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
359     orig_memop = memop = tcg_canonicalize_memop(memop, 1, 0);
360     orig_oi = oi = make_memop_idx(memop, idx);
361 
362     if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
363         memop &= ~MO_BSWAP;
364         /* The bswap primitive benefits from zero-extended input.  */
365         if ((memop & MO_SIGN) && (memop & MO_SIZE) < MO_64) {
366             memop &= ~MO_SIGN;
367         }
368         oi = make_memop_idx(memop, idx);
369     }
370 
371     copy_addr = plugin_maybe_preserve_addr(addr);
372     if (tcg_ctx->addr_type == TCG_TYPE_I32) {
373         opc = INDEX_op_qemu_ld_a32_i64;
374     } else {
375         opc = INDEX_op_qemu_ld_a64_i64;
376     }
377     gen_ldst_i64(opc, val, addr, oi);
378     plugin_gen_mem_callbacks_i64(val, copy_addr, addr, orig_oi,
379                                  QEMU_PLUGIN_MEM_R);
380 
381     if ((orig_memop ^ memop) & MO_BSWAP) {
382         int flags = (orig_memop & MO_SIGN
383                      ? TCG_BSWAP_IZ | TCG_BSWAP_OS
384                      : TCG_BSWAP_IZ | TCG_BSWAP_OZ);
385         switch (orig_memop & MO_SIZE) {
386         case MO_16:
387             tcg_gen_bswap16_i64(val, val, flags);
388             break;
389         case MO_32:
390             tcg_gen_bswap32_i64(val, val, flags);
391             break;
392         case MO_64:
393             tcg_gen_bswap64_i64(val, val);
394             break;
395         default:
396             g_assert_not_reached();
397         }
398     }
399 }
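
/*
 * On 32-bit hosts, a load narrower than 64 bits is routed through the i32
 * path above and the high half of the destination pair is materialized by
 * sign- or zero-extension; only genuine 64-bit accesses reach the i64 opcodes.
 */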
400 
401 void tcg_gen_qemu_ld_i64_chk(TCGv_i64 val, TCGTemp *addr, TCGArg idx,
402                              MemOp memop, TCGType addr_type)
403 {
404     tcg_debug_assert(addr_type == tcg_ctx->addr_type);
405     tcg_debug_assert((memop & MO_SIZE) <= MO_64);
406     tcg_gen_qemu_ld_i64_int(val, addr, idx, memop);
407 }
408 
409 static void tcg_gen_qemu_st_i64_int(TCGv_i64 val, TCGTemp *addr,
410                                     TCGArg idx, MemOp memop)
411 {
412     TCGv_i64 swap = NULL;
413     MemOpIdx orig_oi, oi;
414     TCGOpcode opc;
415 
416     if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
417         tcg_gen_qemu_st_i32_int(TCGV_LOW(val), addr, idx, memop);
418         return;
419     }
420 
421     tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
422     memop = tcg_canonicalize_memop(memop, 1, 1);
423     orig_oi = oi = make_memop_idx(memop, idx);
424 
425     if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
426         swap = tcg_temp_ebb_new_i64();
427         switch (memop & MO_SIZE) {
428         case MO_16:
429             tcg_gen_bswap16_i64(swap, val, 0);
430             break;
431         case MO_32:
432             tcg_gen_bswap32_i64(swap, val, 0);
433             break;
434         case MO_64:
435             tcg_gen_bswap64_i64(swap, val);
436             break;
437         default:
438             g_assert_not_reached();
439         }
440         val = swap;
441         memop &= ~MO_BSWAP;
442         oi = make_memop_idx(memop, idx);
443     }
444 
445     if (tcg_ctx->addr_type == TCG_TYPE_I32) {
446         opc = INDEX_op_qemu_st_a32_i64;
447     } else {
448         opc = INDEX_op_qemu_st_a64_i64;
449     }
450     gen_ldst_i64(opc, val, addr, oi);
451     plugin_gen_mem_callbacks_i64(val, NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W);
452 
453     if (swap) {
454         tcg_temp_free_i64(swap);
455     }
456 }
457 
458 void tcg_gen_qemu_st_i64_chk(TCGv_i64 val, TCGTemp *addr, TCGArg idx,
459                              MemOp memop, TCGType addr_type)
460 {
461     tcg_debug_assert(addr_type == tcg_ctx->addr_type);
462     tcg_debug_assert((memop & MO_SIZE) <= MO_64);
463     tcg_gen_qemu_st_i64_int(val, addr, idx, memop);
464 }
465 
466 /*
467  * Return true if @mop, without knowledge of the pointer alignment,
468  * does not require 16-byte atomicity, and it would be advantageous
469  * to avoid a call to a helper function.
470  */
471 static bool use_two_i64_for_i128(MemOp mop)
472 {
473     /* Two softmmu TLB lookups are larger than one function call. */
474     if (tcg_use_softmmu) {
475         return false;
476     }
477 
478     /*
479      * For user-only, two 64-bit operations may well be smaller than a call.
480      * Determine if that would be legal for the requested atomicity.
481      */
482     switch (mop & MO_ATOM_MASK) {
483     case MO_ATOM_NONE:
484     case MO_ATOM_IFALIGN_PAIR:
485         return true;
486     case MO_ATOM_IFALIGN:
487     case MO_ATOM_SUBALIGN:
488     case MO_ATOM_WITHIN16:
489     case MO_ATOM_WITHIN16_PAIR:
490         return false;
491     default:
492         g_assert_not_reached();
493     }
494 }
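
/*
 * Reading of the cases above: MO_ATOM_NONE and MO_ATOM_IFALIGN_PAIR are
 * satisfiable by two ordinary aligned 8-byte operations, while the other
 * modes impose atomicity requirements (on the full 16 bytes, or on halves
 * that need not be 8-byte aligned) that the simple split cannot guarantee.
 */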
495 
496 static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
497 {
498     MemOp mop_1 = orig, mop_2;
499 
500     /* Reduce the size to 64-bit. */
501     mop_1 = (mop_1 & ~MO_SIZE) | MO_64;
502 
503     /* Retain the alignment constraints of the original. */
504     switch (orig & MO_AMASK) {
505     case MO_UNALN:
506     case MO_ALIGN_2:
507     case MO_ALIGN_4:
508         mop_2 = mop_1;
509         break;
510     case MO_ALIGN_8:
511         /* Prefer MO_ALIGN+MO_64 to MO_ALIGN_8+MO_64. */
512         mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
513         mop_2 = mop_1;
514         break;
515     case MO_ALIGN:
516         /* Second has 8-byte alignment; first has 16-byte alignment. */
517         mop_2 = mop_1;
518         mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN_16;
519         break;
520     case MO_ALIGN_16:
521     case MO_ALIGN_32:
522     case MO_ALIGN_64:
523         /* Second has 8-byte alignment; first retains original. */
524         mop_2 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
525         break;
526     default:
527         g_assert_not_reached();
528     }
529 
530     /* Use a byte ordering implemented by the host. */
531     if ((orig & MO_BSWAP) && !tcg_target_has_memory_bswap(mop_1)) {
532         mop_1 &= ~MO_BSWAP;
533         mop_2 &= ~MO_BSWAP;
534     }
535 
536     ret[0] = mop_1;
537     ret[1] = mop_2;
538 }
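
/*
 * Example, following the switch above: a naturally aligned 16-byte access,
 * MO_128 | MO_ALIGN, splits into MO_64 | MO_ALIGN_16 for the first half and
 * MO_64 | MO_ALIGN (i.e. 8-byte alignment) for the second.
 */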
539 
540 static TCGv_i64 maybe_extend_addr64(TCGTemp *addr)
541 {
542     if (tcg_ctx->addr_type == TCG_TYPE_I32) {
543         TCGv_i64 a64 = tcg_temp_ebb_new_i64();
544         tcg_gen_extu_i32_i64(a64, temp_tcgv_i32(addr));
545         return a64;
546     }
547     return temp_tcgv_i64(addr);
548 }
549 
550 static void maybe_free_addr64(TCGv_i64 a64)
551 {
552     if (tcg_ctx->addr_type == TCG_TYPE_I32) {
553         tcg_temp_free_i64(a64);
554     }
555 }
556 
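/*
 * Three strategies for a 128-bit access, tried in order (the store expansion
 * below mirrors the load): a native i128 opcode when the backend provides one
 * (64-bit hosts only for now), a pair of i64 accesses when that satisfies the
 * requested atomicity, and otherwise a call to the out-of-line i128 helper.
 */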
557 static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr,
558                                      TCGArg idx, MemOp memop)
559 {
560     MemOpIdx orig_oi;
561     TCGv_i64 ext_addr = NULL;
562     TCGOpcode opc;
563 
564     check_max_alignment(memop_alignment_bits(memop));
565     tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
566 
567     /* In serial mode, reduce atomicity. */
568     if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
569         memop &= ~MO_ATOM_MASK;
570         memop |= MO_ATOM_NONE;
571     }
572     orig_oi = make_memop_idx(memop, idx);
573 
574     /* TODO: For now, force 32-bit hosts to use the helper. */
575     if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
576         TCGv_i64 lo, hi;
577         bool need_bswap = false;
578         MemOpIdx oi = orig_oi;
579 
580         if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
581             lo = TCGV128_HIGH(val);
582             hi = TCGV128_LOW(val);
583             oi = make_memop_idx(memop & ~MO_BSWAP, idx);
584             need_bswap = true;
585         } else {
586             lo = TCGV128_LOW(val);
587             hi = TCGV128_HIGH(val);
588         }
589 
590         if (tcg_ctx->addr_type == TCG_TYPE_I32) {
591             opc = INDEX_op_qemu_ld_a32_i128;
592         } else {
593             opc = INDEX_op_qemu_ld_a64_i128;
594         }
595         gen_ldst(opc, TCG_TYPE_I128, tcgv_i64_temp(lo),
596                  tcgv_i64_temp(hi), addr, oi);
597 
598         if (need_bswap) {
599             tcg_gen_bswap64_i64(lo, lo);
600             tcg_gen_bswap64_i64(hi, hi);
601         }
602     } else if (use_two_i64_for_i128(memop)) {
603         MemOp mop[2];
604         TCGTemp *addr_p8;
605         TCGv_i64 x, y;
606         bool need_bswap;
607 
608         canonicalize_memop_i128_as_i64(mop, memop);
609         need_bswap = (mop[0] ^ memop) & MO_BSWAP;
610 
611         if (tcg_ctx->addr_type == TCG_TYPE_I32) {
612             opc = INDEX_op_qemu_ld_a32_i64;
613         } else {
614             opc = INDEX_op_qemu_ld_a64_i64;
615         }
616 
617         /*
618          * Since there are no global TCGv_i128, no visible state is
619          * changed if the second load faults.  Load directly into the two
620          * subwords.
621          */
622         if ((memop & MO_BSWAP) == MO_LE) {
623             x = TCGV128_LOW(val);
624             y = TCGV128_HIGH(val);
625         } else {
626             x = TCGV128_HIGH(val);
627             y = TCGV128_LOW(val);
628         }
629 
630         gen_ldst_i64(opc, x, addr, make_memop_idx(mop[0], idx));
631 
632         if (need_bswap) {
633             tcg_gen_bswap64_i64(x, x);
634         }
635 
636         if (tcg_ctx->addr_type == TCG_TYPE_I32) {
637             TCGv_i32 t = tcg_temp_ebb_new_i32();
638             tcg_gen_addi_i32(t, temp_tcgv_i32(addr), 8);
639             addr_p8 = tcgv_i32_temp(t);
640         } else {
641             TCGv_i64 t = tcg_temp_ebb_new_i64();
642             tcg_gen_addi_i64(t, temp_tcgv_i64(addr), 8);
643             addr_p8 = tcgv_i64_temp(t);
644         }
645 
646         gen_ldst_i64(opc, y, addr_p8, make_memop_idx(mop[1], idx));
647         tcg_temp_free_internal(addr_p8);
648 
649         if (need_bswap) {
650             tcg_gen_bswap64_i64(y, y);
651         }
652     } else {
653         if (tcg_ctx->addr_type == TCG_TYPE_I32) {
654             ext_addr = tcg_temp_ebb_new_i64();
655             tcg_gen_extu_i32_i64(ext_addr, temp_tcgv_i32(addr));
656             addr = tcgv_i64_temp(ext_addr);
657         }
658         gen_helper_ld_i128(val, tcg_env, temp_tcgv_i64(addr),
659                            tcg_constant_i32(orig_oi));
660     }
661 
662     plugin_gen_mem_callbacks_i128(val, ext_addr, addr, orig_oi,
663                                   QEMU_PLUGIN_MEM_R);
664 }
665 
666 void tcg_gen_qemu_ld_i128_chk(TCGv_i128 val, TCGTemp *addr, TCGArg idx,
667                               MemOp memop, TCGType addr_type)
668 {
669     tcg_debug_assert(addr_type == tcg_ctx->addr_type);
670     tcg_debug_assert((memop & MO_SIZE) == MO_128);
671     tcg_debug_assert((memop & MO_SIGN) == 0);
672     tcg_gen_qemu_ld_i128_int(val, addr, idx, memop);
673 }
674 
675 static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr,
676                                      TCGArg idx, MemOp memop)
677 {
678     MemOpIdx orig_oi;
679     TCGv_i64 ext_addr = NULL;
680     TCGOpcode opc;
681 
682     check_max_alignment(memop_alignment_bits(memop));
683     tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);
684 
685     /* In serial mode, reduce atomicity. */
686     if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
687         memop &= ~MO_ATOM_MASK;
688         memop |= MO_ATOM_NONE;
689     }
690     orig_oi = make_memop_idx(memop, idx);
691 
692     /* TODO: For now, force 32-bit hosts to use the helper. */
693 
694     if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
695         TCGv_i64 lo, hi;
696         MemOpIdx oi = orig_oi;
697         bool need_bswap = false;
698 
699         if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
700             lo = tcg_temp_ebb_new_i64();
701             hi = tcg_temp_ebb_new_i64();
702             tcg_gen_bswap64_i64(lo, TCGV128_HIGH(val));
703             tcg_gen_bswap64_i64(hi, TCGV128_LOW(val));
704             oi = make_memop_idx(memop & ~MO_BSWAP, idx);
705             need_bswap = true;
706         } else {
707             lo = TCGV128_LOW(val);
708             hi = TCGV128_HIGH(val);
709         }
710 
711         if (tcg_ctx->addr_type == TCG_TYPE_I32) {
712             opc = INDEX_op_qemu_st_a32_i128;
713         } else {
714             opc = INDEX_op_qemu_st_a64_i128;
715         }
716         gen_ldst(opc, TCG_TYPE_I128, tcgv_i64_temp(lo),
717                  tcgv_i64_temp(hi), addr, oi);
718 
719         if (need_bswap) {
720             tcg_temp_free_i64(lo);
721             tcg_temp_free_i64(hi);
722         }
723     } else if (use_two_i64_for_i128(memop)) {
724         MemOp mop[2];
725         TCGTemp *addr_p8;
726         TCGv_i64 x, y, b = NULL;
727 
728         canonicalize_memop_i128_as_i64(mop, memop);
729 
730         if (tcg_ctx->addr_type == TCG_TYPE_I32) {
731             opc = INDEX_op_qemu_st_a32_i64;
732         } else {
733             opc = INDEX_op_qemu_st_a64_i64;
734         }
735 
736         if ((memop & MO_BSWAP) == MO_LE) {
737             x = TCGV128_LOW(val);
738             y = TCGV128_HIGH(val);
739         } else {
740             x = TCGV128_HIGH(val);
741             y = TCGV128_LOW(val);
742         }
743 
744         if ((mop[0] ^ memop) & MO_BSWAP) {
745             b = tcg_temp_ebb_new_i64();
746             tcg_gen_bswap64_i64(b, x);
747             x = b;
748         }
749 
750         gen_ldst_i64(opc, x, addr, make_memop_idx(mop[0], idx));
751 
752         if (tcg_ctx->addr_type == TCG_TYPE_I32) {
753             TCGv_i32 t = tcg_temp_ebb_new_i32();
754             tcg_gen_addi_i32(t, temp_tcgv_i32(addr), 8);
755             addr_p8 = tcgv_i32_temp(t);
756         } else {
757             TCGv_i64 t = tcg_temp_ebb_new_i64();
758             tcg_gen_addi_i64(t, temp_tcgv_i64(addr), 8);
759             addr_p8 = tcgv_i64_temp(t);
760         }
761 
762         if (b) {
763             tcg_gen_bswap64_i64(b, y);
764             gen_ldst_i64(opc, b, addr_p8, make_memop_idx(mop[1], idx));
765             tcg_temp_free_i64(b);
766         } else {
767             gen_ldst_i64(opc, y, addr_p8, make_memop_idx(mop[1], idx));
768         }
769         tcg_temp_free_internal(addr_p8);
770     } else {
771         if (tcg_ctx->addr_type == TCG_TYPE_I32) {
772             ext_addr = tcg_temp_ebb_new_i64();
773             tcg_gen_extu_i32_i64(ext_addr, temp_tcgv_i32(addr));
774             addr = tcgv_i64_temp(ext_addr);
775         }
776         gen_helper_st_i128(tcg_env, temp_tcgv_i64(addr), val,
777                            tcg_constant_i32(orig_oi));
778     }
779 
780     plugin_gen_mem_callbacks_i128(val, ext_addr, addr, orig_oi,
781                                   QEMU_PLUGIN_MEM_W);
782 }
783 
784 void tcg_gen_qemu_st_i128_chk(TCGv_i128 val, TCGTemp *addr, TCGArg idx,
785                               MemOp memop, TCGType addr_type)
786 {
787     tcg_debug_assert(addr_type == tcg_ctx->addr_type);
788     tcg_debug_assert((memop & MO_SIZE) == MO_128);
789     tcg_debug_assert((memop & MO_SIGN) == 0);
790     tcg_gen_qemu_st_i128_int(val, addr, idx, memop);
791 }
792 
793 void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc)
794 {
795     switch (opc & MO_SSIZE) {
796     case MO_SB:
797         tcg_gen_ext8s_i32(ret, val);
798         break;
799     case MO_UB:
800         tcg_gen_ext8u_i32(ret, val);
801         break;
802     case MO_SW:
803         tcg_gen_ext16s_i32(ret, val);
804         break;
805     case MO_UW:
806         tcg_gen_ext16u_i32(ret, val);
807         break;
808     case MO_UL:
809     case MO_SL:
810         tcg_gen_mov_i32(ret, val);
811         break;
812     default:
813         g_assert_not_reached();
814     }
815 }
816 
817 void tcg_gen_ext_i64(TCGv_i64 ret, TCGv_i64 val, MemOp opc)
818 {
819     switch (opc & MO_SSIZE) {
820     case MO_SB:
821         tcg_gen_ext8s_i64(ret, val);
822         break;
823     case MO_UB:
824         tcg_gen_ext8u_i64(ret, val);
825         break;
826     case MO_SW:
827         tcg_gen_ext16s_i64(ret, val);
828         break;
829     case MO_UW:
830         tcg_gen_ext16u_i64(ret, val);
831         break;
832     case MO_SL:
833         tcg_gen_ext32s_i64(ret, val);
834         break;
835     case MO_UL:
836         tcg_gen_ext32u_i64(ret, val);
837         break;
838     case MO_UQ:
839     case MO_SQ:
840         tcg_gen_mov_i64(ret, val);
841         break;
842     default:
843         g_assert_not_reached();
844     }
845 }
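
/*
 * Example: tcg_gen_ext_i64(ret, val, MO_SW) sign-extends from bit 15, while
 * MO_UL zero-extends the low 32 bits; MO_UQ and MO_SQ reduce to a plain move.
 */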
846 
847 typedef void (*gen_atomic_cx_i32)(TCGv_i32, TCGv_env, TCGv_i64,
848                                   TCGv_i32, TCGv_i32, TCGv_i32);
849 typedef void (*gen_atomic_cx_i64)(TCGv_i64, TCGv_env, TCGv_i64,
850                                   TCGv_i64, TCGv_i64, TCGv_i32);
851 typedef void (*gen_atomic_cx_i128)(TCGv_i128, TCGv_env, TCGv_i64,
852                                    TCGv_i128, TCGv_i128, TCGv_i32);
853 typedef void (*gen_atomic_op_i32)(TCGv_i32, TCGv_env, TCGv_i64,
854                                   TCGv_i32, TCGv_i32);
855 typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv_i64,
856                                   TCGv_i64, TCGv_i32);
857 
858 #ifdef CONFIG_ATOMIC64
859 # define WITH_ATOMIC64(X) X,
860 #else
861 # define WITH_ATOMIC64(X)
862 #endif
863 #if HAVE_CMPXCHG128
864 # define WITH_ATOMIC128(X) X,
865 #else
866 # define WITH_ATOMIC128(X)
867 #endif
868 
869 static void * const table_cmpxchg[(MO_SIZE | MO_BSWAP) + 1] = {
870     [MO_8] = gen_helper_atomic_cmpxchgb,
871     [MO_16 | MO_LE] = gen_helper_atomic_cmpxchgw_le,
872     [MO_16 | MO_BE] = gen_helper_atomic_cmpxchgw_be,
873     [MO_32 | MO_LE] = gen_helper_atomic_cmpxchgl_le,
874     [MO_32 | MO_BE] = gen_helper_atomic_cmpxchgl_be,
875     WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_cmpxchgq_le)
876     WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_cmpxchgq_be)
877     WITH_ATOMIC128([MO_128 | MO_LE] = gen_helper_atomic_cmpxchgo_le)
878     WITH_ATOMIC128([MO_128 | MO_BE] = gen_helper_atomic_cmpxchgo_be)
879 };
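
/*
 * The table is indexed by size and endianness.  The 64-bit and 128-bit
 * entries are present only when the host provides the corresponding
 * primitives (CONFIG_ATOMIC64, HAVE_CMPXCHG128); a NULL entry, possible only
 * for those larger sizes, makes the callers fall back to
 * gen_helper_exit_atomic.
 */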
880 
881 static void tcg_gen_nonatomic_cmpxchg_i32_int(TCGv_i32 retv, TCGTemp *addr,
882                                               TCGv_i32 cmpv, TCGv_i32 newv,
883                                               TCGArg idx, MemOp memop)
884 {
885     TCGv_i32 t1 = tcg_temp_ebb_new_i32();
886     TCGv_i32 t2 = tcg_temp_ebb_new_i32();
887 
888     tcg_gen_ext_i32(t2, cmpv, memop & MO_SIZE);
889 
890     tcg_gen_qemu_ld_i32_int(t1, addr, idx, memop & ~MO_SIGN);
891     tcg_gen_movcond_i32(TCG_COND_EQ, t2, t1, t2, newv, t1);
892     tcg_gen_qemu_st_i32_int(t2, addr, idx, memop);
893     tcg_temp_free_i32(t2);
894 
895     if (memop & MO_SIGN) {
896         tcg_gen_ext_i32(retv, t1, memop);
897     } else {
898         tcg_gen_mov_i32(retv, t1);
899     }
900     tcg_temp_free_i32(t1);
901 }
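
/*
 * Note the unconditional writeback above: on a failed comparison the movcond
 * selects the just-loaded old value, so the store rewrites what was already
 * in memory.  The non-atomic i128 expansion below uses the same trick.
 */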
902 
903 void tcg_gen_nonatomic_cmpxchg_i32_chk(TCGv_i32 retv, TCGTemp *addr,
904                                        TCGv_i32 cmpv, TCGv_i32 newv,
905                                        TCGArg idx, MemOp memop,
906                                        TCGType addr_type)
907 {
908     tcg_debug_assert(addr_type == tcg_ctx->addr_type);
909     tcg_debug_assert((memop & MO_SIZE) <= MO_32);
910     tcg_gen_nonatomic_cmpxchg_i32_int(retv, addr, cmpv, newv, idx, memop);
911 }
912 
913 static void tcg_gen_atomic_cmpxchg_i32_int(TCGv_i32 retv, TCGTemp *addr,
914                                            TCGv_i32 cmpv, TCGv_i32 newv,
915                                            TCGArg idx, MemOp memop)
916 {
917     gen_atomic_cx_i32 gen;
918     TCGv_i64 a64;
919     MemOpIdx oi;
920 
921     if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
922         tcg_gen_nonatomic_cmpxchg_i32_int(retv, addr, cmpv, newv, idx, memop);
923         return;
924     }
925 
926     memop = tcg_canonicalize_memop(memop, 0, 0);
927     gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
928     tcg_debug_assert(gen != NULL);
929 
930     oi = make_memop_idx(memop & ~MO_SIGN, idx);
931     a64 = maybe_extend_addr64(addr);
932     gen(retv, tcg_env, a64, cmpv, newv, tcg_constant_i32(oi));
933     maybe_free_addr64(a64);
934 
935     if (memop & MO_SIGN) {
936         tcg_gen_ext_i32(retv, retv, memop);
937     }
938 }
939 
940 void tcg_gen_atomic_cmpxchg_i32_chk(TCGv_i32 retv, TCGTemp *addr,
941                                     TCGv_i32 cmpv, TCGv_i32 newv,
942                                     TCGArg idx, MemOp memop,
943                                     TCGType addr_type)
944 {
945     tcg_debug_assert(addr_type == tcg_ctx->addr_type);
946     tcg_debug_assert((memop & MO_SIZE) <= MO_32);
947     tcg_gen_atomic_cmpxchg_i32_int(retv, addr, cmpv, newv, idx, memop);
948 }
949 
950 static void tcg_gen_nonatomic_cmpxchg_i64_int(TCGv_i64 retv, TCGTemp *addr,
951                                               TCGv_i64 cmpv, TCGv_i64 newv,
952                                               TCGArg idx, MemOp memop)
953 {
954     TCGv_i64 t1, t2;
955 
956     if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
957         tcg_gen_nonatomic_cmpxchg_i32_int(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
958                                           TCGV_LOW(newv), idx, memop);
959         if (memop & MO_SIGN) {
960             tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31);
961         } else {
962             tcg_gen_movi_i32(TCGV_HIGH(retv), 0);
963         }
964         return;
965     }
966 
967     t1 = tcg_temp_ebb_new_i64();
968     t2 = tcg_temp_ebb_new_i64();
969 
970     tcg_gen_ext_i64(t2, cmpv, memop & MO_SIZE);
971 
972     tcg_gen_qemu_ld_i64_int(t1, addr, idx, memop & ~MO_SIGN);
973     tcg_gen_movcond_i64(TCG_COND_EQ, t2, t1, t2, newv, t1);
974     tcg_gen_qemu_st_i64_int(t2, addr, idx, memop);
975     tcg_temp_free_i64(t2);
976 
977     if (memop & MO_SIGN) {
978         tcg_gen_ext_i64(retv, t1, memop);
979     } else {
980         tcg_gen_mov_i64(retv, t1);
981     }
982     tcg_temp_free_i64(t1);
983 }
984 
985 void tcg_gen_nonatomic_cmpxchg_i64_chk(TCGv_i64 retv, TCGTemp *addr,
986                                        TCGv_i64 cmpv, TCGv_i64 newv,
987                                        TCGArg idx, MemOp memop,
988                                        TCGType addr_type)
989 {
990     tcg_debug_assert(addr_type == tcg_ctx->addr_type);
991     tcg_debug_assert((memop & MO_SIZE) <= MO_64);
992     tcg_gen_nonatomic_cmpxchg_i64_int(retv, addr, cmpv, newv, idx, memop);
993 }
994 
995 static void tcg_gen_atomic_cmpxchg_i64_int(TCGv_i64 retv, TCGTemp *addr,
996                                            TCGv_i64 cmpv, TCGv_i64 newv,
997                                            TCGArg idx, MemOp memop)
998 {
999     if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
1000         tcg_gen_nonatomic_cmpxchg_i64_int(retv, addr, cmpv, newv, idx, memop);
1001         return;
1002     }
1003 
1004     if ((memop & MO_SIZE) == MO_64) {
1005         gen_atomic_cx_i64 gen;
1006 
1007         memop = tcg_canonicalize_memop(memop, 1, 0);
1008         gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
1009         if (gen) {
1010             MemOpIdx oi = make_memop_idx(memop, idx);
1011             TCGv_i64 a64 = maybe_extend_addr64(addr);
1012             gen(retv, tcg_env, a64, cmpv, newv, tcg_constant_i32(oi));
1013             maybe_free_addr64(a64);
1014             return;
1015         }
1016 
1017         gen_helper_exit_atomic(tcg_env);
1018 
1019         /*
1020          * Produce a result for a well-formed opcode stream.  This satisfies
1021          * liveness for set before used, which happens before this dead code
1022          * is removed.
1023          */
1024         tcg_gen_movi_i64(retv, 0);
1025         return;
1026     }
1027 
1028     if (TCG_TARGET_REG_BITS == 32) {
1029         tcg_gen_atomic_cmpxchg_i32_int(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
1030                                        TCGV_LOW(newv), idx, memop);
1031         if (memop & MO_SIGN) {
1032             tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31);
1033         } else {
1034             tcg_gen_movi_i32(TCGV_HIGH(retv), 0);
1035         }
1036     } else {
1037         TCGv_i32 c32 = tcg_temp_ebb_new_i32();
1038         TCGv_i32 n32 = tcg_temp_ebb_new_i32();
1039         TCGv_i32 r32 = tcg_temp_ebb_new_i32();
1040 
1041         tcg_gen_extrl_i64_i32(c32, cmpv);
1042         tcg_gen_extrl_i64_i32(n32, newv);
1043         tcg_gen_atomic_cmpxchg_i32_int(r32, addr, c32, n32,
1044                                        idx, memop & ~MO_SIGN);
1045         tcg_temp_free_i32(c32);
1046         tcg_temp_free_i32(n32);
1047 
1048         tcg_gen_extu_i32_i64(retv, r32);
1049         tcg_temp_free_i32(r32);
1050 
1051         if (memop & MO_SIGN) {
1052             tcg_gen_ext_i64(retv, retv, memop);
1053         }
1054     }
1055 }
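
/*
 * About gen_helper_exit_atomic: it raises EXCP_ATOMIC, which is assumed to
 * cause the instruction to be re-executed in an exclusive, serialized context
 * (cpu_exec_step_atomic), where the plain non-atomic expansion is safe.
 */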
1056 
1057 void tcg_gen_atomic_cmpxchg_i64_chk(TCGv_i64 retv, TCGTemp *addr,
1058                                     TCGv_i64 cmpv, TCGv_i64 newv,
1059                                     TCGArg idx, MemOp memop, TCGType addr_type)
1060 {
1061     tcg_debug_assert(addr_type == tcg_ctx->addr_type);
1062     tcg_debug_assert((memop & MO_SIZE) <= MO_64);
1063     tcg_gen_atomic_cmpxchg_i64_int(retv, addr, cmpv, newv, idx, memop);
1064 }
1065 
1066 static void tcg_gen_nonatomic_cmpxchg_i128_int(TCGv_i128 retv, TCGTemp *addr,
1067                                                TCGv_i128 cmpv, TCGv_i128 newv,
1068                                                TCGArg idx, MemOp memop)
1069 {
1070     if (TCG_TARGET_REG_BITS == 32) {
1071         /* Inline expansion below is simply too large for 32-bit hosts. */
1072         MemOpIdx oi = make_memop_idx(memop, idx);
1073         TCGv_i64 a64 = maybe_extend_addr64(addr);
1074 
1075         gen_helper_nonatomic_cmpxchgo(retv, tcg_env, a64, cmpv, newv,
1076                                       tcg_constant_i32(oi));
1077         maybe_free_addr64(a64);
1078     } else {
1079         TCGv_i128 oldv = tcg_temp_ebb_new_i128();
1080         TCGv_i128 tmpv = tcg_temp_ebb_new_i128();
1081         TCGv_i64 t0 = tcg_temp_ebb_new_i64();
1082         TCGv_i64 t1 = tcg_temp_ebb_new_i64();
1083         TCGv_i64 z = tcg_constant_i64(0);
1084 
1085         tcg_gen_qemu_ld_i128_int(oldv, addr, idx, memop);
1086 
1087         /* Compare i128 */
1088         tcg_gen_xor_i64(t0, TCGV128_LOW(oldv), TCGV128_LOW(cmpv));
1089         tcg_gen_xor_i64(t1, TCGV128_HIGH(oldv), TCGV128_HIGH(cmpv));
1090         tcg_gen_or_i64(t0, t0, t1);
1091 
1092         /* tmpv = equal ? newv : oldv */
1093         tcg_gen_movcond_i64(TCG_COND_EQ, TCGV128_LOW(tmpv), t0, z,
1094                             TCGV128_LOW(newv), TCGV128_LOW(oldv));
1095         tcg_gen_movcond_i64(TCG_COND_EQ, TCGV128_HIGH(tmpv), t0, z,
1096                             TCGV128_HIGH(newv), TCGV128_HIGH(oldv));
1097 
1098         /* Unconditional writeback. */
1099         tcg_gen_qemu_st_i128_int(tmpv, addr, idx, memop);
1100         tcg_gen_mov_i128(retv, oldv);
1101 
1102         tcg_temp_free_i64(t0);
1103         tcg_temp_free_i64(t1);
1104         tcg_temp_free_i128(tmpv);
1105         tcg_temp_free_i128(oldv);
1106     }
1107 }
1108 
1109 void tcg_gen_nonatomic_cmpxchg_i128_chk(TCGv_i128 retv, TCGTemp *addr,
1110                                         TCGv_i128 cmpv, TCGv_i128 newv,
1111                                         TCGArg idx, MemOp memop,
1112                                         TCGType addr_type)
1113 {
1114     tcg_debug_assert(addr_type == tcg_ctx->addr_type);
1115     tcg_debug_assert((memop & (MO_SIZE | MO_SIGN)) == MO_128);
1116     tcg_gen_nonatomic_cmpxchg_i128_int(retv, addr, cmpv, newv, idx, memop);
1117 }
1118 
1119 static void tcg_gen_atomic_cmpxchg_i128_int(TCGv_i128 retv, TCGTemp *addr,
1120                                             TCGv_i128 cmpv, TCGv_i128 newv,
1121                                             TCGArg idx, MemOp memop)
1122 {
1123     gen_atomic_cx_i128 gen;
1124 
1125     if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
1126         tcg_gen_nonatomic_cmpxchg_i128_int(retv, addr, cmpv, newv, idx, memop);
1127         return;
1128     }
1129 
1130     gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
1131     if (gen) {
1132         MemOpIdx oi = make_memop_idx(memop, idx);
1133         TCGv_i64 a64 = maybe_extend_addr64(addr);
1134         gen(retv, tcg_env, a64, cmpv, newv, tcg_constant_i32(oi));
1135         maybe_free_addr64(a64);
1136         return;
1137     }
1138 
1139     gen_helper_exit_atomic(tcg_env);
1140 
1141     /*
1142      * Produce a result for a well-formed opcode stream.  This satisfies
1143      * liveness for set before used, which happens before this dead code
1144      * is removed.
1145      */
1146     tcg_gen_movi_i64(TCGV128_LOW(retv), 0);
1147     tcg_gen_movi_i64(TCGV128_HIGH(retv), 0);
1148 }
1149 
1150 void tcg_gen_atomic_cmpxchg_i128_chk(TCGv_i128 retv, TCGTemp *addr,
1151                                      TCGv_i128 cmpv, TCGv_i128 newv,
1152                                      TCGArg idx, MemOp memop,
1153                                      TCGType addr_type)
1154 {
1155     tcg_debug_assert(addr_type == tcg_ctx->addr_type);
1156     tcg_debug_assert((memop & (MO_SIZE | MO_SIGN)) == MO_128);
1157     tcg_gen_atomic_cmpxchg_i128_int(retv, addr, cmpv, newv, idx, memop);
1158 }
1159 
1160 static void do_nonatomic_op_i32(TCGv_i32 ret, TCGTemp *addr, TCGv_i32 val,
1161                                 TCGArg idx, MemOp memop, bool new_val,
1162                                 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
1163 {
1164     TCGv_i32 t1 = tcg_temp_ebb_new_i32();
1165     TCGv_i32 t2 = tcg_temp_ebb_new_i32();
1166 
1167     memop = tcg_canonicalize_memop(memop, 0, 0);
1168 
1169     tcg_gen_qemu_ld_i32_int(t1, addr, idx, memop);
1170     tcg_gen_ext_i32(t2, val, memop);
1171     gen(t2, t1, t2);
1172     tcg_gen_qemu_st_i32_int(t2, addr, idx, memop);
1173 
1174     tcg_gen_ext_i32(ret, (new_val ? t2 : t1), memop);
1175     tcg_temp_free_i32(t1);
1176     tcg_temp_free_i32(t2);
1177 }
1178 
1179 static void do_atomic_op_i32(TCGv_i32 ret, TCGTemp *addr, TCGv_i32 val,
1180                              TCGArg idx, MemOp memop, void * const table[])
1181 {
1182     gen_atomic_op_i32 gen;
1183     TCGv_i64 a64;
1184     MemOpIdx oi;
1185 
1186     memop = tcg_canonicalize_memop(memop, 0, 0);
1187 
1188     gen = table[memop & (MO_SIZE | MO_BSWAP)];
1189     tcg_debug_assert(gen != NULL);
1190 
1191     oi = make_memop_idx(memop & ~MO_SIGN, idx);
1192     a64 = maybe_extend_addr64(addr);
1193     gen(ret, tcg_env, a64, val, tcg_constant_i32(oi));
1194     maybe_free_addr64(a64);
1195 
1196     if (memop & MO_SIGN) {
1197         tcg_gen_ext_i32(ret, ret, memop);
1198     }
1199 }
1200 
1201 static void do_nonatomic_op_i64(TCGv_i64 ret, TCGTemp *addr, TCGv_i64 val,
1202                                 TCGArg idx, MemOp memop, bool new_val,
1203                                 void (*gen)(TCGv_i64, TCGv_i64, TCGv_i64))
1204 {
1205     TCGv_i64 t1 = tcg_temp_ebb_new_i64();
1206     TCGv_i64 t2 = tcg_temp_ebb_new_i64();
1207 
1208     memop = tcg_canonicalize_memop(memop, 1, 0);
1209 
1210     tcg_gen_qemu_ld_i64_int(t1, addr, idx, memop);
1211     tcg_gen_ext_i64(t2, val, memop);
1212     gen(t2, t1, t2);
1213     tcg_gen_qemu_st_i64_int(t2, addr, idx, memop);
1214 
1215     tcg_gen_ext_i64(ret, (new_val ? t2 : t1), memop);
1216     tcg_temp_free_i64(t1);
1217     tcg_temp_free_i64(t2);
1218 }
1219 
1220 static void do_atomic_op_i64(TCGv_i64 ret, TCGTemp *addr, TCGv_i64 val,
1221                              TCGArg idx, MemOp memop, void * const table[])
1222 {
1223     memop = tcg_canonicalize_memop(memop, 1, 0);
1224 
1225     if ((memop & MO_SIZE) == MO_64) {
1226         gen_atomic_op_i64 gen = table[memop & (MO_SIZE | MO_BSWAP)];
1227 
1228         if (gen) {
1229             MemOpIdx oi = make_memop_idx(memop & ~MO_SIGN, idx);
1230             TCGv_i64 a64 = maybe_extend_addr64(addr);
1231             gen(ret, tcg_env, a64, val, tcg_constant_i32(oi));
1232             maybe_free_addr64(a64);
1233             return;
1234         }
1235 
1236         gen_helper_exit_atomic(tcg_env);
1237         /* Produce a result, so that we have a well-formed opcode stream
1238            with respect to uses of the result in the (dead) code following.  */
1239         tcg_gen_movi_i64(ret, 0);
1240     } else {
1241         TCGv_i32 v32 = tcg_temp_ebb_new_i32();
1242         TCGv_i32 r32 = tcg_temp_ebb_new_i32();
1243 
1244         tcg_gen_extrl_i64_i32(v32, val);
1245         do_atomic_op_i32(r32, addr, v32, idx, memop & ~MO_SIGN, table);
1246         tcg_temp_free_i32(v32);
1247 
1248         tcg_gen_extu_i32_i64(ret, r32);
1249         tcg_temp_free_i32(r32);
1250 
1251         if (memop & MO_SIGN) {
1252             tcg_gen_ext_i64(ret, ret, memop);
1253         }
1254     }
1255 }
1256 
1257 #define GEN_ATOMIC_HELPER(NAME, OP, NEW)                                \
1258 static void * const table_##NAME[(MO_SIZE | MO_BSWAP) + 1] = {          \
1259     [MO_8] = gen_helper_atomic_##NAME##b,                               \
1260     [MO_16 | MO_LE] = gen_helper_atomic_##NAME##w_le,                   \
1261     [MO_16 | MO_BE] = gen_helper_atomic_##NAME##w_be,                   \
1262     [MO_32 | MO_LE] = gen_helper_atomic_##NAME##l_le,                   \
1263     [MO_32 | MO_BE] = gen_helper_atomic_##NAME##l_be,                   \
1264     WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_##NAME##q_le)     \
1265     WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_##NAME##q_be)     \
1266 };                                                                      \
1267 void tcg_gen_atomic_##NAME##_i32_chk(TCGv_i32 ret, TCGTemp *addr,       \
1268                                      TCGv_i32 val, TCGArg idx,          \
1269                                      MemOp memop, TCGType addr_type)    \
1270 {                                                                       \
1271     tcg_debug_assert(addr_type == tcg_ctx->addr_type);                  \
1272     tcg_debug_assert((memop & MO_SIZE) <= MO_32);                       \
1273     if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) {                        \
1274         do_atomic_op_i32(ret, addr, val, idx, memop, table_##NAME);     \
1275     } else {                                                            \
1276         do_nonatomic_op_i32(ret, addr, val, idx, memop, NEW,            \
1277                             tcg_gen_##OP##_i32);                        \
1278     }                                                                   \
1279 }                                                                       \
1280 void tcg_gen_atomic_##NAME##_i64_chk(TCGv_i64 ret, TCGTemp *addr,       \
1281                                      TCGv_i64 val, TCGArg idx,          \
1282                                      MemOp memop, TCGType addr_type)    \
1283 {                                                                       \
1284     tcg_debug_assert(addr_type == tcg_ctx->addr_type);                  \
1285     tcg_debug_assert((memop & MO_SIZE) <= MO_64);                       \
1286     if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) {                        \
1287         do_atomic_op_i64(ret, addr, val, idx, memop, table_##NAME);     \
1288     } else {                                                            \
1289         do_nonatomic_op_i64(ret, addr, val, idx, memop, NEW,            \
1290                             tcg_gen_##OP##_i64);                        \
1291     }                                                                   \
1292 }
1293 
1294 GEN_ATOMIC_HELPER(fetch_add, add, 0)
1295 GEN_ATOMIC_HELPER(fetch_and, and, 0)
1296 GEN_ATOMIC_HELPER(fetch_or, or, 0)
1297 GEN_ATOMIC_HELPER(fetch_xor, xor, 0)
1298 GEN_ATOMIC_HELPER(fetch_smin, smin, 0)
1299 GEN_ATOMIC_HELPER(fetch_umin, umin, 0)
1300 GEN_ATOMIC_HELPER(fetch_smax, smax, 0)
1301 GEN_ATOMIC_HELPER(fetch_umax, umax, 0)
1302 
1303 GEN_ATOMIC_HELPER(add_fetch, add, 1)
1304 GEN_ATOMIC_HELPER(and_fetch, and, 1)
1305 GEN_ATOMIC_HELPER(or_fetch, or, 1)
1306 GEN_ATOMIC_HELPER(xor_fetch, xor, 1)
1307 GEN_ATOMIC_HELPER(smin_fetch, smin, 1)
1308 GEN_ATOMIC_HELPER(umin_fetch, umin, 1)
1309 GEN_ATOMIC_HELPER(smax_fetch, smax, 1)
1310 GEN_ATOMIC_HELPER(umax_fetch, umax, 1)
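
/*
 * Naming: the fetch_<op> group (NEW = 0) returns the value that was in memory
 * before the operation, while the <op>_fetch group (NEW = 1) returns the
 * newly computed value.  xchg below reuses the same machinery with a "mov2"
 * operation, so the new value is stored and the old value is returned.
 */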
1311 
1312 static void tcg_gen_mov2_i32(TCGv_i32 r, TCGv_i32 a, TCGv_i32 b)
1313 {
1314     tcg_gen_mov_i32(r, b);
1315 }
1316 
1317 static void tcg_gen_mov2_i64(TCGv_i64 r, TCGv_i64 a, TCGv_i64 b)
1318 {
1319     tcg_gen_mov_i64(r, b);
1320 }
1321 
1322 GEN_ATOMIC_HELPER(xchg, mov2, 0)
1323 
1324 #undef GEN_ATOMIC_HELPER
1325