xref: /qemu/tcg/tcg-op-ldst.c (revision fcdab382c8b92bcc689b18f8ba5cd036139945bf)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 #include "exec/exec-all.h"
27 #include "tcg/tcg.h"
28 #include "tcg/tcg-temp-internal.h"
29 #include "tcg/tcg-op.h"
30 #include "tcg/tcg-mo.h"
31 #include "exec/plugin-gen.h"
32 #include "tcg-internal.h"
33 
34 
35 static inline MemOp tcg_canonicalize_memop(MemOp op, bool is64, bool st)
36 {
37     /* Trigger the asserts within as early as possible.  */
38     unsigned a_bits = get_alignment_bits(op);
39 
40     /* Prefer MO_ALIGN+MO_XX over MO_ALIGN_XX+MO_XX */
41     if (a_bits == (op & MO_SIZE)) {
42         op = (op & ~MO_AMASK) | MO_ALIGN;
43     }
44 
45     switch (op & MO_SIZE) {
46     case MO_8:
47         op &= ~MO_BSWAP;
48         break;
49     case MO_16:
50         break;
51     case MO_32:
52         if (!is64) {
53             op &= ~MO_SIGN;
54         }
55         break;
56     case MO_64:
57         if (is64) {
58             op &= ~MO_SIGN;
59             break;
60         }
61         /* fall through */
62     default:
63         g_assert_not_reached();
64     }
65     if (st) {
66         op &= ~MO_SIGN;
67     }
68     return op;
69 }
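/*
 * Illustrative results of the canonicalization above (not exhaustive;
 * these follow directly from the code):
 *
 *   MO_16 | MO_ALIGN_2        -> MO_16 | MO_ALIGN  (a_bits == MO_SIZE)
 *   MO_8 with MO_BSWAP set    -> MO_BSWAP cleared  (swapping one byte is a no-op)
 *   MO_SL load into an i32    -> MO_UL             (the value already fills the register)
 *   any store                 -> MO_SIGN cleared   (extension is meaningless for stores)
 */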
70 
71 static void gen_ldst_i32(TCGOpcode opc, TCGv_i32 val, TCGv addr,
72                          MemOp memop, TCGArg idx)
73 {
74     MemOpIdx oi = make_memop_idx(memop, idx);
75 #if TARGET_LONG_BITS == 32
76     tcg_gen_op3i_i32(opc, val, addr, oi);
77 #else
78     if (TCG_TARGET_REG_BITS == 32) {
79         tcg_gen_op4i_i32(opc, val, TCGV_LOW(addr), TCGV_HIGH(addr), oi);
80     } else {
81         tcg_gen_op3(opc, tcgv_i32_arg(val), tcgv_i64_arg(addr), oi);
82     }
83 #endif
84 }
85 
86 static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 val, TCGv addr,
87                          MemOp memop, TCGArg idx)
88 {
89     MemOpIdx oi = make_memop_idx(memop, idx);
90 #if TARGET_LONG_BITS == 32
91     if (TCG_TARGET_REG_BITS == 32) {
92         tcg_gen_op4i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val), addr, oi);
93     } else {
94         tcg_gen_op3(opc, tcgv_i64_arg(val), tcgv_i32_arg(addr), oi);
95     }
96 #else
97     if (TCG_TARGET_REG_BITS == 32) {
98         tcg_gen_op5i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val),
99                          TCGV_LOW(addr), TCGV_HIGH(addr), oi);
100     } else {
101         tcg_gen_op3i_i64(opc, val, addr, oi);
102     }
103 #endif
104 }
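/*
 * Note on the two emitters above: the opcode arity depends on how many host
 * registers the value and the guest address occupy.  On a 32-bit host, a
 * 64-bit value or a 64-bit guest address is passed as a low/high pair of
 * i32 halves, so the same logical load or store may be emitted as an op3,
 * op4 or op5 opcode.
 */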
105 
106 static void tcg_gen_req_mo(TCGBar type)
107 {
108 #ifdef TCG_GUEST_DEFAULT_MO
109     type &= TCG_GUEST_DEFAULT_MO;
110 #endif
111     type &= ~TCG_TARGET_DEFAULT_MO;
112     if (type) {
113         tcg_gen_mb(type | TCG_BAR_SC);
114     }
115 }
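/*
 * Example: a guest load below requests TCG_MO_LD_LD | TCG_MO_ST_LD.  Bits
 * that the guest memory model does not require (TCG_GUEST_DEFAULT_MO) or
 * that the host already guarantees (TCG_TARGET_DEFAULT_MO) are dropped;
 * only if something remains is an explicit tcg_gen_mb(... | TCG_BAR_SC)
 * emitted.
 */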
116 
117 /* Only required for loads, where value might overlap addr. */
118 static TCGv_i64 plugin_maybe_preserve_addr(TCGv vaddr)
119 {
120 #ifdef CONFIG_PLUGIN
121     if (tcg_ctx->plugin_insn != NULL) {
122         /* Save a copy of the vaddr for use after a load.  */
123         TCGv_i64 temp = tcg_temp_ebb_new_i64();
124         tcg_gen_extu_tl_i64(temp, vaddr);
125         return temp;
126     }
127 #endif
128     return NULL;
129 }
130 
131 static void
132 plugin_gen_mem_callbacks(TCGv_i64 copy_addr, TCGv orig_addr, MemOpIdx oi,
133                          enum qemu_plugin_mem_rw rw)
134 {
135 #ifdef CONFIG_PLUGIN
136     if (tcg_ctx->plugin_insn != NULL) {
137         qemu_plugin_meminfo_t info = make_plugin_meminfo(oi, rw);
138 
139 #if TARGET_LONG_BITS == 64
140         if (copy_addr) {
141             plugin_gen_empty_mem_callback(copy_addr, info);
142             tcg_temp_free_i64(copy_addr);
143         } else {
144             plugin_gen_empty_mem_callback(orig_addr, info);
145         }
146 #else
147         if (!copy_addr) {
148             copy_addr = tcg_temp_ebb_new_i64();
149             tcg_gen_extu_tl_i64(copy_addr, orig_addr);
150         }
151         plugin_gen_empty_mem_callback(copy_addr, info);
152         tcg_temp_free_i64(copy_addr);
153 #endif
154     }
155 #endif
156 }
157 
158 void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
159 {
160     MemOp orig_memop;
161     MemOpIdx oi;
162     TCGv_i64 copy_addr;
163 
164     tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
165     memop = tcg_canonicalize_memop(memop, 0, 0);
166     oi = make_memop_idx(memop, idx);
167 
168     orig_memop = memop;
169     if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
170         memop &= ~MO_BSWAP;
171         /* The bswap primitive benefits from zero-extended input.  */
172         if ((memop & MO_SSIZE) == MO_SW) {
173             memop &= ~MO_SIGN;
174         }
175     }
176 
177     copy_addr = plugin_maybe_preserve_addr(addr);
178     gen_ldst_i32(INDEX_op_qemu_ld_i32, val, addr, memop, idx);
179     plugin_gen_mem_callbacks(copy_addr, addr, oi, QEMU_PLUGIN_MEM_R);
180 
181     if ((orig_memop ^ memop) & MO_BSWAP) {
182         switch (orig_memop & MO_SIZE) {
183         case MO_16:
184             tcg_gen_bswap16_i32(val, val, (orig_memop & MO_SIGN
185                                            ? TCG_BSWAP_IZ | TCG_BSWAP_OS
186                                            : TCG_BSWAP_IZ | TCG_BSWAP_OZ));
187             break;
188         case MO_32:
189             tcg_gen_bswap32_i32(val, val);
190             break;
191         default:
192             g_assert_not_reached();
193         }
194     }
195 }
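/*
 * Typical front-end use (illustrative only; "ctx->mem_idx" stands in for
 * whatever mmu index the translator uses):
 *
 *     tcg_gen_qemu_ld_i32(dest, addr, ctx->mem_idx, MO_TEUW | MO_ALIGN);
 *
 * loads an aligned, target-endian 16-bit value zero-extended into "dest".
 * If the host cannot byte-swap as part of the memory operation, the swap
 * (and any sign extension) is performed above with separate opcodes.
 */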
196 
197 void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
198 {
199     TCGv_i32 swap = NULL;
200     MemOpIdx oi;
201 
202     tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
203     memop = tcg_canonicalize_memop(memop, 0, 1);
204     oi = make_memop_idx(memop, idx);
205 
206     if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
207         swap = tcg_temp_ebb_new_i32();
208         switch (memop & MO_SIZE) {
209         case MO_16:
210             tcg_gen_bswap16_i32(swap, val, 0);
211             break;
212         case MO_32:
213             tcg_gen_bswap32_i32(swap, val);
214             break;
215         default:
216             g_assert_not_reached();
217         }
218         val = swap;
219         memop &= ~MO_BSWAP;
220     }
221 
222     if (TCG_TARGET_HAS_qemu_st8_i32 && (memop & MO_SIZE) == MO_8) {
223         gen_ldst_i32(INDEX_op_qemu_st8_i32, val, addr, memop, idx);
224     } else {
225         gen_ldst_i32(INDEX_op_qemu_st_i32, val, addr, memop, idx);
226     }
227     plugin_gen_mem_callbacks(NULL, addr, oi, QEMU_PLUGIN_MEM_W);
228 
229     if (swap) {
230         tcg_temp_free_i32(swap);
231     }
232 }
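/*
 * Illustrative store counterpart (again, "ctx->mem_idx" is a placeholder):
 *
 *     tcg_gen_qemu_st_i32(src, addr, ctx->mem_idx, MO_UB);
 *
 * Byte stores use the dedicated INDEX_op_qemu_st8_i32 opcode on hosts that
 * define TCG_TARGET_HAS_qemu_st8_i32, which lets the backend apply tighter
 * register constraints (32-bit x86 being the usual motivation).
 */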
233 
234 void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
235 {
236     MemOp orig_memop;
237     MemOpIdx oi;
238     TCGv_i64 copy_addr;
239 
240     if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
241         tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop);
242         if (memop & MO_SIGN) {
243             tcg_gen_sari_i32(TCGV_HIGH(val), TCGV_LOW(val), 31);
244         } else {
245             tcg_gen_movi_i32(TCGV_HIGH(val), 0);
246         }
247         return;
248     }
249 
250     tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
251     memop = tcg_canonicalize_memop(memop, 1, 0);
252     oi = make_memop_idx(memop, idx);
253 
254     orig_memop = memop;
255     if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
256         memop &= ~MO_BSWAP;
257         /* The bswap primitive benefits from zero-extended input.  */
258         if ((memop & MO_SIGN) && (memop & MO_SIZE) < MO_64) {
259             memop &= ~MO_SIGN;
260         }
261     }
262 
263     copy_addr = plugin_maybe_preserve_addr(addr);
264     gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, memop, idx);
265     plugin_gen_mem_callbacks(copy_addr, addr, oi, QEMU_PLUGIN_MEM_R);
266 
267     if ((orig_memop ^ memop) & MO_BSWAP) {
268         int flags = (orig_memop & MO_SIGN
269                      ? TCG_BSWAP_IZ | TCG_BSWAP_OS
270                      : TCG_BSWAP_IZ | TCG_BSWAP_OZ);
271         switch (orig_memop & MO_SIZE) {
272         case MO_16:
273             tcg_gen_bswap16_i64(val, val, flags);
274             break;
275         case MO_32:
276             tcg_gen_bswap32_i64(val, val, flags);
277             break;
278         case MO_64:
279             tcg_gen_bswap64_i64(val, val);
280             break;
281         default:
282             g_assert_not_reached();
283         }
284     }
285 }
286 
287 void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
288 {
289     TCGv_i64 swap = NULL;
290     MemOpIdx oi;
291 
292     if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
293         tcg_gen_qemu_st_i32(TCGV_LOW(val), addr, idx, memop);
294         return;
295     }
296 
297     tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
298     memop = tcg_canonicalize_memop(memop, 1, 1);
299     oi = make_memop_idx(memop, idx);
300 
301     if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
302         swap = tcg_temp_ebb_new_i64();
303         switch (memop & MO_SIZE) {
304         case MO_16:
305             tcg_gen_bswap16_i64(swap, val, 0);
306             break;
307         case MO_32:
308             tcg_gen_bswap32_i64(swap, val, 0);
309             break;
310         case MO_64:
311             tcg_gen_bswap64_i64(swap, val);
312             break;
313         default:
314             g_assert_not_reached();
315         }
316         val = swap;
317         memop &= ~MO_BSWAP;
318     }
319 
320     gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, memop, idx);
321     plugin_gen_mem_callbacks(NULL, addr, oi, QEMU_PLUGIN_MEM_W);
322 
323     if (swap) {
324         tcg_temp_free_i64(swap);
325     }
326 }
327 
328 /*
329  * Return true if @mop, without knowledge of the pointer alignment,
330  * does not require 16-byte atomicity, and it would be advantageous
331  * to avoid a call to a helper function.
332  */
333 static bool use_two_i64_for_i128(MemOp mop)
334 {
335 #ifdef CONFIG_SOFTMMU
336     /* Two softmmu tlb lookups are larger than one function call. */
337     return false;
338 #else
339     /*
340      * For user-only, two 64-bit operations may well be smaller than a call.
341      * Determine if that would be legal for the requested atomicity.
342      */
343     switch (mop & MO_ATOM_MASK) {
344     case MO_ATOM_NONE:
345     case MO_ATOM_IFALIGN_PAIR:
346         return true;
347     case MO_ATOM_IFALIGN:
348     case MO_ATOM_SUBALIGN:
349     case MO_ATOM_WITHIN16:
350     case MO_ATOM_WITHIN16_PAIR:
351         /* In a serialized context, no atomicity is required. */
352         return !(tcg_ctx->gen_tb->cflags & CF_PARALLEL);
353     default:
354         g_assert_not_reached();
355     }
356 #endif
357 }
358 
359 static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
360 {
361     MemOp mop_1 = orig, mop_2;
362 
363     tcg_debug_assert((orig & MO_SIZE) == MO_128);
364     tcg_debug_assert((orig & MO_SIGN) == 0);
365 
366     /* Reduce the size to 64-bit. */
367     mop_1 = (mop_1 & ~MO_SIZE) | MO_64;
368 
369     /* Retain the alignment constraints of the original. */
370     switch (orig & MO_AMASK) {
371     case MO_UNALN:
372     case MO_ALIGN_2:
373     case MO_ALIGN_4:
374         mop_2 = mop_1;
375         break;
376     case MO_ALIGN_8:
377         /* Prefer MO_ALIGN+MO_64 to MO_ALIGN_8+MO_64. */
378         mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
379         mop_2 = mop_1;
380         break;
381     case MO_ALIGN:
382         /* Second has 8-byte alignment; first has 16-byte alignment. */
383         mop_2 = mop_1;
384         mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN_16;
385         break;
386     case MO_ALIGN_16:
387     case MO_ALIGN_32:
388     case MO_ALIGN_64:
389         /* Second has 8-byte alignment; first retains original. */
390         mop_2 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
391         break;
392     default:
393         g_assert_not_reached();
394     }
395 
396     /* Use a byte ordering implemented by the host. */
397     if ((orig & MO_BSWAP) && !tcg_target_has_memory_bswap(mop_1)) {
398         mop_1 &= ~MO_BSWAP;
399         mop_2 &= ~MO_BSWAP;
400     }
401 
402     ret[0] = mop_1;
403     ret[1] = mop_2;
404 }
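/*
 * Worked example for the helper above: a naturally aligned 16-byte access,
 * orig = MO_128 | MO_ALIGN, splits into
 *
 *     ret[0] = MO_64 | MO_ALIGN_16   (first half carries the 16-byte requirement)
 *     ret[1] = MO_64 | MO_ALIGN      (second half needs only 8-byte alignment)
 *
 * with MO_BSWAP dropped from both halves if the host cannot swap bytes as
 * part of the memory operation.
 */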
405 
406 #if TARGET_LONG_BITS == 64
407 #define tcg_temp_ebb_new  tcg_temp_ebb_new_i64
408 #else
409 #define tcg_temp_ebb_new  tcg_temp_ebb_new_i32
410 #endif
411 
412 static TCGv_i64 maybe_extend_addr64(TCGv addr)
413 {
414 #if TARGET_LONG_BITS == 32
415     TCGv_i64 a64 = tcg_temp_ebb_new_i64();
416     tcg_gen_extu_i32_i64(a64, addr);
417     return a64;
418 #else
419     return addr;
420 #endif
421 }
422 
423 static void maybe_free_addr64(TCGv_i64 a64)
424 {
425 #if TARGET_LONG_BITS == 32
426     tcg_temp_free_i64(a64);
427 #endif
428 }
429 
430 void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
431 {
432     const MemOpIdx oi = make_memop_idx(memop, idx);
433 
434     tcg_debug_assert((memop & MO_SIZE) == MO_128);
435     tcg_debug_assert((memop & MO_SIGN) == 0);
436 
437     tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
438 
439     /* TODO: For now, force 32-bit hosts to use the helper. */
440     if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
441         TCGv_i64 lo, hi;
442         TCGArg addr_arg;
443         MemOpIdx adj_oi;
444         bool need_bswap = false;
445 
446         if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
447             lo = TCGV128_HIGH(val);
448             hi = TCGV128_LOW(val);
449             adj_oi = make_memop_idx(memop & ~MO_BSWAP, idx);
450             need_bswap = true;
451         } else {
452             lo = TCGV128_LOW(val);
453             hi = TCGV128_HIGH(val);
454             adj_oi = oi;
455         }
456 
457 #if TARGET_LONG_BITS == 32
458         addr_arg = tcgv_i32_arg(addr);
459 #else
460         addr_arg = tcgv_i64_arg(addr);
461 #endif
462         tcg_gen_op4ii_i64(INDEX_op_qemu_ld_i128, lo, hi, addr_arg, adj_oi);
463 
464         if (need_bswap) {
465             tcg_gen_bswap64_i64(lo, lo);
466             tcg_gen_bswap64_i64(hi, hi);
467         }
468     } else if (use_two_i64_for_i128(memop)) {
469         MemOp mop[2];
470         TCGv addr_p8;
471         TCGv_i64 x, y;
472 
473         canonicalize_memop_i128_as_i64(mop, memop);
474 
475         /*
476          * Since there are no global TCGv_i128, no visible state is
477          * changed if the second load faults.  Load directly into the two
478          * subwords.
479          */
480         if ((memop & MO_BSWAP) == MO_LE) {
481             x = TCGV128_LOW(val);
482             y = TCGV128_HIGH(val);
483         } else {
484             x = TCGV128_HIGH(val);
485             y = TCGV128_LOW(val);
486         }
487 
488         gen_ldst_i64(INDEX_op_qemu_ld_i64, x, addr, mop[0], idx);
489 
490         if ((mop[0] ^ memop) & MO_BSWAP) {
491             tcg_gen_bswap64_i64(x, x);
492         }
493 
494         addr_p8 = tcg_temp_ebb_new();
495         tcg_gen_addi_tl(addr_p8, addr, 8);
496         gen_ldst_i64(INDEX_op_qemu_ld_i64, y, addr_p8, mop[1], idx);
497         tcg_temp_free(addr_p8);
498 
499         if ((mop[0] ^ memop) & MO_BSWAP) {
500             tcg_gen_bswap64_i64(y, y);
501         }
502     } else {
503         TCGv_i64 a64 = maybe_extend_addr64(addr);
504         gen_helper_ld_i128(val, cpu_env, a64, tcg_constant_i32(oi));
505         maybe_free_addr64(a64);
506     }
507 
508     plugin_gen_mem_callbacks(NULL, addr, oi, QEMU_PLUGIN_MEM_R);
509 }
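/*
 * Illustrative front-end call ("ctx->mem_idx" is a placeholder):
 *
 *     tcg_gen_qemu_ld_i128(val, addr, ctx->mem_idx, MO_128 | MO_LE | MO_ALIGN);
 *
 * Depending on the host, this becomes a single qemu_ld_i128 opcode, a pair
 * of 64-bit loads, or a call to the out-of-line ld_i128 helper.
 */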
510 
511 void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
512 {
513     const MemOpIdx oi = make_memop_idx(memop, idx);
514 
515     tcg_debug_assert((memop & MO_SIZE) == MO_128);
516     tcg_debug_assert((memop & MO_SIGN) == 0);
517 
518     tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);
519 
520     /* TODO: For now, force 32-bit hosts to use the helper. */
521 
522     if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
523         TCGv_i64 lo, hi;
524         TCGArg addr_arg;
525         MemOpIdx adj_oi;
526         bool need_bswap = false;
527 
528         if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
529             lo = tcg_temp_new_i64();
530             hi = tcg_temp_new_i64();
531             tcg_gen_bswap64_i64(lo, TCGV128_HIGH(val));
532             tcg_gen_bswap64_i64(hi, TCGV128_LOW(val));
533             adj_oi = make_memop_idx(memop & ~MO_BSWAP, idx);
534             need_bswap = true;
535         } else {
536             lo = TCGV128_LOW(val);
537             hi = TCGV128_HIGH(val);
538             adj_oi = oi;
539         }
540 
541 #if TARGET_LONG_BITS == 32
542         addr_arg = tcgv_i32_arg(addr);
543 #else
544         addr_arg = tcgv_i64_arg(addr);
545 #endif
546         tcg_gen_op4ii_i64(INDEX_op_qemu_st_i128, lo, hi, addr_arg, adj_oi);
547 
548         if (need_bswap) {
549             tcg_temp_free_i64(lo);
550             tcg_temp_free_i64(hi);
551         }
552     } else if (use_two_i64_for_i128(memop)) {
553         MemOp mop[2];
554         TCGv addr_p8;
555         TCGv_i64 x, y;
556 
557         canonicalize_memop_i128_as_i64(mop, memop);
558 
559         if ((memop & MO_BSWAP) == MO_LE) {
560             x = TCGV128_LOW(val);
561             y = TCGV128_HIGH(val);
562         } else {
563             x = TCGV128_HIGH(val);
564             y = TCGV128_LOW(val);
565         }
566 
567         addr_p8 = tcg_temp_ebb_new();
568         if ((mop[0] ^ memop) & MO_BSWAP) {
569             TCGv_i64 t = tcg_temp_ebb_new_i64();
570 
571             tcg_gen_bswap64_i64(t, x);
572             gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr, mop[0], idx);
573             tcg_gen_bswap64_i64(t, y);
574             tcg_gen_addi_tl(addr_p8, addr, 8);
575             gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr_p8, mop[1], idx);
576             tcg_temp_free_i64(t);
577         } else {
578             gen_ldst_i64(INDEX_op_qemu_st_i64, x, addr, mop[0], idx);
579             tcg_gen_addi_tl(addr_p8, addr, 8);
580             gen_ldst_i64(INDEX_op_qemu_st_i64, y, addr_p8, mop[1], idx);
581         }
582         tcg_temp_free(addr_p8);
583     } else {
584         TCGv_i64 a64 = maybe_extend_addr64(addr);
585         gen_helper_st_i128(cpu_env, a64, val, tcg_constant_i32(oi));
586         maybe_free_addr64(a64);
587     }
588 
589     plugin_gen_mem_callbacks(NULL, addr, oi, QEMU_PLUGIN_MEM_W);
590 }
591 
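/*
 * Sign- or zero-extend the low bits of @val as described by the MO_SSIZE
 * bits of @opc; a size covering the whole register is a plain move.
 */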
592 static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc)
593 {
594     switch (opc & MO_SSIZE) {
595     case MO_SB:
596         tcg_gen_ext8s_i32(ret, val);
597         break;
598     case MO_UB:
599         tcg_gen_ext8u_i32(ret, val);
600         break;
601     case MO_SW:
602         tcg_gen_ext16s_i32(ret, val);
603         break;
604     case MO_UW:
605         tcg_gen_ext16u_i32(ret, val);
606         break;
607     default:
608         tcg_gen_mov_i32(ret, val);
609         break;
610     }
611 }
612 
613 static void tcg_gen_ext_i64(TCGv_i64 ret, TCGv_i64 val, MemOp opc)
614 {
615     switch (opc & MO_SSIZE) {
616     case MO_SB:
617         tcg_gen_ext8s_i64(ret, val);
618         break;
619     case MO_UB:
620         tcg_gen_ext8u_i64(ret, val);
621         break;
622     case MO_SW:
623         tcg_gen_ext16s_i64(ret, val);
624         break;
625     case MO_UW:
626         tcg_gen_ext16u_i64(ret, val);
627         break;
628     case MO_SL:
629         tcg_gen_ext32s_i64(ret, val);
630         break;
631     case MO_UL:
632         tcg_gen_ext32u_i64(ret, val);
633         break;
634     default:
635         tcg_gen_mov_i64(ret, val);
636         break;
637     }
638 }
639 
640 typedef void (*gen_atomic_cx_i32)(TCGv_i32, TCGv_env, TCGv_i64,
641                                   TCGv_i32, TCGv_i32, TCGv_i32);
642 typedef void (*gen_atomic_cx_i64)(TCGv_i64, TCGv_env, TCGv_i64,
643                                   TCGv_i64, TCGv_i64, TCGv_i32);
644 typedef void (*gen_atomic_cx_i128)(TCGv_i128, TCGv_env, TCGv_i64,
645                                    TCGv_i128, TCGv_i128, TCGv_i32);
646 typedef void (*gen_atomic_op_i32)(TCGv_i32, TCGv_env, TCGv_i64,
647                                   TCGv_i32, TCGv_i32);
648 typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv_i64,
649                                   TCGv_i64, TCGv_i32);
650 
651 #ifdef CONFIG_ATOMIC64
652 # define WITH_ATOMIC64(X) X,
653 #else
654 # define WITH_ATOMIC64(X)
655 #endif
656 #ifdef CONFIG_CMPXCHG128
657 # define WITH_ATOMIC128(X) X,
658 #else
659 # define WITH_ATOMIC128(X)
660 #endif
661 
662 static void * const table_cmpxchg[(MO_SIZE | MO_BSWAP) + 1] = {
663     [MO_8] = gen_helper_atomic_cmpxchgb,
664     [MO_16 | MO_LE] = gen_helper_atomic_cmpxchgw_le,
665     [MO_16 | MO_BE] = gen_helper_atomic_cmpxchgw_be,
666     [MO_32 | MO_LE] = gen_helper_atomic_cmpxchgl_le,
667     [MO_32 | MO_BE] = gen_helper_atomic_cmpxchgl_be,
668     WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_cmpxchgq_le)
669     WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_cmpxchgq_be)
670     WITH_ATOMIC128([MO_128 | MO_LE] = gen_helper_atomic_cmpxchgo_le)
671     WITH_ATOMIC128([MO_128 | MO_BE] = gen_helper_atomic_cmpxchgo_be)
672 };
673 
674 void tcg_gen_nonatomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
675                                    TCGv_i32 newv, TCGArg idx, MemOp memop)
676 {
677     TCGv_i32 t1 = tcg_temp_ebb_new_i32();
678     TCGv_i32 t2 = tcg_temp_ebb_new_i32();
679 
680     tcg_gen_ext_i32(t2, cmpv, memop & MO_SIZE);
681 
682     tcg_gen_qemu_ld_i32(t1, addr, idx, memop & ~MO_SIGN);
683     tcg_gen_movcond_i32(TCG_COND_EQ, t2, t1, t2, newv, t1);
684     tcg_gen_qemu_st_i32(t2, addr, idx, memop);
685     tcg_temp_free_i32(t2);
686 
687     if (memop & MO_SIGN) {
688         tcg_gen_ext_i32(retv, t1, memop);
689     } else {
690         tcg_gen_mov_i32(retv, t1);
691     }
692     tcg_temp_free_i32(t1);
693 }
694 
695 void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
696                                 TCGv_i32 newv, TCGArg idx, MemOp memop)
697 {
698     gen_atomic_cx_i32 gen;
699     TCGv_i64 a64;
700     MemOpIdx oi;
701 
702     if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
703         tcg_gen_nonatomic_cmpxchg_i32(retv, addr, cmpv, newv, idx, memop);
704         return;
705     }
706 
707     memop = tcg_canonicalize_memop(memop, 0, 0);
708     gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
709     tcg_debug_assert(gen != NULL);
710 
711     oi = make_memop_idx(memop & ~MO_SIGN, idx);
712     a64 = maybe_extend_addr64(addr);
713     gen(retv, cpu_env, a64, cmpv, newv, tcg_constant_i32(oi));
714     maybe_free_addr64(a64);
715 
716     if (memop & MO_SIGN) {
717         tcg_gen_ext_i32(retv, retv, memop);
718     }
719 }
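/*
 * Example front-end use of the cmpxchg expansion (placeholder mmu index):
 *
 *     tcg_gen_atomic_cmpxchg_i32(oldv, addr, cmpv, newv, ctx->mem_idx,
 *                                MO_TEUL | MO_ALIGN);
 *
 * With CF_PARALLEL this dispatches through table_cmpxchg to a helper that
 * performs a real atomic compare-and-swap; otherwise the nonatomic
 * load/movcond/store sequence above is sufficient, because no other vCPU
 * can run concurrently with this one.
 */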
720 
721 void tcg_gen_nonatomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
722                                    TCGv_i64 newv, TCGArg idx, MemOp memop)
723 {
724     TCGv_i64 t1, t2;
725 
726     if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
727         tcg_gen_nonatomic_cmpxchg_i32(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
728                                       TCGV_LOW(newv), idx, memop);
729         if (memop & MO_SIGN) {
730             tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31);
731         } else {
732             tcg_gen_movi_i32(TCGV_HIGH(retv), 0);
733         }
734         return;
735     }
736 
737     t1 = tcg_temp_ebb_new_i64();
738     t2 = tcg_temp_ebb_new_i64();
739 
740     tcg_gen_ext_i64(t2, cmpv, memop & MO_SIZE);
741 
742     tcg_gen_qemu_ld_i64(t1, addr, idx, memop & ~MO_SIGN);
743     tcg_gen_movcond_i64(TCG_COND_EQ, t2, t1, t2, newv, t1);
744     tcg_gen_qemu_st_i64(t2, addr, idx, memop);
745     tcg_temp_free_i64(t2);
746 
747     if (memop & MO_SIGN) {
748         tcg_gen_ext_i64(retv, t1, memop);
749     } else {
750         tcg_gen_mov_i64(retv, t1);
751     }
752     tcg_temp_free_i64(t1);
753 }
754 
755 void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
756                                 TCGv_i64 newv, TCGArg idx, MemOp memop)
757 {
758     if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
759         tcg_gen_nonatomic_cmpxchg_i64(retv, addr, cmpv, newv, idx, memop);
760         return;
761     }
762 
763     if ((memop & MO_SIZE) == MO_64) {
764         gen_atomic_cx_i64 gen;
765 
766         memop = tcg_canonicalize_memop(memop, 1, 0);
767         gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
768         if (gen) {
769             MemOpIdx oi = make_memop_idx(memop, idx);
770             TCGv_i64 a64 = maybe_extend_addr64(addr);
771             gen(retv, cpu_env, a64, cmpv, newv, tcg_constant_i32(oi));
772             maybe_free_addr64(a64);
773             return;
774         }
775 
776         gen_helper_exit_atomic(cpu_env);
777 
778         /*
779          * Produce a result for a well-formed opcode stream.  This satisfies
780          * liveness for set before used, which happens before this dead code
781          * is removed.
782          */
783         tcg_gen_movi_i64(retv, 0);
784         return;
785     }
786 
787     if (TCG_TARGET_REG_BITS == 32) {
788         tcg_gen_atomic_cmpxchg_i32(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
789                                    TCGV_LOW(newv), idx, memop);
790         if (memop & MO_SIGN) {
791             tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31);
792         } else {
793             tcg_gen_movi_i32(TCGV_HIGH(retv), 0);
794         }
795     } else {
796         TCGv_i32 c32 = tcg_temp_ebb_new_i32();
797         TCGv_i32 n32 = tcg_temp_ebb_new_i32();
798         TCGv_i32 r32 = tcg_temp_ebb_new_i32();
799 
800         tcg_gen_extrl_i64_i32(c32, cmpv);
801         tcg_gen_extrl_i64_i32(n32, newv);
802         tcg_gen_atomic_cmpxchg_i32(r32, addr, c32, n32, idx, memop & ~MO_SIGN);
803         tcg_temp_free_i32(c32);
804         tcg_temp_free_i32(n32);
805 
806         tcg_gen_extu_i32_i64(retv, r32);
807         tcg_temp_free_i32(r32);
808 
809         if (memop & MO_SIGN) {
810             tcg_gen_ext_i64(retv, retv, memop);
811         }
812     }
813 }
814 
815 void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
816                                     TCGv_i128 newv, TCGArg idx, MemOp memop)
817 {
818     if (TCG_TARGET_REG_BITS == 32) {
819         /* Inline expansion below is simply too large for 32-bit hosts. */
820         gen_atomic_cx_i128 gen = ((memop & MO_BSWAP) == MO_LE
821                                   ? gen_helper_nonatomic_cmpxchgo_le
822                                   : gen_helper_nonatomic_cmpxchgo_be);
823         MemOpIdx oi = make_memop_idx(memop, idx);
824         TCGv_i64 a64;
825 
826         tcg_debug_assert((memop & MO_SIZE) == MO_128);
827         tcg_debug_assert((memop & MO_SIGN) == 0);
828 
829         a64 = maybe_extend_addr64(addr);
830         gen(retv, cpu_env, a64, cmpv, newv, tcg_constant_i32(oi));
831         maybe_free_addr64(a64);
832     } else {
833         TCGv_i128 oldv = tcg_temp_ebb_new_i128();
834         TCGv_i128 tmpv = tcg_temp_ebb_new_i128();
835         TCGv_i64 t0 = tcg_temp_ebb_new_i64();
836         TCGv_i64 t1 = tcg_temp_ebb_new_i64();
837         TCGv_i64 z = tcg_constant_i64(0);
838 
839         tcg_gen_qemu_ld_i128(oldv, addr, idx, memop);
840 
841         /* Compare i128 */
842         tcg_gen_xor_i64(t0, TCGV128_LOW(oldv), TCGV128_LOW(cmpv));
843         tcg_gen_xor_i64(t1, TCGV128_HIGH(oldv), TCGV128_HIGH(cmpv));
844         tcg_gen_or_i64(t0, t0, t1);
845 
846         /* tmpv = equal ? newv : oldv */
847         tcg_gen_movcond_i64(TCG_COND_EQ, TCGV128_LOW(tmpv), t0, z,
848                             TCGV128_LOW(newv), TCGV128_LOW(oldv));
849         tcg_gen_movcond_i64(TCG_COND_EQ, TCGV128_HIGH(tmpv), t0, z,
850                             TCGV128_HIGH(newv), TCGV128_HIGH(oldv));
851 
852         /* Unconditional writeback. */
853         tcg_gen_qemu_st_i128(tmpv, addr, idx, memop);
854         tcg_gen_mov_i128(retv, oldv);
855 
856         tcg_temp_free_i64(t0);
857         tcg_temp_free_i64(t1);
858         tcg_temp_free_i128(tmpv);
859         tcg_temp_free_i128(oldv);
860     }
861 }
862 
863 void tcg_gen_atomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
864                                  TCGv_i128 newv, TCGArg idx, MemOp memop)
865 {
866     gen_atomic_cx_i128 gen;
867 
868     if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
869         tcg_gen_nonatomic_cmpxchg_i128(retv, addr, cmpv, newv, idx, memop);
870         return;
871     }
872 
873     tcg_debug_assert((memop & MO_SIZE) == MO_128);
874     tcg_debug_assert((memop & MO_SIGN) == 0);
875     gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
876 
877     if (gen) {
878         MemOpIdx oi = make_memop_idx(memop, idx);
879         TCGv_i64 a64 = maybe_extend_addr64(addr);
880         gen(retv, cpu_env, a64, cmpv, newv, tcg_constant_i32(oi));
881         maybe_free_addr64(a64);
882         return;
883     }
884 
885     gen_helper_exit_atomic(cpu_env);
886 
887     /*
888      * Produce a result for a well-formed opcode stream.  This satisfies
889      * liveness for set before used, which happens before this dead code
890      * is removed.
891      */
892     tcg_gen_movi_i64(TCGV128_LOW(retv), 0);
893     tcg_gen_movi_i64(TCGV128_HIGH(retv), 0);
894 }
895 
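/*
 * Nonatomic expansion of a read-modify-write operation as a plain load,
 * operate, store sequence.  Used by the entry points below only when the
 * TB is not compiled for parallel execution, so the sequence cannot be
 * observed mid-update.  "new_val" selects whether the value before or
 * after the operation is returned.
 */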
896 static void do_nonatomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
897                                 TCGArg idx, MemOp memop, bool new_val,
898                                 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
899 {
900     TCGv_i32 t1 = tcg_temp_ebb_new_i32();
901     TCGv_i32 t2 = tcg_temp_ebb_new_i32();
902 
903     memop = tcg_canonicalize_memop(memop, 0, 0);
904 
905     tcg_gen_qemu_ld_i32(t1, addr, idx, memop);
906     tcg_gen_ext_i32(t2, val, memop);
907     gen(t2, t1, t2);
908     tcg_gen_qemu_st_i32(t2, addr, idx, memop);
909 
910     tcg_gen_ext_i32(ret, (new_val ? t2 : t1), memop);
911     tcg_temp_free_i32(t1);
912     tcg_temp_free_i32(t2);
913 }
914 
915 static void do_atomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
916                              TCGArg idx, MemOp memop, void * const table[])
917 {
918     gen_atomic_op_i32 gen;
919     TCGv_i64 a64;
920     MemOpIdx oi;
921 
922     memop = tcg_canonicalize_memop(memop, 0, 0);
923 
924     gen = table[memop & (MO_SIZE | MO_BSWAP)];
925     tcg_debug_assert(gen != NULL);
926 
927     oi = make_memop_idx(memop & ~MO_SIGN, idx);
928     a64 = maybe_extend_addr64(addr);
929     gen(ret, cpu_env, a64, val, tcg_constant_i32(oi));
930     maybe_free_addr64(a64);
931 
932     if (memop & MO_SIGN) {
933         tcg_gen_ext_i32(ret, ret, memop);
934     }
935 }
936 
937 static void do_nonatomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
938                                 TCGArg idx, MemOp memop, bool new_val,
939                                 void (*gen)(TCGv_i64, TCGv_i64, TCGv_i64))
940 {
941     TCGv_i64 t1 = tcg_temp_ebb_new_i64();
942     TCGv_i64 t2 = tcg_temp_ebb_new_i64();
943 
944     memop = tcg_canonicalize_memop(memop, 1, 0);
945 
946     tcg_gen_qemu_ld_i64(t1, addr, idx, memop);
947     tcg_gen_ext_i64(t2, val, memop);
948     gen(t2, t1, t2);
949     tcg_gen_qemu_st_i64(t2, addr, idx, memop);
950 
951     tcg_gen_ext_i64(ret, (new_val ? t2 : t1), memop);
952     tcg_temp_free_i64(t1);
953     tcg_temp_free_i64(t2);
954 }
955 
956 static void do_atomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
957                              TCGArg idx, MemOp memop, void * const table[])
958 {
959     memop = tcg_canonicalize_memop(memop, 1, 0);
960 
961     if ((memop & MO_SIZE) == MO_64) {
962 #ifdef CONFIG_ATOMIC64
963         gen_atomic_op_i64 gen;
964         TCGv_i64 a64;
965         MemOpIdx oi;
966 
967         gen = table[memop & (MO_SIZE | MO_BSWAP)];
968         tcg_debug_assert(gen != NULL);
969 
970         oi = make_memop_idx(memop & ~MO_SIGN, idx);
971         a64 = maybe_extend_addr64(addr);
972         gen(ret, cpu_env, a64, val, tcg_constant_i32(oi));
973         maybe_free_addr64(a64);
974 #else
975         gen_helper_exit_atomic(cpu_env);
976         /* Produce a result, so that we have a well-formed opcode stream
977            with respect to uses of the result in the (dead) code following.  */
978         tcg_gen_movi_i64(ret, 0);
979 #endif /* CONFIG_ATOMIC64 */
980     } else {
981         TCGv_i32 v32 = tcg_temp_ebb_new_i32();
982         TCGv_i32 r32 = tcg_temp_ebb_new_i32();
983 
984         tcg_gen_extrl_i64_i32(v32, val);
985         do_atomic_op_i32(r32, addr, v32, idx, memop & ~MO_SIGN, table);
986         tcg_temp_free_i32(v32);
987 
988         tcg_gen_extu_i32_i64(ret, r32);
989         tcg_temp_free_i32(r32);
990 
991         if (memop & MO_SIGN) {
992             tcg_gen_ext_i64(ret, ret, memop);
993         }
994     }
995 }
996 
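/*
 * For each operation, build the table of per-size/endianness atomic helpers
 * and the i32/i64 entry points that choose between the atomic helper path
 * (CF_PARALLEL) and the nonatomic expansion above.  NEW selects fetch-old
 * versus op-then-fetch semantics for the nonatomic case.
 */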
997 #define GEN_ATOMIC_HELPER(NAME, OP, NEW)                                \
998 static void * const table_##NAME[(MO_SIZE | MO_BSWAP) + 1] = {          \
999     [MO_8] = gen_helper_atomic_##NAME##b,                               \
1000     [MO_16 | MO_LE] = gen_helper_atomic_##NAME##w_le,                   \
1001     [MO_16 | MO_BE] = gen_helper_atomic_##NAME##w_be,                   \
1002     [MO_32 | MO_LE] = gen_helper_atomic_##NAME##l_le,                   \
1003     [MO_32 | MO_BE] = gen_helper_atomic_##NAME##l_be,                   \
1004     WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_##NAME##q_le)     \
1005     WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_##NAME##q_be)     \
1006 };                                                                      \
1007 void tcg_gen_atomic_##NAME##_i32                                        \
1008     (TCGv_i32 ret, TCGv addr, TCGv_i32 val, TCGArg idx, MemOp memop)    \
1009 {                                                                       \
1010     if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) {                        \
1011         do_atomic_op_i32(ret, addr, val, idx, memop, table_##NAME);     \
1012     } else {                                                            \
1013         do_nonatomic_op_i32(ret, addr, val, idx, memop, NEW,            \
1014                             tcg_gen_##OP##_i32);                        \
1015     }                                                                   \
1016 }                                                                       \
1017 void tcg_gen_atomic_##NAME##_i64                                        \
1018     (TCGv_i64 ret, TCGv addr, TCGv_i64 val, TCGArg idx, MemOp memop)    \
1019 {                                                                       \
1020     if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) {                        \
1021         do_atomic_op_i64(ret, addr, val, idx, memop, table_##NAME);     \
1022     } else {                                                            \
1023         do_nonatomic_op_i64(ret, addr, val, idx, memop, NEW,            \
1024                             tcg_gen_##OP##_i64);                        \
1025     }                                                                   \
1026 }
1027 
1028 GEN_ATOMIC_HELPER(fetch_add, add, 0)
1029 GEN_ATOMIC_HELPER(fetch_and, and, 0)
1030 GEN_ATOMIC_HELPER(fetch_or, or, 0)
1031 GEN_ATOMIC_HELPER(fetch_xor, xor, 0)
1032 GEN_ATOMIC_HELPER(fetch_smin, smin, 0)
1033 GEN_ATOMIC_HELPER(fetch_umin, umin, 0)
1034 GEN_ATOMIC_HELPER(fetch_smax, smax, 0)
1035 GEN_ATOMIC_HELPER(fetch_umax, umax, 0)
1036 
1037 GEN_ATOMIC_HELPER(add_fetch, add, 1)
1038 GEN_ATOMIC_HELPER(and_fetch, and, 1)
1039 GEN_ATOMIC_HELPER(or_fetch, or, 1)
1040 GEN_ATOMIC_HELPER(xor_fetch, xor, 1)
1041 GEN_ATOMIC_HELPER(smin_fetch, smin, 1)
1042 GEN_ATOMIC_HELPER(umin_fetch, umin, 1)
1043 GEN_ATOMIC_HELPER(smax_fetch, smax, 1)
1044 GEN_ATOMIC_HELPER(umax_fetch, umax, 1)
1045 
1046 static void tcg_gen_mov2_i32(TCGv_i32 r, TCGv_i32 a, TCGv_i32 b)
1047 {
1048     tcg_gen_mov_i32(r, b);
1049 }
1050 
1051 static void tcg_gen_mov2_i64(TCGv_i64 r, TCGv_i64 a, TCGv_i64 b)
1052 {
1053     tcg_gen_mov_i64(r, b);
1054 }
1055 
1056 GEN_ATOMIC_HELPER(xchg, mov2, 0)
1057 
1058 #undef GEN_ATOMIC_HELPER
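/*
 * The expansions above provide entry points such as
 * tcg_gen_atomic_fetch_add_i32 and tcg_gen_atomic_xchg_i64.  Illustrative
 * use (placeholder mmu index):
 *
 *     tcg_gen_atomic_fetch_add_i32(old, addr, inc, ctx->mem_idx,
 *                                  MO_TEUL | MO_ALIGN);
 *
 * returns the pre-increment memory value in "old"; the op-then-fetch
 * variants (e.g. tcg_gen_atomic_add_fetch_i32) return the post-operation
 * value instead.
 */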
1059