xref: /qemu/tcg/tcg-op-ldst.c (revision ddfdd4178beb56543ac98976efbc885d7e2b5150)
/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "exec/exec-all.h"
#include "tcg/tcg.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-mo.h"
#include "exec/plugin-gen.h"
#include "tcg-internal.h"

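/*
 * Canonicalize a MemOp for a guest load or store: fold MO_ALIGN_N into
 * plain MO_ALIGN when the alignment equals the access size, drop MO_BSWAP
 * for byte accesses, and drop MO_SIGN where it is meaningless (stores, or
 * loads that already fill the destination width).
 */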
static inline MemOp tcg_canonicalize_memop(MemOp op, bool is64, bool st)
{
    /* Trigger the asserts within as early as possible.  */
    unsigned a_bits = get_alignment_bits(op);

    /* Prefer MO_ALIGN+MO_XX over MO_ALIGN_XX+MO_XX */
    if (a_bits == (op & MO_SIZE)) {
        op = (op & ~MO_AMASK) | MO_ALIGN;
    }

    switch (op & MO_SIZE) {
    case MO_8:
        op &= ~MO_BSWAP;
        break;
    case MO_16:
        break;
    case MO_32:
        if (!is64) {
            op &= ~MO_SIGN;
        }
        break;
    case MO_64:
        if (is64) {
            op &= ~MO_SIGN;
            break;
        }
        /* fall through */
    default:
        g_assert_not_reached();
    }
    if (st) {
        op &= ~MO_SIGN;
    }
    return op;
}

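/*
 * Emit a load/store opcode with a 32-bit value operand, splitting the
 * guest address into low/high halves when a 64-bit guest address must be
 * carried in two registers on a 32-bit host.
 */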
static void gen_ldst_i32(TCGOpcode opc, TCGv_i32 val, TCGv addr,
                         MemOp memop, TCGArg idx)
{
    MemOpIdx oi = make_memop_idx(memop, idx);
#if TARGET_LONG_BITS == 32
    tcg_gen_op3i_i32(opc, val, addr, oi);
#else
    if (TCG_TARGET_REG_BITS == 32) {
        tcg_gen_op4i_i32(opc, val, TCGV_LOW(addr), TCGV_HIGH(addr), oi);
    } else {
        tcg_gen_op3(opc, tcgv_i32_arg(val), tcgv_i64_arg(addr), oi);
    }
#endif
}

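/*
 * As gen_ldst_i32, but with a 64-bit value operand: on a 32-bit host the
 * value, and for a 64-bit guest also the address, are passed as register
 * pairs.
 */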
static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 val, TCGv addr,
                         MemOp memop, TCGArg idx)
{
    MemOpIdx oi = make_memop_idx(memop, idx);
#if TARGET_LONG_BITS == 32
    if (TCG_TARGET_REG_BITS == 32) {
        tcg_gen_op4i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val), addr, oi);
    } else {
        tcg_gen_op3(opc, tcgv_i64_arg(val), tcgv_i32_arg(addr), oi);
    }
#else
    if (TCG_TARGET_REG_BITS == 32) {
        tcg_gen_op5i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val),
                         TCGV_LOW(addr), TCGV_HIGH(addr), oi);
    } else {
        tcg_gen_op3i_i64(opc, val, addr, oi);
    }
#endif
}

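/*
 * Emit a memory barrier for the orderings in @type, after masking out
 * those the guest memory model does not require and those the host
 * already provides by default; emit nothing if no ordering remains.
 */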
static void tcg_gen_req_mo(TCGBar type)
{
#ifdef TCG_GUEST_DEFAULT_MO
    type &= TCG_GUEST_DEFAULT_MO;
#endif
    type &= ~TCG_TARGET_DEFAULT_MO;
    if (type) {
        tcg_gen_mb(type | TCG_BAR_SC);
    }
}

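/*
 * When plugin instrumentation is active, copy the guest virtual address
 * into a fresh temporary so that it is still available for the memory
 * callback emitted after the access (a load may overwrite the register
 * holding the address).
 */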
static inline TCGv plugin_prep_mem_callbacks(TCGv vaddr)
{
#ifdef CONFIG_PLUGIN
    if (tcg_ctx->plugin_insn != NULL) {
        /* Save a copy of the vaddr for use after a load.  */
        TCGv temp = tcg_temp_new();
        tcg_gen_mov_tl(temp, vaddr);
        return temp;
    }
#endif
    return vaddr;
}

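/*
 * Emit the (initially empty) plugin memory callback for this access and
 * release the address copy made by plugin_prep_mem_callbacks().
 */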
static void plugin_gen_mem_callbacks(TCGv vaddr, MemOpIdx oi,
                                     enum qemu_plugin_mem_rw rw)
{
#ifdef CONFIG_PLUGIN
    if (tcg_ctx->plugin_insn != NULL) {
        qemu_plugin_meminfo_t info = make_plugin_meminfo(oi, rw);
        plugin_gen_empty_mem_callback(vaddr, info);
        tcg_temp_free(vaddr);
    }
#endif
}

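/*
 * Emit a guest load into a 32-bit value.  If the access is byte-swapped
 * but the host backend cannot swap as part of the memory operation, load
 * in host order and byte-swap (and, if needed, sign-extend) afterwards.
 */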
void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
{
    MemOp orig_memop;
    MemOpIdx oi;

    tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
    memop = tcg_canonicalize_memop(memop, 0, 0);
    oi = make_memop_idx(memop, idx);

    orig_memop = memop;
    if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
        memop &= ~MO_BSWAP;
        /* The bswap primitive benefits from zero-extended input.  */
        if ((memop & MO_SSIZE) == MO_SW) {
            memop &= ~MO_SIGN;
        }
    }

    addr = plugin_prep_mem_callbacks(addr);
    gen_ldst_i32(INDEX_op_qemu_ld_i32, val, addr, memop, idx);
    plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);

    if ((orig_memop ^ memop) & MO_BSWAP) {
        switch (orig_memop & MO_SIZE) {
        case MO_16:
            tcg_gen_bswap16_i32(val, val, (orig_memop & MO_SIGN
                                           ? TCG_BSWAP_IZ | TCG_BSWAP_OS
                                           : TCG_BSWAP_IZ | TCG_BSWAP_OZ));
            break;
        case MO_32:
            tcg_gen_bswap32_i32(val, val);
            break;
        default:
            g_assert_not_reached();
        }
    }
}

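/*
 * Emit a guest store of a 32-bit value.  If the host backend cannot
 * byte-swap as part of the store, swap into a scratch temporary first
 * and store that in host order.
 */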
void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
{
    TCGv_i32 swap = NULL;
    MemOpIdx oi;

    tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
    memop = tcg_canonicalize_memop(memop, 0, 1);
    oi = make_memop_idx(memop, idx);

    if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
        swap = tcg_temp_ebb_new_i32();
        switch (memop & MO_SIZE) {
        case MO_16:
            tcg_gen_bswap16_i32(swap, val, 0);
            break;
        case MO_32:
            tcg_gen_bswap32_i32(swap, val);
            break;
        default:
            g_assert_not_reached();
        }
        val = swap;
        memop &= ~MO_BSWAP;
    }

    addr = plugin_prep_mem_callbacks(addr);
    if (TCG_TARGET_HAS_qemu_st8_i32 && (memop & MO_SIZE) == MO_8) {
        gen_ldst_i32(INDEX_op_qemu_st8_i32, val, addr, memop, idx);
    } else {
        gen_ldst_i32(INDEX_op_qemu_st_i32, val, addr, memop, idx);
    }
    plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);

    if (swap) {
        tcg_temp_free_i32(swap);
    }
}

void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
{
    MemOp orig_memop;
    MemOpIdx oi;

    if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
        tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop);
        if (memop & MO_SIGN) {
            tcg_gen_sari_i32(TCGV_HIGH(val), TCGV_LOW(val), 31);
        } else {
            tcg_gen_movi_i32(TCGV_HIGH(val), 0);
        }
        return;
    }

    tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
    memop = tcg_canonicalize_memop(memop, 1, 0);
    oi = make_memop_idx(memop, idx);

    orig_memop = memop;
    if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
        memop &= ~MO_BSWAP;
        /* The bswap primitive benefits from zero-extended input.  */
        if ((memop & MO_SIGN) && (memop & MO_SIZE) < MO_64) {
            memop &= ~MO_SIGN;
        }
    }

    addr = plugin_prep_mem_callbacks(addr);
    gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, memop, idx);
    plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);

    if ((orig_memop ^ memop) & MO_BSWAP) {
        int flags = (orig_memop & MO_SIGN
                     ? TCG_BSWAP_IZ | TCG_BSWAP_OS
                     : TCG_BSWAP_IZ | TCG_BSWAP_OZ);
        switch (orig_memop & MO_SIZE) {
        case MO_16:
            tcg_gen_bswap16_i64(val, val, flags);
            break;
        case MO_32:
            tcg_gen_bswap32_i64(val, val, flags);
            break;
        case MO_64:
            tcg_gen_bswap64_i64(val, val);
            break;
        default:
            g_assert_not_reached();
        }
    }
}

void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
{
    TCGv_i64 swap = NULL;
    MemOpIdx oi;

    if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
        tcg_gen_qemu_st_i32(TCGV_LOW(val), addr, idx, memop);
        return;
    }

    tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
    memop = tcg_canonicalize_memop(memop, 1, 1);
    oi = make_memop_idx(memop, idx);

    if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
        swap = tcg_temp_ebb_new_i64();
        switch (memop & MO_SIZE) {
        case MO_16:
            tcg_gen_bswap16_i64(swap, val, 0);
            break;
        case MO_32:
            tcg_gen_bswap32_i64(swap, val, 0);
            break;
        case MO_64:
            tcg_gen_bswap64_i64(swap, val);
            break;
        default:
            g_assert_not_reached();
        }
        val = swap;
        memop &= ~MO_BSWAP;
    }

    addr = plugin_prep_mem_callbacks(addr);
    gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, memop, idx);
    plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);

    if (swap) {
        tcg_temp_free_i64(swap);
    }
}

/*
 * Return true if @mop, without knowledge of the pointer alignment,
 * does not require 16-byte atomicity, and it would be advantageous
 * to avoid a call to a helper function.
 */
static bool use_two_i64_for_i128(MemOp mop)
{
#ifdef CONFIG_SOFTMMU
    /* Two softmmu tlb lookups are larger than one function call. */
    return false;
#else
    /*
     * For user-only, two 64-bit operations may well be smaller than a call.
     * Determine if that would be legal for the requested atomicity.
     */
    switch (mop & MO_ATOM_MASK) {
    case MO_ATOM_NONE:
    case MO_ATOM_IFALIGN_PAIR:
        return true;
    case MO_ATOM_IFALIGN:
    case MO_ATOM_SUBALIGN:
    case MO_ATOM_WITHIN16:
    case MO_ATOM_WITHIN16_PAIR:
        /* In a serialized context, no atomicity is required. */
        return !(tcg_ctx->gen_tb->cflags & CF_PARALLEL);
    default:
        g_assert_not_reached();
    }
#endif
}

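/*
 * Split a 128-bit MemOp into the two 64-bit MemOps used when the access
 * is expanded as a pair of 8-byte operations: ret[0] describes the first
 * (lower-addressed) half and ret[1] the second, with the original
 * alignment requirement preserved across the pair.
 */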
static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
{
    MemOp mop_1 = orig, mop_2;

    tcg_debug_assert((orig & MO_SIZE) == MO_128);
    tcg_debug_assert((orig & MO_SIGN) == 0);

    /* Reduce the size to 64-bit. */
    mop_1 = (mop_1 & ~MO_SIZE) | MO_64;

    /* Retain the alignment constraints of the original. */
    switch (orig & MO_AMASK) {
    case MO_UNALN:
    case MO_ALIGN_2:
    case MO_ALIGN_4:
        mop_2 = mop_1;
        break;
    case MO_ALIGN_8:
        /* Prefer MO_ALIGN+MO_64 to MO_ALIGN_8+MO_64. */
        mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
        mop_2 = mop_1;
        break;
    case MO_ALIGN:
        /* Second has 8-byte alignment; first has 16-byte alignment. */
        mop_2 = mop_1;
        mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN_16;
        break;
    case MO_ALIGN_16:
    case MO_ALIGN_32:
    case MO_ALIGN_64:
        /* Second has 8-byte alignment; first retains original. */
        mop_2 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
        break;
    default:
        g_assert_not_reached();
    }

    /* Use a byte ordering implemented by the host. */
    if ((orig & MO_BSWAP) && !tcg_target_has_memory_bswap(mop_1)) {
        mop_1 &= ~MO_BSWAP;
        mop_2 &= ~MO_BSWAP;
    }

    ret[0] = mop_1;
    ret[1] = mop_2;
}

#if TARGET_LONG_BITS == 64
#define tcg_temp_ebb_new  tcg_temp_ebb_new_i64
#else
#define tcg_temp_ebb_new  tcg_temp_ebb_new_i32
#endif

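/*
 * Helpers that call out to C need the guest address as a TCGv_i64.  For a
 * 32-bit guest, zero-extend the address into a fresh i64 temporary that
 * maybe_free_addr64() later releases; for a 64-bit guest, pass it through.
 */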
static TCGv_i64 maybe_extend_addr64(TCGv addr)
{
#if TARGET_LONG_BITS == 32
    TCGv_i64 a64 = tcg_temp_ebb_new_i64();
    tcg_gen_extu_i32_i64(a64, addr);
    return a64;
#else
    return addr;
#endif
}

static void maybe_free_addr64(TCGv_i64 a64)
{
#if TARGET_LONG_BITS == 32
    tcg_temp_free_i64(a64);
#endif
}

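/*
 * Emit a guest load of a 128-bit value.  Use the host's 128-bit load
 * opcode if available, otherwise either a pair of 64-bit loads (when that
 * is compatible with the requested atomicity) or a call to the
 * out-of-line helper.
 */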
void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
{
    const MemOpIdx oi = make_memop_idx(memop, idx);

    tcg_debug_assert((memop & MO_SIZE) == MO_128);
    tcg_debug_assert((memop & MO_SIGN) == 0);

    tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
    addr = plugin_prep_mem_callbacks(addr);

    /* TODO: For now, force 32-bit hosts to use the helper. */
    if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
        TCGv_i64 lo, hi;
        TCGArg addr_arg;
        MemOpIdx adj_oi;
        bool need_bswap = false;

        if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
            lo = TCGV128_HIGH(val);
            hi = TCGV128_LOW(val);
            adj_oi = make_memop_idx(memop & ~MO_BSWAP, idx);
            need_bswap = true;
        } else {
            lo = TCGV128_LOW(val);
            hi = TCGV128_HIGH(val);
            adj_oi = oi;
        }

#if TARGET_LONG_BITS == 32
        addr_arg = tcgv_i32_arg(addr);
#else
        addr_arg = tcgv_i64_arg(addr);
#endif
        tcg_gen_op4ii_i64(INDEX_op_qemu_ld_i128, lo, hi, addr_arg, adj_oi);

        if (need_bswap) {
            tcg_gen_bswap64_i64(lo, lo);
            tcg_gen_bswap64_i64(hi, hi);
        }
    } else if (use_two_i64_for_i128(memop)) {
        MemOp mop[2];
        TCGv addr_p8;
        TCGv_i64 x, y;

        canonicalize_memop_i128_as_i64(mop, memop);

        /*
         * Since there are no global TCGv_i128, there is no visible state
         * changed if the second load faults.  Load directly into the two
         * subwords.
         */
        if ((memop & MO_BSWAP) == MO_LE) {
            x = TCGV128_LOW(val);
            y = TCGV128_HIGH(val);
        } else {
            x = TCGV128_HIGH(val);
            y = TCGV128_LOW(val);
        }

        gen_ldst_i64(INDEX_op_qemu_ld_i64, x, addr, mop[0], idx);

        if ((mop[0] ^ memop) & MO_BSWAP) {
            tcg_gen_bswap64_i64(x, x);
        }

        addr_p8 = tcg_temp_ebb_new();
        tcg_gen_addi_tl(addr_p8, addr, 8);
        gen_ldst_i64(INDEX_op_qemu_ld_i64, y, addr_p8, mop[1], idx);
        tcg_temp_free(addr_p8);

        if ((mop[0] ^ memop) & MO_BSWAP) {
            tcg_gen_bswap64_i64(y, y);
        }
    } else {
        TCGv_i64 a64 = maybe_extend_addr64(addr);
        gen_helper_ld_i128(val, cpu_env, a64, tcg_constant_i32(oi));
        maybe_free_addr64(a64);
    }

    plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
}

void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
{
    const MemOpIdx oi = make_memop_idx(memop, idx);

    tcg_debug_assert((memop & MO_SIZE) == MO_128);
    tcg_debug_assert((memop & MO_SIGN) == 0);

    tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
    addr = plugin_prep_mem_callbacks(addr);

    /* TODO: For now, force 32-bit hosts to use the helper. */

    if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
        TCGv_i64 lo, hi;
        TCGArg addr_arg;
        MemOpIdx adj_oi;
        bool need_bswap = false;

        if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
            lo = tcg_temp_new_i64();
            hi = tcg_temp_new_i64();
            tcg_gen_bswap64_i64(lo, TCGV128_HIGH(val));
            tcg_gen_bswap64_i64(hi, TCGV128_LOW(val));
            adj_oi = make_memop_idx(memop & ~MO_BSWAP, idx);
            need_bswap = true;
        } else {
            lo = TCGV128_LOW(val);
            hi = TCGV128_HIGH(val);
            adj_oi = oi;
        }

#if TARGET_LONG_BITS == 32
        addr_arg = tcgv_i32_arg(addr);
#else
        addr_arg = tcgv_i64_arg(addr);
#endif
        tcg_gen_op4ii_i64(INDEX_op_qemu_st_i128, lo, hi, addr_arg, adj_oi);

        if (need_bswap) {
            tcg_temp_free_i64(lo);
            tcg_temp_free_i64(hi);
        }
    } else if (use_two_i64_for_i128(memop)) {
        MemOp mop[2];
        TCGv addr_p8;
        TCGv_i64 x, y;

        canonicalize_memop_i128_as_i64(mop, memop);

        if ((memop & MO_BSWAP) == MO_LE) {
            x = TCGV128_LOW(val);
            y = TCGV128_HIGH(val);
        } else {
            x = TCGV128_HIGH(val);
            y = TCGV128_LOW(val);
        }

        addr_p8 = tcg_temp_ebb_new();
        if ((mop[0] ^ memop) & MO_BSWAP) {
            TCGv_i64 t = tcg_temp_ebb_new_i64();

            tcg_gen_bswap64_i64(t, x);
            gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr, mop[0], idx);
            tcg_gen_bswap64_i64(t, y);
            tcg_gen_addi_tl(addr_p8, addr, 8);
            gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr_p8, mop[1], idx);
            tcg_temp_free_i64(t);
        } else {
            gen_ldst_i64(INDEX_op_qemu_st_i64, x, addr, mop[0], idx);
            tcg_gen_addi_tl(addr_p8, addr, 8);
            gen_ldst_i64(INDEX_op_qemu_st_i64, y, addr_p8, mop[1], idx);
        }
        tcg_temp_free(addr_p8);
    } else {
        TCGv_i64 a64 = maybe_extend_addr64(addr);
        gen_helper_st_i128(cpu_env, a64, val, tcg_constant_i32(oi));
        maybe_free_addr64(a64);
    }

    plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
}

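/* Sign- or zero-extend a 32-bit value according to the MO_SSIZE of @opc. */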
static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc)
{
    switch (opc & MO_SSIZE) {
    case MO_SB:
        tcg_gen_ext8s_i32(ret, val);
        break;
    case MO_UB:
        tcg_gen_ext8u_i32(ret, val);
        break;
    case MO_SW:
        tcg_gen_ext16s_i32(ret, val);
        break;
    case MO_UW:
        tcg_gen_ext16u_i32(ret, val);
        break;
    default:
        tcg_gen_mov_i32(ret, val);
        break;
    }
}

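/* As tcg_gen_ext_i32, but for a 64-bit value (MO_SL/MO_UL included). */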
static void tcg_gen_ext_i64(TCGv_i64 ret, TCGv_i64 val, MemOp opc)
{
    switch (opc & MO_SSIZE) {
    case MO_SB:
        tcg_gen_ext8s_i64(ret, val);
        break;
    case MO_UB:
        tcg_gen_ext8u_i64(ret, val);
        break;
    case MO_SW:
        tcg_gen_ext16s_i64(ret, val);
        break;
    case MO_UW:
        tcg_gen_ext16u_i64(ret, val);
        break;
    case MO_SL:
        tcg_gen_ext32s_i64(ret, val);
        break;
    case MO_UL:
        tcg_gen_ext32u_i64(ret, val);
        break;
    default:
        tcg_gen_mov_i64(ret, val);
        break;
    }
}

typedef void (*gen_atomic_cx_i32)(TCGv_i32, TCGv_env, TCGv_i64,
                                  TCGv_i32, TCGv_i32, TCGv_i32);
typedef void (*gen_atomic_cx_i64)(TCGv_i64, TCGv_env, TCGv_i64,
                                  TCGv_i64, TCGv_i64, TCGv_i32);
typedef void (*gen_atomic_cx_i128)(TCGv_i128, TCGv_env, TCGv_i64,
                                   TCGv_i128, TCGv_i128, TCGv_i32);
typedef void (*gen_atomic_op_i32)(TCGv_i32, TCGv_env, TCGv_i64,
                                  TCGv_i32, TCGv_i32);
typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv_i64,
                                  TCGv_i64, TCGv_i32);

#ifdef CONFIG_ATOMIC64
# define WITH_ATOMIC64(X) X,
#else
# define WITH_ATOMIC64(X)
#endif
#ifdef CONFIG_CMPXCHG128
# define WITH_ATOMIC128(X) X,
#else
# define WITH_ATOMIC128(X)
#endif

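/*
 * Out-of-line cmpxchg helpers, indexed by access size and endianness.
 * The 64-bit and 128-bit entries are present only when the host provides
 * the corresponding atomic primitives (CONFIG_ATOMIC64, CONFIG_CMPXCHG128).
 */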
static void * const table_cmpxchg[(MO_SIZE | MO_BSWAP) + 1] = {
    [MO_8] = gen_helper_atomic_cmpxchgb,
    [MO_16 | MO_LE] = gen_helper_atomic_cmpxchgw_le,
    [MO_16 | MO_BE] = gen_helper_atomic_cmpxchgw_be,
    [MO_32 | MO_LE] = gen_helper_atomic_cmpxchgl_le,
    [MO_32 | MO_BE] = gen_helper_atomic_cmpxchgl_be,
    WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_cmpxchgq_le)
    WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_cmpxchgq_be)
    WITH_ATOMIC128([MO_128 | MO_LE] = gen_helper_atomic_cmpxchgo_le)
    WITH_ATOMIC128([MO_128 | MO_BE] = gen_helper_atomic_cmpxchgo_be)
};

void tcg_gen_nonatomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
                                   TCGv_i32 newv, TCGArg idx, MemOp memop)
{
    TCGv_i32 t1 = tcg_temp_ebb_new_i32();
    TCGv_i32 t2 = tcg_temp_ebb_new_i32();

    tcg_gen_ext_i32(t2, cmpv, memop & MO_SIZE);

    tcg_gen_qemu_ld_i32(t1, addr, idx, memop & ~MO_SIGN);
    tcg_gen_movcond_i32(TCG_COND_EQ, t2, t1, t2, newv, t1);
    tcg_gen_qemu_st_i32(t2, addr, idx, memop);
    tcg_temp_free_i32(t2);

    if (memop & MO_SIGN) {
        tcg_gen_ext_i32(retv, t1, memop);
    } else {
        tcg_gen_mov_i32(retv, t1);
    }
    tcg_temp_free_i32(t1);
}

void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
                                TCGv_i32 newv, TCGArg idx, MemOp memop)
{
    gen_atomic_cx_i32 gen;
    TCGv_i64 a64;
    MemOpIdx oi;

    if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
        tcg_gen_nonatomic_cmpxchg_i32(retv, addr, cmpv, newv, idx, memop);
        return;
    }

    memop = tcg_canonicalize_memop(memop, 0, 0);
    gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
    tcg_debug_assert(gen != NULL);

    oi = make_memop_idx(memop & ~MO_SIGN, idx);
    a64 = maybe_extend_addr64(addr);
    gen(retv, cpu_env, a64, cmpv, newv, tcg_constant_i32(oi));
    maybe_free_addr64(a64);

    if (memop & MO_SIGN) {
        tcg_gen_ext_i32(retv, retv, memop);
    }
}

void tcg_gen_nonatomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
                                   TCGv_i64 newv, TCGArg idx, MemOp memop)
{
    TCGv_i64 t1, t2;

    if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
        tcg_gen_nonatomic_cmpxchg_i32(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
                                      TCGV_LOW(newv), idx, memop);
        if (memop & MO_SIGN) {
            tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31);
        } else {
            tcg_gen_movi_i32(TCGV_HIGH(retv), 0);
        }
        return;
    }

    t1 = tcg_temp_ebb_new_i64();
    t2 = tcg_temp_ebb_new_i64();

    tcg_gen_ext_i64(t2, cmpv, memop & MO_SIZE);

    tcg_gen_qemu_ld_i64(t1, addr, idx, memop & ~MO_SIGN);
    tcg_gen_movcond_i64(TCG_COND_EQ, t2, t1, t2, newv, t1);
    tcg_gen_qemu_st_i64(t2, addr, idx, memop);
    tcg_temp_free_i64(t2);

    if (memop & MO_SIGN) {
        tcg_gen_ext_i64(retv, t1, memop);
    } else {
        tcg_gen_mov_i64(retv, t1);
    }
    tcg_temp_free_i64(t1);
}

void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
                                TCGv_i64 newv, TCGArg idx, MemOp memop)
{
    if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
        tcg_gen_nonatomic_cmpxchg_i64(retv, addr, cmpv, newv, idx, memop);
        return;
    }

    if ((memop & MO_SIZE) == MO_64) {
        gen_atomic_cx_i64 gen;

        memop = tcg_canonicalize_memop(memop, 1, 0);
        gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
        if (gen) {
            MemOpIdx oi = make_memop_idx(memop, idx);
            TCGv_i64 a64 = maybe_extend_addr64(addr);
            gen(retv, cpu_env, a64, cmpv, newv, tcg_constant_i32(oi));
            maybe_free_addr64(a64);
            return;
        }

        gen_helper_exit_atomic(cpu_env);

        /*
         * Produce a result for a well-formed opcode stream.  This satisfies
         * liveness for set before used, which happens before this dead code
         * is removed.
         */
        tcg_gen_movi_i64(retv, 0);
        return;
    }

    if (TCG_TARGET_REG_BITS == 32) {
        tcg_gen_atomic_cmpxchg_i32(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
                                   TCGV_LOW(newv), idx, memop);
        if (memop & MO_SIGN) {
            tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31);
        } else {
            tcg_gen_movi_i32(TCGV_HIGH(retv), 0);
        }
    } else {
        TCGv_i32 c32 = tcg_temp_ebb_new_i32();
        TCGv_i32 n32 = tcg_temp_ebb_new_i32();
        TCGv_i32 r32 = tcg_temp_ebb_new_i32();

        tcg_gen_extrl_i64_i32(c32, cmpv);
        tcg_gen_extrl_i64_i32(n32, newv);
        tcg_gen_atomic_cmpxchg_i32(r32, addr, c32, n32, idx, memop & ~MO_SIGN);
        tcg_temp_free_i32(c32);
        tcg_temp_free_i32(n32);

        tcg_gen_extu_i32_i64(retv, r32);
        tcg_temp_free_i32(r32);

        if (memop & MO_SIGN) {
            tcg_gen_ext_i64(retv, retv, memop);
        }
    }
}

void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
                                    TCGv_i128 newv, TCGArg idx, MemOp memop)
{
    if (TCG_TARGET_REG_BITS == 32) {
        /* Inline expansion below is simply too large for 32-bit hosts. */
        gen_atomic_cx_i128 gen = ((memop & MO_BSWAP) == MO_LE
                                  ? gen_helper_nonatomic_cmpxchgo_le
                                  : gen_helper_nonatomic_cmpxchgo_be);
        MemOpIdx oi = make_memop_idx(memop, idx);
        TCGv_i64 a64;

        tcg_debug_assert((memop & MO_SIZE) == MO_128);
        tcg_debug_assert((memop & MO_SIGN) == 0);

        a64 = maybe_extend_addr64(addr);
        gen(retv, cpu_env, a64, cmpv, newv, tcg_constant_i32(oi));
        maybe_free_addr64(a64);
    } else {
        TCGv_i128 oldv = tcg_temp_ebb_new_i128();
        TCGv_i128 tmpv = tcg_temp_ebb_new_i128();
        TCGv_i64 t0 = tcg_temp_ebb_new_i64();
        TCGv_i64 t1 = tcg_temp_ebb_new_i64();
        TCGv_i64 z = tcg_constant_i64(0);

        tcg_gen_qemu_ld_i128(oldv, addr, idx, memop);

        /* Compare i128 */
        tcg_gen_xor_i64(t0, TCGV128_LOW(oldv), TCGV128_LOW(cmpv));
        tcg_gen_xor_i64(t1, TCGV128_HIGH(oldv), TCGV128_HIGH(cmpv));
        tcg_gen_or_i64(t0, t0, t1);

        /* tmpv = equal ? newv : oldv */
        tcg_gen_movcond_i64(TCG_COND_EQ, TCGV128_LOW(tmpv), t0, z,
                            TCGV128_LOW(newv), TCGV128_LOW(oldv));
        tcg_gen_movcond_i64(TCG_COND_EQ, TCGV128_HIGH(tmpv), t0, z,
                            TCGV128_HIGH(newv), TCGV128_HIGH(oldv));

        /* Unconditional writeback. */
        tcg_gen_qemu_st_i128(tmpv, addr, idx, memop);
        tcg_gen_mov_i128(retv, oldv);

        tcg_temp_free_i64(t0);
        tcg_temp_free_i64(t1);
        tcg_temp_free_i128(tmpv);
        tcg_temp_free_i128(oldv);
    }
}

void tcg_gen_atomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
                                 TCGv_i128 newv, TCGArg idx, MemOp memop)
{
    gen_atomic_cx_i128 gen;

    if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
        tcg_gen_nonatomic_cmpxchg_i128(retv, addr, cmpv, newv, idx, memop);
        return;
    }

    tcg_debug_assert((memop & MO_SIZE) == MO_128);
    tcg_debug_assert((memop & MO_SIGN) == 0);
    gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];

    if (gen) {
        MemOpIdx oi = make_memop_idx(memop, idx);
        TCGv_i64 a64 = maybe_extend_addr64(addr);
        gen(retv, cpu_env, a64, cmpv, newv, tcg_constant_i32(oi));
        maybe_free_addr64(a64);
        return;
    }

    gen_helper_exit_atomic(cpu_env);

    /*
     * Produce a result for a well-formed opcode stream.  This satisfies
     * liveness for set before used, which happens before this dead code
     * is removed.
     */
    tcg_gen_movi_i64(TCGV128_LOW(retv), 0);
    tcg_gen_movi_i64(TCGV128_HIGH(retv), 0);
}

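/*
 * Expand a read-modify-write operation non-atomically: load, extend the
 * operand, apply @gen, store the result, and return either the old or
 * the new value depending on @new_val.
 */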
static void do_nonatomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
                                TCGArg idx, MemOp memop, bool new_val,
                                void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
{
    TCGv_i32 t1 = tcg_temp_ebb_new_i32();
    TCGv_i32 t2 = tcg_temp_ebb_new_i32();

    memop = tcg_canonicalize_memop(memop, 0, 0);

    tcg_gen_qemu_ld_i32(t1, addr, idx, memop);
    tcg_gen_ext_i32(t2, val, memop);
    gen(t2, t1, t2);
    tcg_gen_qemu_st_i32(t2, addr, idx, memop);

    tcg_gen_ext_i32(ret, (new_val ? t2 : t1), memop);
    tcg_temp_free_i32(t1);
    tcg_temp_free_i32(t2);
}

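/*
 * Expand a read-modify-write operation via the out-of-line atomic helper
 * selected from @table by access size and endianness.
 */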
static void do_atomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
                             TCGArg idx, MemOp memop, void * const table[])
{
    gen_atomic_op_i32 gen;
    TCGv_i64 a64;
    MemOpIdx oi;

    memop = tcg_canonicalize_memop(memop, 0, 0);

    gen = table[memop & (MO_SIZE | MO_BSWAP)];
    tcg_debug_assert(gen != NULL);

    oi = make_memop_idx(memop & ~MO_SIGN, idx);
    a64 = maybe_extend_addr64(addr);
    gen(ret, cpu_env, a64, val, tcg_constant_i32(oi));
    maybe_free_addr64(a64);

    if (memop & MO_SIGN) {
        tcg_gen_ext_i32(ret, ret, memop);
    }
}

static void do_nonatomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
                                TCGArg idx, MemOp memop, bool new_val,
                                void (*gen)(TCGv_i64, TCGv_i64, TCGv_i64))
{
    TCGv_i64 t1 = tcg_temp_ebb_new_i64();
    TCGv_i64 t2 = tcg_temp_ebb_new_i64();

    memop = tcg_canonicalize_memop(memop, 1, 0);

    tcg_gen_qemu_ld_i64(t1, addr, idx, memop);
    tcg_gen_ext_i64(t2, val, memop);
    gen(t2, t1, t2);
    tcg_gen_qemu_st_i64(t2, addr, idx, memop);

    tcg_gen_ext_i64(ret, (new_val ? t2 : t1), memop);
    tcg_temp_free_i64(t1);
    tcg_temp_free_i64(t2);
}

static void do_atomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
                             TCGArg idx, MemOp memop, void * const table[])
{
    memop = tcg_canonicalize_memop(memop, 1, 0);

    if ((memop & MO_SIZE) == MO_64) {
#ifdef CONFIG_ATOMIC64
        gen_atomic_op_i64 gen;
        TCGv_i64 a64;
        MemOpIdx oi;

        gen = table[memop & (MO_SIZE | MO_BSWAP)];
        tcg_debug_assert(gen != NULL);

        oi = make_memop_idx(memop & ~MO_SIGN, idx);
        a64 = maybe_extend_addr64(addr);
        gen(ret, cpu_env, a64, val, tcg_constant_i32(oi));
        maybe_free_addr64(a64);
#else
        gen_helper_exit_atomic(cpu_env);
        /* Produce a result, so that we have a well-formed opcode stream
           with respect to uses of the result in the (dead) code following.  */
        tcg_gen_movi_i64(ret, 0);
#endif /* CONFIG_ATOMIC64 */
    } else {
        TCGv_i32 v32 = tcg_temp_ebb_new_i32();
        TCGv_i32 r32 = tcg_temp_ebb_new_i32();

        tcg_gen_extrl_i64_i32(v32, val);
        do_atomic_op_i32(r32, addr, v32, idx, memop & ~MO_SIGN, table);
        tcg_temp_free_i32(v32);

        tcg_gen_extu_i32_i64(ret, r32);
        tcg_temp_free_i32(r32);

        if (memop & MO_SIGN) {
            tcg_gen_ext_i64(ret, ret, memop);
        }
    }
}

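/*
 * Define the helper table and the i32/i64 front ends for one atomic
 * read-modify-write operation.  With CF_PARALLEL the out-of-line atomic
 * helpers are used; otherwise the operation is expanded inline,
 * non-atomically.
 */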
#define GEN_ATOMIC_HELPER(NAME, OP, NEW)                                \
static void * const table_##NAME[(MO_SIZE | MO_BSWAP) + 1] = {          \
    [MO_8] = gen_helper_atomic_##NAME##b,                               \
    [MO_16 | MO_LE] = gen_helper_atomic_##NAME##w_le,                   \
    [MO_16 | MO_BE] = gen_helper_atomic_##NAME##w_be,                   \
    [MO_32 | MO_LE] = gen_helper_atomic_##NAME##l_le,                   \
    [MO_32 | MO_BE] = gen_helper_atomic_##NAME##l_be,                   \
    WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_##NAME##q_le)     \
    WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_##NAME##q_be)     \
};                                                                      \
void tcg_gen_atomic_##NAME##_i32                                        \
    (TCGv_i32 ret, TCGv addr, TCGv_i32 val, TCGArg idx, MemOp memop)    \
{                                                                       \
    if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) {                        \
        do_atomic_op_i32(ret, addr, val, idx, memop, table_##NAME);     \
    } else {                                                            \
        do_nonatomic_op_i32(ret, addr, val, idx, memop, NEW,            \
                            tcg_gen_##OP##_i32);                        \
    }                                                                   \
}                                                                       \
void tcg_gen_atomic_##NAME##_i64                                        \
    (TCGv_i64 ret, TCGv addr, TCGv_i64 val, TCGArg idx, MemOp memop)    \
{                                                                       \
    if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) {                        \
        do_atomic_op_i64(ret, addr, val, idx, memop, table_##NAME);     \
    } else {                                                            \
        do_nonatomic_op_i64(ret, addr, val, idx, memop, NEW,            \
                            tcg_gen_##OP##_i64);                        \
    }                                                                   \
}

GEN_ATOMIC_HELPER(fetch_add, add, 0)
GEN_ATOMIC_HELPER(fetch_and, and, 0)
GEN_ATOMIC_HELPER(fetch_or, or, 0)
GEN_ATOMIC_HELPER(fetch_xor, xor, 0)
GEN_ATOMIC_HELPER(fetch_smin, smin, 0)
GEN_ATOMIC_HELPER(fetch_umin, umin, 0)
GEN_ATOMIC_HELPER(fetch_smax, smax, 0)
GEN_ATOMIC_HELPER(fetch_umax, umax, 0)

GEN_ATOMIC_HELPER(add_fetch, add, 1)
GEN_ATOMIC_HELPER(and_fetch, and, 1)
GEN_ATOMIC_HELPER(or_fetch, or, 1)
GEN_ATOMIC_HELPER(xor_fetch, xor, 1)
GEN_ATOMIC_HELPER(smin_fetch, smin, 1)
GEN_ATOMIC_HELPER(umin_fetch, umin, 1)
GEN_ATOMIC_HELPER(smax_fetch, smax, 1)
GEN_ATOMIC_HELPER(umax_fetch, umax, 1)

static void tcg_gen_mov2_i32(TCGv_i32 r, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_mov_i32(r, b);
}

static void tcg_gen_mov2_i64(TCGv_i64 r, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_mov_i64(r, b);
}

GEN_ATOMIC_HELPER(xchg, mov2, 0)

#undef GEN_ATOMIC_HELPER