/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "exec/exec-all.h"
#include "tcg/tcg.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-mo.h"
#include "exec/plugin-gen.h"
#include "tcg-internal.h"

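/*
 * Canonicalize a MemOp before use: prefer MO_ALIGN over a MO_ALIGN_<N>
 * that merely restates the natural alignment of the access size, drop
 * MO_BSWAP from byte accesses, and drop MO_SIGN where it is meaningless
 * (a store, or a load that already fills the destination register).
 */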
static inline MemOp tcg_canonicalize_memop(MemOp op, bool is64, bool st)
{
    /* Trigger the asserts within as early as possible.  */
    unsigned a_bits = get_alignment_bits(op);

    /* Prefer MO_ALIGN+MO_XX over MO_ALIGN_XX+MO_XX */
    if (a_bits == (op & MO_SIZE)) {
        op = (op & ~MO_AMASK) | MO_ALIGN;
    }

    switch (op & MO_SIZE) {
    case MO_8:
        op &= ~MO_BSWAP;
        break;
    case MO_16:
        break;
    case MO_32:
        if (!is64) {
            op &= ~MO_SIGN;
        }
        break;
    case MO_64:
        if (is64) {
            op &= ~MO_SIGN;
            break;
        }
        /* fall through */
    default:
        g_assert_not_reached();
    }
    if (st) {
        op &= ~MO_SIGN;
    }
    return op;
}

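/*
 * Emit a qemu_ld/qemu_st opcode for a 32-bit or 64-bit value.  The
 * argument encoding varies with the host register width and the guest
 * address width: 64-bit values and addresses are split into lo/hi
 * halves on 32-bit hosts.
 */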
static void gen_ldst_i32(TCGOpcode opc, TCGv_i32 val, TCGv addr,
                         MemOp memop, TCGArg idx)
{
    MemOpIdx oi = make_memop_idx(memop, idx);
#if TARGET_LONG_BITS == 32
    tcg_gen_op3i_i32(opc, val, addr, oi);
#else
    if (TCG_TARGET_REG_BITS == 32) {
        tcg_gen_op4i_i32(opc, val, TCGV_LOW(addr), TCGV_HIGH(addr), oi);
    } else {
        tcg_gen_op3(opc, tcgv_i32_arg(val), tcgv_i64_arg(addr), oi);
    }
#endif
}

static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 val, TCGv addr,
                         MemOp memop, TCGArg idx)
{
    MemOpIdx oi = make_memop_idx(memop, idx);
#if TARGET_LONG_BITS == 32
    if (TCG_TARGET_REG_BITS == 32) {
        tcg_gen_op4i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val), addr, oi);
    } else {
        tcg_gen_op3(opc, tcgv_i64_arg(val), tcgv_i32_arg(addr), oi);
    }
#else
    if (TCG_TARGET_REG_BITS == 32) {
        tcg_gen_op5i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val),
                         TCGV_LOW(addr), TCGV_HIGH(addr), oi);
    } else {
        tcg_gen_op3i_i64(opc, val, addr, oi);
    }
#endif
}

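/*
 * Emit a memory barrier for the subset of @type that the guest memory
 * model requires but the host does not already provide implicitly.
 * If the remaining set is empty, no barrier is needed.
 */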
static void tcg_gen_req_mo(TCGBar type)
{
#ifdef TCG_GUEST_DEFAULT_MO
    type &= TCG_GUEST_DEFAULT_MO;
#endif
    type &= ~TCG_TARGET_DEFAULT_MO;
    if (type) {
        tcg_gen_mb(type | TCG_BAR_SC);
    }
}

/* Only required for loads, where value might overlap addr. */
static TCGv plugin_maybe_preserve_addr(TCGv vaddr)
{
#ifdef CONFIG_PLUGIN
    if (tcg_ctx->plugin_insn != NULL) {
        /* Save a copy of the vaddr for use after a load.  */
        TCGv temp = tcg_temp_new();
        tcg_gen_mov_tl(temp, vaddr);
        return temp;
    }
#endif
    return NULL;
}

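/*
 * Emit the plugin memory callback for this access, using the preserved
 * address copy if one was made, and free that copy afterward.
 */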
static void
plugin_gen_mem_callbacks(TCGv copy_addr, TCGv orig_addr, MemOpIdx oi,
                         enum qemu_plugin_mem_rw rw)
{
#ifdef CONFIG_PLUGIN
    if (tcg_ctx->plugin_insn != NULL) {
        qemu_plugin_meminfo_t info = make_plugin_meminfo(oi, rw);
        plugin_gen_empty_mem_callback(copy_addr ? : orig_addr, info);
        if (copy_addr) {
            tcg_temp_free(copy_addr);
        }
    }
#endif
}

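/*
 * Generate a guest load into a 32-bit value.  If the host cannot
 * byte-swap as part of the memory operation, load without MO_BSWAP
 * and swap the result afterward; the store path below mirrors this
 * by swapping into a temporary before the store.
 */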
void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
{
    MemOp orig_memop;
    MemOpIdx oi;
    TCGv copy_addr;

    tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
    memop = tcg_canonicalize_memop(memop, 0, 0);
    oi = make_memop_idx(memop, idx);

    orig_memop = memop;
    if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
        memop &= ~MO_BSWAP;
        /* The bswap primitive benefits from zero-extended input.  */
        if ((memop & MO_SSIZE) == MO_SW) {
            memop &= ~MO_SIGN;
        }
    }

    copy_addr = plugin_maybe_preserve_addr(addr);
    gen_ldst_i32(INDEX_op_qemu_ld_i32, val, addr, memop, idx);
    plugin_gen_mem_callbacks(copy_addr, addr, oi, QEMU_PLUGIN_MEM_R);

    if ((orig_memop ^ memop) & MO_BSWAP) {
        switch (orig_memop & MO_SIZE) {
        case MO_16:
            tcg_gen_bswap16_i32(val, val, (orig_memop & MO_SIGN
                                           ? TCG_BSWAP_IZ | TCG_BSWAP_OS
                                           : TCG_BSWAP_IZ | TCG_BSWAP_OZ));
            break;
        case MO_32:
            tcg_gen_bswap32_i32(val, val);
            break;
        default:
            g_assert_not_reached();
        }
    }
}

void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
{
    TCGv_i32 swap = NULL;
    MemOpIdx oi;

    tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
    memop = tcg_canonicalize_memop(memop, 0, 1);
    oi = make_memop_idx(memop, idx);

    if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
        swap = tcg_temp_ebb_new_i32();
        switch (memop & MO_SIZE) {
        case MO_16:
            tcg_gen_bswap16_i32(swap, val, 0);
            break;
        case MO_32:
            tcg_gen_bswap32_i32(swap, val);
            break;
        default:
            g_assert_not_reached();
        }
        val = swap;
        memop &= ~MO_BSWAP;
    }

    if (TCG_TARGET_HAS_qemu_st8_i32 && (memop & MO_SIZE) == MO_8) {
        gen_ldst_i32(INDEX_op_qemu_st8_i32, val, addr, memop, idx);
    } else {
        gen_ldst_i32(INDEX_op_qemu_st_i32, val, addr, memop, idx);
    }
    plugin_gen_mem_callbacks(NULL, addr, oi, QEMU_PLUGIN_MEM_W);

    if (swap) {
        tcg_temp_free_i32(swap);
    }
}

void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
{
    MemOp orig_memop;
    MemOpIdx oi;
    TCGv copy_addr;

    if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
        tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop);
        if (memop & MO_SIGN) {
            tcg_gen_sari_i32(TCGV_HIGH(val), TCGV_LOW(val), 31);
        } else {
            tcg_gen_movi_i32(TCGV_HIGH(val), 0);
        }
        return;
    }

    tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
    memop = tcg_canonicalize_memop(memop, 1, 0);
    oi = make_memop_idx(memop, idx);

    orig_memop = memop;
    if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
        memop &= ~MO_BSWAP;
        /* The bswap primitive benefits from zero-extended input.  */
        if ((memop & MO_SIGN) && (memop & MO_SIZE) < MO_64) {
            memop &= ~MO_SIGN;
        }
    }

    copy_addr = plugin_maybe_preserve_addr(addr);
    gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, memop, idx);
    plugin_gen_mem_callbacks(copy_addr, addr, oi, QEMU_PLUGIN_MEM_R);

    if ((orig_memop ^ memop) & MO_BSWAP) {
        int flags = (orig_memop & MO_SIGN
                     ? TCG_BSWAP_IZ | TCG_BSWAP_OS
                     : TCG_BSWAP_IZ | TCG_BSWAP_OZ);
        switch (orig_memop & MO_SIZE) {
        case MO_16:
            tcg_gen_bswap16_i64(val, val, flags);
            break;
        case MO_32:
            tcg_gen_bswap32_i64(val, val, flags);
            break;
        case MO_64:
            tcg_gen_bswap64_i64(val, val);
            break;
        default:
            g_assert_not_reached();
        }
    }
}

void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
{
    TCGv_i64 swap = NULL;
    MemOpIdx oi;

    if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
        tcg_gen_qemu_st_i32(TCGV_LOW(val), addr, idx, memop);
        return;
    }

    tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
    memop = tcg_canonicalize_memop(memop, 1, 1);
    oi = make_memop_idx(memop, idx);

    if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
        swap = tcg_temp_ebb_new_i64();
        switch (memop & MO_SIZE) {
        case MO_16:
            tcg_gen_bswap16_i64(swap, val, 0);
            break;
        case MO_32:
            tcg_gen_bswap32_i64(swap, val, 0);
            break;
        case MO_64:
            tcg_gen_bswap64_i64(swap, val);
            break;
        default:
            g_assert_not_reached();
        }
        val = swap;
        memop &= ~MO_BSWAP;
    }

    gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, memop, idx);
    plugin_gen_mem_callbacks(NULL, addr, oi, QEMU_PLUGIN_MEM_W);

    if (swap) {
        tcg_temp_free_i64(swap);
    }
}

/*
 * Return true if @mop, without knowledge of the pointer alignment,
 * does not require 16-byte atomicity, and it would be advantageous
 * to avoid a call to a helper function.
 */
static bool use_two_i64_for_i128(MemOp mop)
{
#ifdef CONFIG_SOFTMMU
    /* Two softmmu tlb lookups is larger than one function call. */
    return false;
#else
    /*
     * For user-only, two 64-bit operations may well be smaller than a call.
     * Determine if that would be legal for the requested atomicity.
     */
    switch (mop & MO_ATOM_MASK) {
    case MO_ATOM_NONE:
    case MO_ATOM_IFALIGN_PAIR:
        return true;
    case MO_ATOM_IFALIGN:
    case MO_ATOM_SUBALIGN:
    case MO_ATOM_WITHIN16:
    case MO_ATOM_WITHIN16_PAIR:
        /* In a serialized context, no atomicity is required. */
        return !(tcg_ctx->gen_tb->cflags & CF_PARALLEL);
    default:
        g_assert_not_reached();
    }
#endif
}

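/*
 * Split a 128-bit memory operation into a pair of 64-bit operations,
 * preserving the alignment and byte-order constraints of the original
 * in ret[0] (first half) and ret[1] (second half).
 */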
static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
{
    MemOp mop_1 = orig, mop_2;

    tcg_debug_assert((orig & MO_SIZE) == MO_128);
    tcg_debug_assert((orig & MO_SIGN) == 0);

    /* Reduce the size to 64-bit. */
    mop_1 = (mop_1 & ~MO_SIZE) | MO_64;

    /* Retain the alignment constraints of the original. */
    switch (orig & MO_AMASK) {
    case MO_UNALN:
    case MO_ALIGN_2:
    case MO_ALIGN_4:
        mop_2 = mop_1;
        break;
    case MO_ALIGN_8:
        /* Prefer MO_ALIGN+MO_64 to MO_ALIGN_8+MO_64. */
        mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
        mop_2 = mop_1;
        break;
    case MO_ALIGN:
        /* Second has 8-byte alignment; first has 16-byte alignment. */
        mop_2 = mop_1;
        mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN_16;
        break;
    case MO_ALIGN_16:
    case MO_ALIGN_32:
    case MO_ALIGN_64:
        /* Second has 8-byte alignment; first retains original. */
        mop_2 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
        break;
    default:
        g_assert_not_reached();
    }

    /* Use a byte ordering implemented by the host. */
    if ((orig & MO_BSWAP) && !tcg_target_has_memory_bswap(mop_1)) {
        mop_1 &= ~MO_BSWAP;
        mop_2 &= ~MO_BSWAP;
    }

    ret[0] = mop_1;
    ret[1] = mop_2;
}

#if TARGET_LONG_BITS == 64
#define tcg_temp_ebb_new  tcg_temp_ebb_new_i64
#else
#define tcg_temp_ebb_new  tcg_temp_ebb_new_i32
#endif

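/*
 * The out-of-line helpers always take a 64-bit address.  For a 32-bit
 * guest, zero-extend the address into a fresh temporary, which the
 * caller must release with maybe_free_addr64().
 */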
static TCGv_i64 maybe_extend_addr64(TCGv addr)
{
#if TARGET_LONG_BITS == 32
    TCGv_i64 a64 = tcg_temp_ebb_new_i64();
    tcg_gen_extu_i32_i64(a64, addr);
    return a64;
#else
    return addr;
#endif
}

static void maybe_free_addr64(TCGv_i64 a64)
{
#if TARGET_LONG_BITS == 32
    tcg_temp_free_i64(a64);
#endif
}

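/*
 * Generate a 128-bit guest load, choosing among three strategies:
 * a native i128 opcode if the backend provides one, a pair of i64
 * loads when the atomicity requirements permit, or a helper call.
 */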
void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
{
    const MemOpIdx oi = make_memop_idx(memop, idx);

    tcg_debug_assert((memop & MO_SIZE) == MO_128);
    tcg_debug_assert((memop & MO_SIGN) == 0);

    tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);

    /* TODO: For now, force 32-bit hosts to use the helper. */
    if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
        TCGv_i64 lo, hi;
        TCGArg addr_arg;
        MemOpIdx adj_oi;
        bool need_bswap = false;

        if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
            lo = TCGV128_HIGH(val);
            hi = TCGV128_LOW(val);
            adj_oi = make_memop_idx(memop & ~MO_BSWAP, idx);
            need_bswap = true;
        } else {
            lo = TCGV128_LOW(val);
            hi = TCGV128_HIGH(val);
            adj_oi = oi;
        }

#if TARGET_LONG_BITS == 32
        addr_arg = tcgv_i32_arg(addr);
#else
        addr_arg = tcgv_i64_arg(addr);
#endif
        tcg_gen_op4ii_i64(INDEX_op_qemu_ld_i128, lo, hi, addr_arg, adj_oi);

        if (need_bswap) {
            tcg_gen_bswap64_i64(lo, lo);
            tcg_gen_bswap64_i64(hi, hi);
        }
    } else if (use_two_i64_for_i128(memop)) {
        MemOp mop[2];
        TCGv addr_p8;
        TCGv_i64 x, y;

        canonicalize_memop_i128_as_i64(mop, memop);

        /*
         * Since there are no global TCGv_i128, there is no visible state
         * changed if the second load faults.  Load directly into the two
         * subwords.
         */
        if ((memop & MO_BSWAP) == MO_LE) {
            x = TCGV128_LOW(val);
            y = TCGV128_HIGH(val);
        } else {
            x = TCGV128_HIGH(val);
            y = TCGV128_LOW(val);
        }

        gen_ldst_i64(INDEX_op_qemu_ld_i64, x, addr, mop[0], idx);

        if ((mop[0] ^ memop) & MO_BSWAP) {
            tcg_gen_bswap64_i64(x, x);
        }

        addr_p8 = tcg_temp_ebb_new();
        tcg_gen_addi_tl(addr_p8, addr, 8);
        gen_ldst_i64(INDEX_op_qemu_ld_i64, y, addr_p8, mop[1], idx);
        tcg_temp_free(addr_p8);

        if ((mop[0] ^ memop) & MO_BSWAP) {
            tcg_gen_bswap64_i64(y, y);
        }
    } else {
        TCGv_i64 a64 = maybe_extend_addr64(addr);
        gen_helper_ld_i128(val, cpu_env, a64, tcg_constant_i32(oi));
        maybe_free_addr64(a64);
    }

    plugin_gen_mem_callbacks(NULL, addr, oi, QEMU_PLUGIN_MEM_R);
}

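/*
 * Generate a 128-bit guest store, using the same three strategies
 * as the 128-bit load above.
 */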
void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
{
    const MemOpIdx oi = make_memop_idx(memop, idx);

    tcg_debug_assert((memop & MO_SIZE) == MO_128);
    tcg_debug_assert((memop & MO_SIGN) == 0);

    tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);

    /* TODO: For now, force 32-bit hosts to use the helper. */
    if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
        TCGv_i64 lo, hi;
        TCGArg addr_arg;
        MemOpIdx adj_oi;
        bool need_bswap = false;

        if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
            lo = tcg_temp_new_i64();
            hi = tcg_temp_new_i64();
            tcg_gen_bswap64_i64(lo, TCGV128_HIGH(val));
            tcg_gen_bswap64_i64(hi, TCGV128_LOW(val));
            adj_oi = make_memop_idx(memop & ~MO_BSWAP, idx);
            need_bswap = true;
        } else {
            lo = TCGV128_LOW(val);
            hi = TCGV128_HIGH(val);
            adj_oi = oi;
        }

#if TARGET_LONG_BITS == 32
        addr_arg = tcgv_i32_arg(addr);
#else
        addr_arg = tcgv_i64_arg(addr);
#endif
        tcg_gen_op4ii_i64(INDEX_op_qemu_st_i128, lo, hi, addr_arg, adj_oi);

        if (need_bswap) {
            tcg_temp_free_i64(lo);
            tcg_temp_free_i64(hi);
        }
    } else if (use_two_i64_for_i128(memop)) {
        MemOp mop[2];
        TCGv addr_p8;
        TCGv_i64 x, y;

        canonicalize_memop_i128_as_i64(mop, memop);

        if ((memop & MO_BSWAP) == MO_LE) {
            x = TCGV128_LOW(val);
            y = TCGV128_HIGH(val);
        } else {
            x = TCGV128_HIGH(val);
            y = TCGV128_LOW(val);
        }

        addr_p8 = tcg_temp_ebb_new();
        if ((mop[0] ^ memop) & MO_BSWAP) {
            TCGv_i64 t = tcg_temp_ebb_new_i64();

            tcg_gen_bswap64_i64(t, x);
            gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr, mop[0], idx);
            tcg_gen_bswap64_i64(t, y);
            tcg_gen_addi_tl(addr_p8, addr, 8);
            gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr_p8, mop[1], idx);
            tcg_temp_free_i64(t);
        } else {
            gen_ldst_i64(INDEX_op_qemu_st_i64, x, addr, mop[0], idx);
            tcg_gen_addi_tl(addr_p8, addr, 8);
            gen_ldst_i64(INDEX_op_qemu_st_i64, y, addr_p8, mop[1], idx);
        }
        tcg_temp_free(addr_p8);
    } else {
        TCGv_i64 a64 = maybe_extend_addr64(addr);
        gen_helper_st_i128(cpu_env, a64, val, tcg_constant_i32(oi));
        maybe_free_addr64(a64);
    }

    plugin_gen_mem_callbacks(NULL, addr, oi, QEMU_PLUGIN_MEM_W);
}

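/*
 * Sign- or zero-extend @val into @ret according to the MO_SSIZE
 * bits of @opc; sizes at or above the register width are a move.
 */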
static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc)
{
    switch (opc & MO_SSIZE) {
    case MO_SB:
        tcg_gen_ext8s_i32(ret, val);
        break;
    case MO_UB:
        tcg_gen_ext8u_i32(ret, val);
        break;
    case MO_SW:
        tcg_gen_ext16s_i32(ret, val);
        break;
    case MO_UW:
        tcg_gen_ext16u_i32(ret, val);
        break;
    default:
        tcg_gen_mov_i32(ret, val);
        break;
    }
}

static void tcg_gen_ext_i64(TCGv_i64 ret, TCGv_i64 val, MemOp opc)
{
    switch (opc & MO_SSIZE) {
    case MO_SB:
        tcg_gen_ext8s_i64(ret, val);
        break;
    case MO_UB:
        tcg_gen_ext8u_i64(ret, val);
        break;
    case MO_SW:
        tcg_gen_ext16s_i64(ret, val);
        break;
    case MO_UW:
        tcg_gen_ext16u_i64(ret, val);
        break;
    case MO_SL:
        tcg_gen_ext32s_i64(ret, val);
        break;
    case MO_UL:
        tcg_gen_ext32u_i64(ret, val);
        break;
    default:
        tcg_gen_mov_i64(ret, val);
        break;
    }
}

typedef void (*gen_atomic_cx_i32)(TCGv_i32, TCGv_env, TCGv_i64,
                                  TCGv_i32, TCGv_i32, TCGv_i32);
typedef void (*gen_atomic_cx_i64)(TCGv_i64, TCGv_env, TCGv_i64,
                                  TCGv_i64, TCGv_i64, TCGv_i32);
typedef void (*gen_atomic_cx_i128)(TCGv_i128, TCGv_env, TCGv_i64,
                                   TCGv_i128, TCGv_i128, TCGv_i32);
typedef void (*gen_atomic_op_i32)(TCGv_i32, TCGv_env, TCGv_i64,
                                  TCGv_i32, TCGv_i32);
typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv_i64,
                                  TCGv_i64, TCGv_i32);

#ifdef CONFIG_ATOMIC64
# define WITH_ATOMIC64(X) X,
#else
# define WITH_ATOMIC64(X)
#endif
#ifdef CONFIG_CMPXCHG128
# define WITH_ATOMIC128(X) X,
#else
# define WITH_ATOMIC128(X)
#endif

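/*
 * Helper table for atomic cmpxchg, indexed by size and endianness.
 * Entries above 32 bits are present only when the host supports the
 * corresponding atomic operation.
 */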
static void * const table_cmpxchg[(MO_SIZE | MO_BSWAP) + 1] = {
    [MO_8] = gen_helper_atomic_cmpxchgb,
    [MO_16 | MO_LE] = gen_helper_atomic_cmpxchgw_le,
    [MO_16 | MO_BE] = gen_helper_atomic_cmpxchgw_be,
    [MO_32 | MO_LE] = gen_helper_atomic_cmpxchgl_le,
    [MO_32 | MO_BE] = gen_helper_atomic_cmpxchgl_be,
    WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_cmpxchgq_le)
    WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_cmpxchgq_be)
    WITH_ATOMIC128([MO_128 | MO_LE] = gen_helper_atomic_cmpxchgo_le)
    WITH_ATOMIC128([MO_128 | MO_BE] = gen_helper_atomic_cmpxchgo_be)
};

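/*
 * Non-atomic cmpxchg expansion: load the old value, select between
 * new and old with movcond, and write the result back unconditionally.
 * Only used when no other cpu may run in parallel.
 */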
void tcg_gen_nonatomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
                                   TCGv_i32 newv, TCGArg idx, MemOp memop)
{
    TCGv_i32 t1 = tcg_temp_ebb_new_i32();
    TCGv_i32 t2 = tcg_temp_ebb_new_i32();

    tcg_gen_ext_i32(t2, cmpv, memop & MO_SIZE);

    tcg_gen_qemu_ld_i32(t1, addr, idx, memop & ~MO_SIGN);
    tcg_gen_movcond_i32(TCG_COND_EQ, t2, t1, t2, newv, t1);
    tcg_gen_qemu_st_i32(t2, addr, idx, memop);
    tcg_temp_free_i32(t2);

    if (memop & MO_SIGN) {
        tcg_gen_ext_i32(retv, t1, memop);
    } else {
        tcg_gen_mov_i32(retv, t1);
    }
    tcg_temp_free_i32(t1);
}

void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
                                TCGv_i32 newv, TCGArg idx, MemOp memop)
{
    gen_atomic_cx_i32 gen;
    TCGv_i64 a64;
    MemOpIdx oi;

    if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
        tcg_gen_nonatomic_cmpxchg_i32(retv, addr, cmpv, newv, idx, memop);
        return;
    }

    memop = tcg_canonicalize_memop(memop, 0, 0);
    gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
    tcg_debug_assert(gen != NULL);

    oi = make_memop_idx(memop & ~MO_SIGN, idx);
    a64 = maybe_extend_addr64(addr);
    gen(retv, cpu_env, a64, cmpv, newv, tcg_constant_i32(oi));
    maybe_free_addr64(a64);

    if (memop & MO_SIGN) {
        tcg_gen_ext_i32(retv, retv, memop);
    }
}

void tcg_gen_nonatomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
                                   TCGv_i64 newv, TCGArg idx, MemOp memop)
{
    TCGv_i64 t1, t2;

    if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
        tcg_gen_nonatomic_cmpxchg_i32(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
                                      TCGV_LOW(newv), idx, memop);
        if (memop & MO_SIGN) {
            tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31);
        } else {
            tcg_gen_movi_i32(TCGV_HIGH(retv), 0);
        }
        return;
    }

    t1 = tcg_temp_ebb_new_i64();
    t2 = tcg_temp_ebb_new_i64();

    tcg_gen_ext_i64(t2, cmpv, memop & MO_SIZE);

    tcg_gen_qemu_ld_i64(t1, addr, idx, memop & ~MO_SIGN);
    tcg_gen_movcond_i64(TCG_COND_EQ, t2, t1, t2, newv, t1);
    tcg_gen_qemu_st_i64(t2, addr, idx, memop);
    tcg_temp_free_i64(t2);

    if (memop & MO_SIGN) {
        tcg_gen_ext_i64(retv, t1, memop);
    } else {
        tcg_gen_mov_i64(retv, t1);
    }
    tcg_temp_free_i64(t1);
}

void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
                                TCGv_i64 newv, TCGArg idx, MemOp memop)
{
    if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
        tcg_gen_nonatomic_cmpxchg_i64(retv, addr, cmpv, newv, idx, memop);
        return;
    }

    if ((memop & MO_SIZE) == MO_64) {
        gen_atomic_cx_i64 gen;

        memop = tcg_canonicalize_memop(memop, 1, 0);
        gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
        if (gen) {
            MemOpIdx oi = make_memop_idx(memop, idx);
            TCGv_i64 a64 = maybe_extend_addr64(addr);
            gen(retv, cpu_env, a64, cmpv, newv, tcg_constant_i32(oi));
            maybe_free_addr64(a64);
            return;
        }

        gen_helper_exit_atomic(cpu_env);

        /*
         * Produce a result for a well-formed opcode stream.  This satisfies
         * liveness for set before used, which happens before this dead code
         * is removed.
         */
        tcg_gen_movi_i64(retv, 0);
        return;
    }

    if (TCG_TARGET_REG_BITS == 32) {
        tcg_gen_atomic_cmpxchg_i32(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
                                   TCGV_LOW(newv), idx, memop);
        if (memop & MO_SIGN) {
            tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31);
        } else {
            tcg_gen_movi_i32(TCGV_HIGH(retv), 0);
        }
    } else {
        TCGv_i32 c32 = tcg_temp_ebb_new_i32();
        TCGv_i32 n32 = tcg_temp_ebb_new_i32();
        TCGv_i32 r32 = tcg_temp_ebb_new_i32();

        tcg_gen_extrl_i64_i32(c32, cmpv);
        tcg_gen_extrl_i64_i32(n32, newv);
        tcg_gen_atomic_cmpxchg_i32(r32, addr, c32, n32, idx, memop & ~MO_SIGN);
        tcg_temp_free_i32(c32);
        tcg_temp_free_i32(n32);

        tcg_gen_extu_i32_i64(retv, r32);
        tcg_temp_free_i32(r32);

        if (memop & MO_SIGN) {
            tcg_gen_ext_i64(retv, retv, memop);
        }
    }
}

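/*
 * For the 128-bit inline expansion, equality is computed by folding
 * the xor of both halves together: the values are equal iff
 * (old_lo ^ cmp_lo) | (old_hi ^ cmp_hi) == 0.
 */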
void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
                                    TCGv_i128 newv, TCGArg idx, MemOp memop)
{
    if (TCG_TARGET_REG_BITS == 32) {
        /* Inline expansion below is simply too large for 32-bit hosts. */
        gen_atomic_cx_i128 gen = ((memop & MO_BSWAP) == MO_LE
                                  ? gen_helper_nonatomic_cmpxchgo_le
                                  : gen_helper_nonatomic_cmpxchgo_be);
        MemOpIdx oi = make_memop_idx(memop, idx);
        TCGv_i64 a64;

        tcg_debug_assert((memop & MO_SIZE) == MO_128);
        tcg_debug_assert((memop & MO_SIGN) == 0);

        a64 = maybe_extend_addr64(addr);
        gen(retv, cpu_env, a64, cmpv, newv, tcg_constant_i32(oi));
        maybe_free_addr64(a64);
    } else {
        TCGv_i128 oldv = tcg_temp_ebb_new_i128();
        TCGv_i128 tmpv = tcg_temp_ebb_new_i128();
        TCGv_i64 t0 = tcg_temp_ebb_new_i64();
        TCGv_i64 t1 = tcg_temp_ebb_new_i64();
        TCGv_i64 z = tcg_constant_i64(0);

        tcg_gen_qemu_ld_i128(oldv, addr, idx, memop);

        /* Compare i128 */
        tcg_gen_xor_i64(t0, TCGV128_LOW(oldv), TCGV128_LOW(cmpv));
        tcg_gen_xor_i64(t1, TCGV128_HIGH(oldv), TCGV128_HIGH(cmpv));
        tcg_gen_or_i64(t0, t0, t1);

        /* tmpv = equal ? newv : oldv */
        tcg_gen_movcond_i64(TCG_COND_EQ, TCGV128_LOW(tmpv), t0, z,
                            TCGV128_LOW(newv), TCGV128_LOW(oldv));
        tcg_gen_movcond_i64(TCG_COND_EQ, TCGV128_HIGH(tmpv), t0, z,
                            TCGV128_HIGH(newv), TCGV128_HIGH(oldv));

        /* Unconditional writeback. */
        tcg_gen_qemu_st_i128(tmpv, addr, idx, memop);
        tcg_gen_mov_i128(retv, oldv);

        tcg_temp_free_i64(t0);
        tcg_temp_free_i64(t1);
        tcg_temp_free_i128(tmpv);
        tcg_temp_free_i128(oldv);
    }
}

void tcg_gen_atomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
                                 TCGv_i128 newv, TCGArg idx, MemOp memop)
{
    gen_atomic_cx_i128 gen;

    if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
        tcg_gen_nonatomic_cmpxchg_i128(retv, addr, cmpv, newv, idx, memop);
        return;
    }

    tcg_debug_assert((memop & MO_SIZE) == MO_128);
    tcg_debug_assert((memop & MO_SIGN) == 0);
    gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];

    if (gen) {
        MemOpIdx oi = make_memop_idx(memop, idx);
        TCGv_i64 a64 = maybe_extend_addr64(addr);
        gen(retv, cpu_env, a64, cmpv, newv, tcg_constant_i32(oi));
        maybe_free_addr64(a64);
        return;
    }

    gen_helper_exit_atomic(cpu_env);

    /*
     * Produce a result for a well-formed opcode stream.  This satisfies
     * liveness for set before used, which happens before this dead code
     * is removed.
     */
    tcg_gen_movi_i64(TCGV128_LOW(retv), 0);
    tcg_gen_movi_i64(TCGV128_HIGH(retv), 0);
}

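/*
 * Expand an atomic read-modify-write operation.  The non-atomic form
 * is a load, the operation, and a store; @new_val selects whether the
 * result returned is the value after the operation or the original.
 * The atomic form dispatches to a helper from the per-op table.
 */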
static void do_nonatomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
                                TCGArg idx, MemOp memop, bool new_val,
                                void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
{
    TCGv_i32 t1 = tcg_temp_ebb_new_i32();
    TCGv_i32 t2 = tcg_temp_ebb_new_i32();

    memop = tcg_canonicalize_memop(memop, 0, 0);

    tcg_gen_qemu_ld_i32(t1, addr, idx, memop);
    tcg_gen_ext_i32(t2, val, memop);
    gen(t2, t1, t2);
    tcg_gen_qemu_st_i32(t2, addr, idx, memop);

    tcg_gen_ext_i32(ret, (new_val ? t2 : t1), memop);
    tcg_temp_free_i32(t1);
    tcg_temp_free_i32(t2);
}

static void do_atomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
                             TCGArg idx, MemOp memop, void * const table[])
{
    gen_atomic_op_i32 gen;
    TCGv_i64 a64;
    MemOpIdx oi;

    memop = tcg_canonicalize_memop(memop, 0, 0);

    gen = table[memop & (MO_SIZE | MO_BSWAP)];
    tcg_debug_assert(gen != NULL);

    oi = make_memop_idx(memop & ~MO_SIGN, idx);
    a64 = maybe_extend_addr64(addr);
    gen(ret, cpu_env, a64, val, tcg_constant_i32(oi));
    maybe_free_addr64(a64);

    if (memop & MO_SIGN) {
        tcg_gen_ext_i32(ret, ret, memop);
    }
}

static void do_nonatomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
                                TCGArg idx, MemOp memop, bool new_val,
                                void (*gen)(TCGv_i64, TCGv_i64, TCGv_i64))
{
    TCGv_i64 t1 = tcg_temp_ebb_new_i64();
    TCGv_i64 t2 = tcg_temp_ebb_new_i64();

    memop = tcg_canonicalize_memop(memop, 1, 0);

    tcg_gen_qemu_ld_i64(t1, addr, idx, memop);
    tcg_gen_ext_i64(t2, val, memop);
    gen(t2, t1, t2);
    tcg_gen_qemu_st_i64(t2, addr, idx, memop);

    tcg_gen_ext_i64(ret, (new_val ? t2 : t1), memop);
    tcg_temp_free_i64(t1);
    tcg_temp_free_i64(t2);
}

static void do_atomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
                             TCGArg idx, MemOp memop, void * const table[])
{
    memop = tcg_canonicalize_memop(memop, 1, 0);

    if ((memop & MO_SIZE) == MO_64) {
#ifdef CONFIG_ATOMIC64
        gen_atomic_op_i64 gen;
        TCGv_i64 a64;
        MemOpIdx oi;

        gen = table[memop & (MO_SIZE | MO_BSWAP)];
        tcg_debug_assert(gen != NULL);

        oi = make_memop_idx(memop & ~MO_SIGN, idx);
        a64 = maybe_extend_addr64(addr);
        gen(ret, cpu_env, a64, val, tcg_constant_i32(oi));
        maybe_free_addr64(a64);
#else
        gen_helper_exit_atomic(cpu_env);
        /* Produce a result, so that we have a well-formed opcode stream
           with respect to uses of the result in the (dead) code following.  */
        tcg_gen_movi_i64(ret, 0);
#endif /* CONFIG_ATOMIC64 */
    } else {
        TCGv_i32 v32 = tcg_temp_ebb_new_i32();
        TCGv_i32 r32 = tcg_temp_ebb_new_i32();

        tcg_gen_extrl_i64_i32(v32, val);
        do_atomic_op_i32(r32, addr, v32, idx, memop & ~MO_SIGN, table);
        tcg_temp_free_i32(v32);

        tcg_gen_extu_i32_i64(ret, r32);
        tcg_temp_free_i32(r32);

        if (memop & MO_SIGN) {
            tcg_gen_ext_i64(ret, ret, memop);
        }
    }
}

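/*
 * Instantiate the helper table and the i32/i64 front-ends for one
 * read-modify-write operation.  With CF_PARALLEL, dispatch to the
 * atomic helpers; otherwise expand inline via the non-atomic path.
 */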
#define GEN_ATOMIC_HELPER(NAME, OP, NEW)                                \
static void * const table_##NAME[(MO_SIZE | MO_BSWAP) + 1] = {          \
    [MO_8] = gen_helper_atomic_##NAME##b,                               \
    [MO_16 | MO_LE] = gen_helper_atomic_##NAME##w_le,                   \
    [MO_16 | MO_BE] = gen_helper_atomic_##NAME##w_be,                   \
    [MO_32 | MO_LE] = gen_helper_atomic_##NAME##l_le,                   \
    [MO_32 | MO_BE] = gen_helper_atomic_##NAME##l_be,                   \
    WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_##NAME##q_le)     \
    WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_##NAME##q_be)     \
};                                                                      \
void tcg_gen_atomic_##NAME##_i32                                        \
    (TCGv_i32 ret, TCGv addr, TCGv_i32 val, TCGArg idx, MemOp memop)    \
{                                                                       \
    if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) {                        \
        do_atomic_op_i32(ret, addr, val, idx, memop, table_##NAME);     \
    } else {                                                            \
        do_nonatomic_op_i32(ret, addr, val, idx, memop, NEW,            \
                            tcg_gen_##OP##_i32);                        \
    }                                                                   \
}                                                                       \
void tcg_gen_atomic_##NAME##_i64                                        \
    (TCGv_i64 ret, TCGv addr, TCGv_i64 val, TCGArg idx, MemOp memop)    \
{                                                                       \
    if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) {                        \
        do_atomic_op_i64(ret, addr, val, idx, memop, table_##NAME);     \
    } else {                                                            \
        do_nonatomic_op_i64(ret, addr, val, idx, memop, NEW,            \
                            tcg_gen_##OP##_i64);                        \
    }                                                                   \
}

GEN_ATOMIC_HELPER(fetch_add, add, 0)
GEN_ATOMIC_HELPER(fetch_and, and, 0)
GEN_ATOMIC_HELPER(fetch_or, or, 0)
GEN_ATOMIC_HELPER(fetch_xor, xor, 0)
GEN_ATOMIC_HELPER(fetch_smin, smin, 0)
GEN_ATOMIC_HELPER(fetch_umin, umin, 0)
GEN_ATOMIC_HELPER(fetch_smax, smax, 0)
GEN_ATOMIC_HELPER(fetch_umax, umax, 0)

GEN_ATOMIC_HELPER(add_fetch, add, 1)
GEN_ATOMIC_HELPER(and_fetch, and, 1)
GEN_ATOMIC_HELPER(or_fetch, or, 1)
GEN_ATOMIC_HELPER(xor_fetch, xor, 1)
GEN_ATOMIC_HELPER(smin_fetch, smin, 1)
GEN_ATOMIC_HELPER(umin_fetch, umin, 1)
GEN_ATOMIC_HELPER(smax_fetch, smax, 1)
GEN_ATOMIC_HELPER(umax_fetch, umax, 1)

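/*
 * xchg is expressed as a "fetch" operation whose operator ignores the
 * loaded value and simply returns the new one.
 */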
static void tcg_gen_mov2_i32(TCGv_i32 r, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_mov_i32(r, b);
}

static void tcg_gen_mov2_i64(TCGv_i64 r, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_mov_i64(r, b);
}

GEN_ATOMIC_HELPER(xchg, mov2, 0)

#undef GEN_ATOMIC_HELPER
1047