xref: /qemu/tcg/tcg-op-ldst.c (revision a1429ca26e13bdfd10f16348c2d9e5d2a23c1377)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 #include "exec/exec-all.h"
27 #include "tcg/tcg.h"
28 #include "tcg/tcg-temp-internal.h"
29 #include "tcg/tcg-op.h"
30 #include "tcg/tcg-mo.h"
31 #include "exec/plugin-gen.h"
32 #include "tcg-internal.h"
33 
34 
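/*
 * Canonicalize a MemOp before it is encoded into a MemOpIdx, so that
 * equivalent operations map to a single representation.  As an
 * illustration (not an exhaustive list): MO_32 | MO_ALIGN_4 becomes
 * MO_32 | MO_ALIGN because the requested alignment equals the access
 * size, byte accesses drop MO_BSWAP, and stores drop MO_SIGN.
 */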
35 static inline MemOp tcg_canonicalize_memop(MemOp op, bool is64, bool st)
36 {
37     /* Trigger the asserts within as early as possible.  */
38     unsigned a_bits = get_alignment_bits(op);
39 
40     /* Prefer MO_ALIGN+MO_XX over MO_ALIGN_XX+MO_XX */
41     if (a_bits == (op & MO_SIZE)) {
42         op = (op & ~MO_AMASK) | MO_ALIGN;
43     }
44 
45     switch (op & MO_SIZE) {
46     case MO_8:
47         op &= ~MO_BSWAP;
48         break;
49     case MO_16:
50         break;
51     case MO_32:
52         if (!is64) {
53             op &= ~MO_SIGN;
54         }
55         break;
56     case MO_64:
57         if (is64) {
58             op &= ~MO_SIGN;
59             break;
60         }
61         /* fall through */
62     default:
63         g_assert_not_reached();
64     }
65     if (st) {
66         op &= ~MO_SIGN;
67     }
68     return op;
69 }
70 
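/*
 * gen_ldst_i32/gen_ldst_i64 emit the raw qemu_ld/qemu_st opcode.  The
 * value and the guest address are passed either as single arguments or
 * split into lo/hi 32-bit halves, depending on TCG_TARGET_REG_BITS and
 * TARGET_LONG_BITS, so the callers need not care about the host/guest
 * word-size combination.
 */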
71 static void gen_ldst_i32(TCGOpcode opc, TCGv_i32 val, TCGv addr,
72                          MemOp memop, TCGArg idx)
73 {
74     MemOpIdx oi = make_memop_idx(memop, idx);
75 #if TARGET_LONG_BITS == 32
76     tcg_gen_op3i_i32(opc, val, addr, oi);
77 #else
78     if (TCG_TARGET_REG_BITS == 32) {
79         tcg_gen_op4i_i32(opc, val, TCGV_LOW(addr), TCGV_HIGH(addr), oi);
80     } else {
81         tcg_gen_op3(opc, tcgv_i32_arg(val), tcgv_i64_arg(addr), oi);
82     }
83 #endif
84 }
85 
86 static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 val, TCGv addr,
87                          MemOp memop, TCGArg idx)
88 {
89     MemOpIdx oi = make_memop_idx(memop, idx);
90 #if TARGET_LONG_BITS == 32
91     if (TCG_TARGET_REG_BITS == 32) {
92         tcg_gen_op4i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val), addr, oi);
93     } else {
94         tcg_gen_op3(opc, tcgv_i64_arg(val), tcgv_i32_arg(addr), oi);
95     }
96 #else
97     if (TCG_TARGET_REG_BITS == 32) {
98         tcg_gen_op5i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val),
99                          TCGV_LOW(addr), TCGV_HIGH(addr), oi);
100     } else {
101         tcg_gen_op3i_i64(opc, val, addr, oi);
102     }
103 #endif
104 }
105 
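/*
 * Emit a host memory barrier if the guest memory model requires an
 * ordering that the host does not already provide: the barrier types
 * demanded by the guest (TCG_GUEST_DEFAULT_MO, when defined) are
 * filtered against those the host guarantees by default
 * (TCG_TARGET_DEFAULT_MO), and only the remainder generates a
 * tcg_gen_mb().  For example, a strongly ordered guest emulated on a
 * weakly ordered host emits barriers here, while the reverse pairing
 * typically emits none.
 */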
106 static void tcg_gen_req_mo(TCGBar type)
107 {
108 #ifdef TCG_GUEST_DEFAULT_MO
109     type &= TCG_GUEST_DEFAULT_MO;
110 #endif
111     type &= ~TCG_TARGET_DEFAULT_MO;
112     if (type) {
113         tcg_gen_mb(type | TCG_BAR_SC);
114     }
115 }
116 
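/*
 * Plugin instrumentation: when a TCG plugin is tracing this
 * instruction, plugin_prep_mem_callbacks() returns a copy of the guest
 * virtual address so that it is still available after a load that may
 * clobber the original temporary, and plugin_gen_mem_callbacks() emits
 * the (initially empty) memory callback for the plugin subsystem and
 * then frees that copy.
 */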
117 static inline TCGv plugin_prep_mem_callbacks(TCGv vaddr)
118 {
119 #ifdef CONFIG_PLUGIN
120     if (tcg_ctx->plugin_insn != NULL) {
121         /* Save a copy of the vaddr for use after a load.  */
122         TCGv temp = tcg_temp_new();
123         tcg_gen_mov_tl(temp, vaddr);
124         return temp;
125     }
126 #endif
127     return vaddr;
128 }
129 
130 static void plugin_gen_mem_callbacks(TCGv vaddr, MemOpIdx oi,
131                                      enum qemu_plugin_mem_rw rw)
132 {
133 #ifdef CONFIG_PLUGIN
134     if (tcg_ctx->plugin_insn != NULL) {
135         qemu_plugin_meminfo_t info = make_plugin_meminfo(oi, rw);
136         plugin_gen_empty_mem_callback(vaddr, info);
137         tcg_temp_free(vaddr);
138     }
139 #endif
140 }
141 
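/*
 * Front-end entry point for a guest memory load into a 32-bit value.
 * A typical use from a target translator (ctx->mem_idx is a
 * hypothetical name for the translator's current mmu index) would be:
 *
 *     tcg_gen_qemu_ld_i32(dest, addr, ctx->mem_idx, MO_TEUL);
 *
 * which loads a 32-bit target-endian value.  Any required memory
 * barrier, byte-swap fallback and plugin callback is generated here
 * rather than by the caller.
 */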
142 void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
143 {
144     MemOp orig_memop;
145     MemOpIdx oi;
146 
147     tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
148     memop = tcg_canonicalize_memop(memop, 0, 0);
149     oi = make_memop_idx(memop, idx);
150 
151     orig_memop = memop;
152     if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
153         memop &= ~MO_BSWAP;
154         /* The bswap primitive benefits from zero-extended input.  */
155         if ((memop & MO_SSIZE) == MO_SW) {
156             memop &= ~MO_SIGN;
157         }
158     }
159 
160     addr = plugin_prep_mem_callbacks(addr);
161     gen_ldst_i32(INDEX_op_qemu_ld_i32, val, addr, memop, idx);
162     plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
163 
164     if ((orig_memop ^ memop) & MO_BSWAP) {
165         switch (orig_memop & MO_SIZE) {
166         case MO_16:
167             tcg_gen_bswap16_i32(val, val, (orig_memop & MO_SIGN
168                                            ? TCG_BSWAP_IZ | TCG_BSWAP_OS
169                                            : TCG_BSWAP_IZ | TCG_BSWAP_OZ));
170             break;
171         case MO_32:
172             tcg_gen_bswap32_i32(val, val);
173             break;
174         default:
175             g_assert_not_reached();
176         }
177     }
178 }
179 
180 void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
181 {
182     TCGv_i32 swap = NULL;
183     MemOpIdx oi;
184 
185     tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
186     memop = tcg_canonicalize_memop(memop, 0, 1);
187     oi = make_memop_idx(memop, idx);
188 
189     if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
190         swap = tcg_temp_ebb_new_i32();
191         switch (memop & MO_SIZE) {
192         case MO_16:
193             tcg_gen_bswap16_i32(swap, val, 0);
194             break;
195         case MO_32:
196             tcg_gen_bswap32_i32(swap, val);
197             break;
198         default:
199             g_assert_not_reached();
200         }
201         val = swap;
202         memop &= ~MO_BSWAP;
203     }
204 
205     addr = plugin_prep_mem_callbacks(addr);
206     if (TCG_TARGET_HAS_qemu_st8_i32 && (memop & MO_SIZE) == MO_8) {
207         gen_ldst_i32(INDEX_op_qemu_st8_i32, val, addr, memop, idx);
208     } else {
209         gen_ldst_i32(INDEX_op_qemu_st_i32, val, addr, memop, idx);
210     }
211     plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
212 
213     if (swap) {
214         tcg_temp_free_i32(swap);
215     }
216 }
217 
218 void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
219 {
220     MemOp orig_memop;
221     MemOpIdx oi;
222 
223     if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
224         tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop);
225         if (memop & MO_SIGN) {
226             tcg_gen_sari_i32(TCGV_HIGH(val), TCGV_LOW(val), 31);
227         } else {
228             tcg_gen_movi_i32(TCGV_HIGH(val), 0);
229         }
230         return;
231     }
232 
233     tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
234     memop = tcg_canonicalize_memop(memop, 1, 0);
235     oi = make_memop_idx(memop, idx);
236 
237     orig_memop = memop;
238     if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
239         memop &= ~MO_BSWAP;
240         /* The bswap primitive benefits from zero-extended input.  */
241         if ((memop & MO_SIGN) && (memop & MO_SIZE) < MO_64) {
242             memop &= ~MO_SIGN;
243         }
244     }
245 
246     addr = plugin_prep_mem_callbacks(addr);
247     gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, memop, idx);
248     plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
249 
250     if ((orig_memop ^ memop) & MO_BSWAP) {
251         int flags = (orig_memop & MO_SIGN
252                      ? TCG_BSWAP_IZ | TCG_BSWAP_OS
253                      : TCG_BSWAP_IZ | TCG_BSWAP_OZ);
254         switch (orig_memop & MO_SIZE) {
255         case MO_16:
256             tcg_gen_bswap16_i64(val, val, flags);
257             break;
258         case MO_32:
259             tcg_gen_bswap32_i64(val, val, flags);
260             break;
261         case MO_64:
262             tcg_gen_bswap64_i64(val, val);
263             break;
264         default:
265             g_assert_not_reached();
266         }
267     }
268 }
269 
270 void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
271 {
272     TCGv_i64 swap = NULL;
273     MemOpIdx oi;
274 
275     if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
276         tcg_gen_qemu_st_i32(TCGV_LOW(val), addr, idx, memop);
277         return;
278     }
279 
280     tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
281     memop = tcg_canonicalize_memop(memop, 1, 1);
282     oi = make_memop_idx(memop, idx);
283 
284     if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
285         swap = tcg_temp_ebb_new_i64();
286         switch (memop & MO_SIZE) {
287         case MO_16:
288             tcg_gen_bswap16_i64(swap, val, 0);
289             break;
290         case MO_32:
291             tcg_gen_bswap32_i64(swap, val, 0);
292             break;
293         case MO_64:
294             tcg_gen_bswap64_i64(swap, val);
295             break;
296         default:
297             g_assert_not_reached();
298         }
299         val = swap;
300         memop &= ~MO_BSWAP;
301     }
302 
303     addr = plugin_prep_mem_callbacks(addr);
304     gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, memop, idx);
305     plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
306 
307     if (swap) {
308         tcg_temp_free_i64(swap);
309     }
310 }
311 
312 /*
313  * Return true if @mop, without knowledge of the pointer alignment,
314  * does not require 16-byte atomicity, and it would be advantageous
315  * to avoid a call to a helper function.
316  */
317 static bool use_two_i64_for_i128(MemOp mop)
318 {
319 #ifdef CONFIG_SOFTMMU
320     /* Two softmmu tlb lookups are larger than one function call. */
321     return false;
322 #else
323     /*
324      * For user-only, two 64-bit operations may well be smaller than a call.
325      * Determine if that would be legal for the requested atomicity.
326      */
327     switch (mop & MO_ATOM_MASK) {
328     case MO_ATOM_NONE:
329     case MO_ATOM_IFALIGN_PAIR:
330         return true;
331     case MO_ATOM_IFALIGN:
332     case MO_ATOM_SUBALIGN:
333     case MO_ATOM_WITHIN16:
334     case MO_ATOM_WITHIN16_PAIR:
335         /* In a serialized context, no atomicity is required. */
336         return !(tcg_ctx->gen_tb->cflags & CF_PARALLEL);
337     default:
338         g_assert_not_reached();
339     }
340 #endif
341 }
342 
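/*
 * Split a 128-bit MemOp into two 64-bit MemOps, preserving the overall
 * alignment requirement.  For instance, MO_128 | MO_ALIGN produces
 * ret[0] = MO_64 | MO_ALIGN_16 for the first half and
 * ret[1] = MO_64 | MO_ALIGN for the second half.
 */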
343 static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
344 {
345     MemOp mop_1 = orig, mop_2;
346 
347     tcg_debug_assert((orig & MO_SIZE) == MO_128);
348     tcg_debug_assert((orig & MO_SIGN) == 0);
349 
350     /* Reduce the size to 64-bit. */
351     mop_1 = (mop_1 & ~MO_SIZE) | MO_64;
352 
353     /* Retain the alignment constraints of the original. */
354     switch (orig & MO_AMASK) {
355     case MO_UNALN:
356     case MO_ALIGN_2:
357     case MO_ALIGN_4:
358         mop_2 = mop_1;
359         break;
360     case MO_ALIGN_8:
361         /* Prefer MO_ALIGN+MO_64 to MO_ALIGN_8+MO_64. */
362         mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
363         mop_2 = mop_1;
364         break;
365     case MO_ALIGN:
366         /* Second has 8-byte alignment; first has 16-byte alignment. */
367         mop_2 = mop_1;
368         mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN_16;
369         break;
370     case MO_ALIGN_16:
371     case MO_ALIGN_32:
372     case MO_ALIGN_64:
373         /* Second has 8-byte alignment; first retains original. */
374         mop_2 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
375         break;
376     default:
377         g_assert_not_reached();
378     }
379 
380     /* Use a byte ordering implemented by the host. */
381     if ((orig & MO_BSWAP) && !tcg_target_has_memory_bswap(mop_1)) {
382         mop_1 &= ~MO_BSWAP;
383         mop_2 &= ~MO_BSWAP;
384     }
385 
386     ret[0] = mop_1;
387     ret[1] = mop_2;
388 }
389 
390 #if TARGET_LONG_BITS == 64
391 #define tcg_temp_ebb_new  tcg_temp_ebb_new_i64
392 #else
393 #define tcg_temp_ebb_new  tcg_temp_ebb_new_i32
394 #endif
395 
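/*
 * 128-bit loads and stores are expanded in one of three ways: a native
 * qemu_ld/st_i128 opcode when the backend advertises
 * TCG_TARGET_HAS_qemu_ldst_i128 and the host is 64-bit (see the TODO
 * below), a pair of 64-bit accesses when use_two_i64_for_i128() says
 * the requested atomicity allows it, or a call to the ld_i128/st_i128
 * helpers otherwise.
 */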
396 void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
397 {
398     const MemOpIdx oi = make_memop_idx(memop, idx);
399 
400     tcg_debug_assert((memop & MO_SIZE) == MO_128);
401     tcg_debug_assert((memop & MO_SIGN) == 0);
402 
403     tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
404     addr = plugin_prep_mem_callbacks(addr);
405 
406     /* TODO: For now, force 32-bit hosts to use the helper. */
407     if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
408         TCGv_i64 lo, hi;
409         TCGArg addr_arg;
410         MemOpIdx adj_oi;
411         bool need_bswap = false;
412 
413         if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
414             lo = TCGV128_HIGH(val);
415             hi = TCGV128_LOW(val);
416             adj_oi = make_memop_idx(memop & ~MO_BSWAP, idx);
417             need_bswap = true;
418         } else {
419             lo = TCGV128_LOW(val);
420             hi = TCGV128_HIGH(val);
421             adj_oi = oi;
422         }
423 
424 #if TARGET_LONG_BITS == 32
425         addr_arg = tcgv_i32_arg(addr);
426 #else
427         addr_arg = tcgv_i64_arg(addr);
428 #endif
429         tcg_gen_op4ii_i64(INDEX_op_qemu_ld_i128, lo, hi, addr_arg, adj_oi);
430 
431         if (need_bswap) {
432             tcg_gen_bswap64_i64(lo, lo);
433             tcg_gen_bswap64_i64(hi, hi);
434         }
435     } else if (use_two_i64_for_i128(memop)) {
436         MemOp mop[2];
437         TCGv addr_p8;
438         TCGv_i64 x, y;
439 
440         canonicalize_memop_i128_as_i64(mop, memop);
441 
442         /*
443          * Since there are no global TCGv_i128, no visible state is
444          * changed if the second load faults.  Load directly into the two
445          * subwords.
446          */
447         if ((memop & MO_BSWAP) == MO_LE) {
448             x = TCGV128_LOW(val);
449             y = TCGV128_HIGH(val);
450         } else {
451             x = TCGV128_HIGH(val);
452             y = TCGV128_LOW(val);
453         }
454 
455         gen_ldst_i64(INDEX_op_qemu_ld_i64, x, addr, mop[0], idx);
456 
457         if ((mop[0] ^ memop) & MO_BSWAP) {
458             tcg_gen_bswap64_i64(x, x);
459         }
460 
461         addr_p8 = tcg_temp_ebb_new();
462         tcg_gen_addi_tl(addr_p8, addr, 8);
463         gen_ldst_i64(INDEX_op_qemu_ld_i64, y, addr_p8, mop[1], idx);
464         tcg_temp_free(addr_p8);
465 
466         if ((mop[0] ^ memop) & MO_BSWAP) {
467             tcg_gen_bswap64_i64(y, y);
468         }
469     } else {
470         gen_helper_ld_i128(val, cpu_env, addr, tcg_constant_i32(oi));
471     }
472 
473     plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
474 }
475 
476 void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
477 {
478     const MemOpIdx oi = make_memop_idx(memop, idx);
479 
480     tcg_debug_assert((memop & MO_SIZE) == MO_128);
481     tcg_debug_assert((memop & MO_SIGN) == 0);
482 
483     tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);
484     addr = plugin_prep_mem_callbacks(addr);
485 
486     /* TODO: For now, force 32-bit hosts to use the helper. */
487 
488     if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
489         TCGv_i64 lo, hi;
490         TCGArg addr_arg;
491         MemOpIdx adj_oi;
492         bool need_bswap = false;
493 
494         if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
495             lo = tcg_temp_new_i64();
496             hi = tcg_temp_new_i64();
497             tcg_gen_bswap64_i64(lo, TCGV128_HIGH(val));
498             tcg_gen_bswap64_i64(hi, TCGV128_LOW(val));
499             adj_oi = make_memop_idx(memop & ~MO_BSWAP, idx);
500             need_bswap = true;
501         } else {
502             lo = TCGV128_LOW(val);
503             hi = TCGV128_HIGH(val);
504             adj_oi = oi;
505         }
506 
507 #if TARGET_LONG_BITS == 32
508         addr_arg = tcgv_i32_arg(addr);
509 #else
510         addr_arg = tcgv_i64_arg(addr);
511 #endif
512         tcg_gen_op4ii_i64(INDEX_op_qemu_st_i128, lo, hi, addr_arg, adj_oi);
513 
514         if (need_bswap) {
515             tcg_temp_free_i64(lo);
516             tcg_temp_free_i64(hi);
517         }
518     } else if (use_two_i64_for_i128(memop)) {
519         MemOp mop[2];
520         TCGv addr_p8;
521         TCGv_i64 x, y;
522 
523         canonicalize_memop_i128_as_i64(mop, memop);
524 
525         if ((memop & MO_BSWAP) == MO_LE) {
526             x = TCGV128_LOW(val);
527             y = TCGV128_HIGH(val);
528         } else {
529             x = TCGV128_HIGH(val);
530             y = TCGV128_LOW(val);
531         }
532 
533         addr_p8 = tcg_temp_ebb_new();
534         if ((mop[0] ^ memop) & MO_BSWAP) {
535             TCGv_i64 t = tcg_temp_ebb_new_i64();
536 
537             tcg_gen_bswap64_i64(t, x);
538             gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr, mop[0], idx);
539             tcg_gen_bswap64_i64(t, y);
540             tcg_gen_addi_tl(addr_p8, addr, 8);
541             gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr_p8, mop[1], idx);
542             tcg_temp_free_i64(t);
543         } else {
544             gen_ldst_i64(INDEX_op_qemu_st_i64, x, addr, mop[0], idx);
545             tcg_gen_addi_tl(addr_p8, addr, 8);
546             gen_ldst_i64(INDEX_op_qemu_st_i64, y, addr_p8, mop[1], idx);
547         }
548         tcg_temp_free(addr_p8);
549     } else {
550         gen_helper_st_i128(cpu_env, addr, val, tcg_constant_i32(oi));
551     }
552 
553     plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
554 }
555 
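/*
 * Sign- or zero-extend @val according to the MO_SSIZE bits of @opc.
 * Used below to canonicalize compare values, operands and results for
 * the cmpxchg and atomic read-modify-write expansions.
 */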
556 static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc)
557 {
558     switch (opc & MO_SSIZE) {
559     case MO_SB:
560         tcg_gen_ext8s_i32(ret, val);
561         break;
562     case MO_UB:
563         tcg_gen_ext8u_i32(ret, val);
564         break;
565     case MO_SW:
566         tcg_gen_ext16s_i32(ret, val);
567         break;
568     case MO_UW:
569         tcg_gen_ext16u_i32(ret, val);
570         break;
571     default:
572         tcg_gen_mov_i32(ret, val);
573         break;
574     }
575 }
576 
577 static void tcg_gen_ext_i64(TCGv_i64 ret, TCGv_i64 val, MemOp opc)
578 {
579     switch (opc & MO_SSIZE) {
580     case MO_SB:
581         tcg_gen_ext8s_i64(ret, val);
582         break;
583     case MO_UB:
584         tcg_gen_ext8u_i64(ret, val);
585         break;
586     case MO_SW:
587         tcg_gen_ext16s_i64(ret, val);
588         break;
589     case MO_UW:
590         tcg_gen_ext16u_i64(ret, val);
591         break;
592     case MO_SL:
593         tcg_gen_ext32s_i64(ret, val);
594         break;
595     case MO_UL:
596         tcg_gen_ext32u_i64(ret, val);
597         break;
598     default:
599         tcg_gen_mov_i64(ret, val);
600         break;
601     }
602 }
603 
604 typedef void (*gen_atomic_cx_i32)(TCGv_i32, TCGv_env, TCGv,
605                                   TCGv_i32, TCGv_i32, TCGv_i32);
606 typedef void (*gen_atomic_cx_i64)(TCGv_i64, TCGv_env, TCGv,
607                                   TCGv_i64, TCGv_i64, TCGv_i32);
608 typedef void (*gen_atomic_cx_i128)(TCGv_i128, TCGv_env, TCGv,
609                                    TCGv_i128, TCGv_i128, TCGv_i32);
610 typedef void (*gen_atomic_op_i32)(TCGv_i32, TCGv_env, TCGv,
611                                   TCGv_i32, TCGv_i32);
612 typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv,
613                                   TCGv_i64, TCGv_i32);
614 
615 #ifdef CONFIG_ATOMIC64
616 # define WITH_ATOMIC64(X) X,
617 #else
618 # define WITH_ATOMIC64(X)
619 #endif
620 #ifdef CONFIG_CMPXCHG128
621 # define WITH_ATOMIC128(X) X,
622 #else
623 # define WITH_ATOMIC128(X)
624 #endif
625 
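/*
 * Helper table indexed by MO_SIZE | MO_BSWAP.  Entries whose size the
 * host cannot handle atomically are compiled out by the
 * WITH_ATOMIC64/WITH_ATOMIC128 wrappers above and remain NULL, which
 * the callers test or assert before use.
 */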
626 static void * const table_cmpxchg[(MO_SIZE | MO_BSWAP) + 1] = {
627     [MO_8] = gen_helper_atomic_cmpxchgb,
628     [MO_16 | MO_LE] = gen_helper_atomic_cmpxchgw_le,
629     [MO_16 | MO_BE] = gen_helper_atomic_cmpxchgw_be,
630     [MO_32 | MO_LE] = gen_helper_atomic_cmpxchgl_le,
631     [MO_32 | MO_BE] = gen_helper_atomic_cmpxchgl_be,
632     WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_cmpxchgq_le)
633     WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_cmpxchgq_be)
634     WITH_ATOMIC128([MO_128 | MO_LE] = gen_helper_atomic_cmpxchgo_le)
635     WITH_ATOMIC128([MO_128 | MO_BE] = gen_helper_atomic_cmpxchgo_be)
636 };
637 
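/*
 * Non-atomic cmpxchg expansion, used directly by targets and by
 * tcg_gen_atomic_cmpxchg_i32() when the TB does not run with
 * CF_PARALLEL and therefore no other vCPU can observe an intermediate
 * state: load the old value, select between it and @newv with a
 * movcond on equality with @cmpv, store the result back
 * unconditionally, and return the (possibly sign-extended) old value.
 */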
638 void tcg_gen_nonatomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
639                                    TCGv_i32 newv, TCGArg idx, MemOp memop)
640 {
641     TCGv_i32 t1 = tcg_temp_ebb_new_i32();
642     TCGv_i32 t2 = tcg_temp_ebb_new_i32();
643 
644     tcg_gen_ext_i32(t2, cmpv, memop & MO_SIZE);
645 
646     tcg_gen_qemu_ld_i32(t1, addr, idx, memop & ~MO_SIGN);
647     tcg_gen_movcond_i32(TCG_COND_EQ, t2, t1, t2, newv, t1);
648     tcg_gen_qemu_st_i32(t2, addr, idx, memop);
649     tcg_temp_free_i32(t2);
650 
651     if (memop & MO_SIGN) {
652         tcg_gen_ext_i32(retv, t1, memop);
653     } else {
654         tcg_gen_mov_i32(retv, t1);
655     }
656     tcg_temp_free_i32(t1);
657 }
658 
659 void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
660                                 TCGv_i32 newv, TCGArg idx, MemOp memop)
661 {
662     gen_atomic_cx_i32 gen;
663     MemOpIdx oi;
664 
665     if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
666         tcg_gen_nonatomic_cmpxchg_i32(retv, addr, cmpv, newv, idx, memop);
667         return;
668     }
669 
670     memop = tcg_canonicalize_memop(memop, 0, 0);
671     gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
672     tcg_debug_assert(gen != NULL);
673 
674     oi = make_memop_idx(memop & ~MO_SIGN, idx);
675     gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
676 
677     if (memop & MO_SIGN) {
678         tcg_gen_ext_i32(retv, retv, memop);
679     }
680 }
681 
682 void tcg_gen_nonatomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
683                                    TCGv_i64 newv, TCGArg idx, MemOp memop)
684 {
685     TCGv_i64 t1, t2;
686 
687     if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
688         tcg_gen_nonatomic_cmpxchg_i32(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
689                                       TCGV_LOW(newv), idx, memop);
690         if (memop & MO_SIGN) {
691             tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31);
692         } else {
693             tcg_gen_movi_i32(TCGV_HIGH(retv), 0);
694         }
695         return;
696     }
697 
698     t1 = tcg_temp_ebb_new_i64();
699     t2 = tcg_temp_ebb_new_i64();
700 
701     tcg_gen_ext_i64(t2, cmpv, memop & MO_SIZE);
702 
703     tcg_gen_qemu_ld_i64(t1, addr, idx, memop & ~MO_SIGN);
704     tcg_gen_movcond_i64(TCG_COND_EQ, t2, t1, t2, newv, t1);
705     tcg_gen_qemu_st_i64(t2, addr, idx, memop);
706     tcg_temp_free_i64(t2);
707 
708     if (memop & MO_SIGN) {
709         tcg_gen_ext_i64(retv, t1, memop);
710     } else {
711         tcg_gen_mov_i64(retv, t1);
712     }
713     tcg_temp_free_i64(t1);
714 }
715 
716 void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
717                                 TCGv_i64 newv, TCGArg idx, MemOp memop)
718 {
719     if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
720         tcg_gen_nonatomic_cmpxchg_i64(retv, addr, cmpv, newv, idx, memop);
721         return;
722     }
723 
724     if ((memop & MO_SIZE) == MO_64) {
725         gen_atomic_cx_i64 gen;
726 
727         memop = tcg_canonicalize_memop(memop, 1, 0);
728         gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
729         if (gen) {
730             MemOpIdx oi = make_memop_idx(memop, idx);
731             gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
732             return;
733         }
734 
735         gen_helper_exit_atomic(cpu_env);
736 
737         /*
738          * Produce a result for a well-formed opcode stream.  This satisfies
739          * liveness for set before used, which happens before this dead code
740          * is removed.
741          */
742         tcg_gen_movi_i64(retv, 0);
743         return;
744     }
745 
746     if (TCG_TARGET_REG_BITS == 32) {
747         tcg_gen_atomic_cmpxchg_i32(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
748                                    TCGV_LOW(newv), idx, memop);
749         if (memop & MO_SIGN) {
750             tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31);
751         } else {
752             tcg_gen_movi_i32(TCGV_HIGH(retv), 0);
753         }
754     } else {
755         TCGv_i32 c32 = tcg_temp_ebb_new_i32();
756         TCGv_i32 n32 = tcg_temp_ebb_new_i32();
757         TCGv_i32 r32 = tcg_temp_ebb_new_i32();
758 
759         tcg_gen_extrl_i64_i32(c32, cmpv);
760         tcg_gen_extrl_i64_i32(n32, newv);
761         tcg_gen_atomic_cmpxchg_i32(r32, addr, c32, n32, idx, memop & ~MO_SIGN);
762         tcg_temp_free_i32(c32);
763         tcg_temp_free_i32(n32);
764 
765         tcg_gen_extu_i32_i64(retv, r32);
766         tcg_temp_free_i32(r32);
767 
768         if (memop & MO_SIGN) {
769             tcg_gen_ext_i64(retv, retv, memop);
770         }
771     }
772 }
773 
774 void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
775                                     TCGv_i128 newv, TCGArg idx, MemOp memop)
776 {
777     if (TCG_TARGET_REG_BITS == 32) {
778         /* Inline expansion below is simply too large for 32-bit hosts. */
779         gen_atomic_cx_i128 gen = ((memop & MO_BSWAP) == MO_LE
780                                   ? gen_helper_nonatomic_cmpxchgo_le
781                                   : gen_helper_nonatomic_cmpxchgo_be);
782         MemOpIdx oi = make_memop_idx(memop, idx);
783 
784         tcg_debug_assert((memop & MO_SIZE) == MO_128);
785         tcg_debug_assert((memop & MO_SIGN) == 0);
786 
787         gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
788     } else {
789         TCGv_i128 oldv = tcg_temp_ebb_new_i128();
790         TCGv_i128 tmpv = tcg_temp_ebb_new_i128();
791         TCGv_i64 t0 = tcg_temp_ebb_new_i64();
792         TCGv_i64 t1 = tcg_temp_ebb_new_i64();
793         TCGv_i64 z = tcg_constant_i64(0);
794 
795         tcg_gen_qemu_ld_i128(oldv, addr, idx, memop);
796 
797         /* Compare i128 */
798         tcg_gen_xor_i64(t0, TCGV128_LOW(oldv), TCGV128_LOW(cmpv));
799         tcg_gen_xor_i64(t1, TCGV128_HIGH(oldv), TCGV128_HIGH(cmpv));
800         tcg_gen_or_i64(t0, t0, t1);
801 
802         /* tmpv = equal ? newv : oldv */
803         tcg_gen_movcond_i64(TCG_COND_EQ, TCGV128_LOW(tmpv), t0, z,
804                             TCGV128_LOW(newv), TCGV128_LOW(oldv));
805         tcg_gen_movcond_i64(TCG_COND_EQ, TCGV128_HIGH(tmpv), t0, z,
806                             TCGV128_HIGH(newv), TCGV128_HIGH(oldv));
807 
808         /* Unconditional writeback. */
809         tcg_gen_qemu_st_i128(tmpv, addr, idx, memop);
810         tcg_gen_mov_i128(retv, oldv);
811 
812         tcg_temp_free_i64(t0);
813         tcg_temp_free_i64(t1);
814         tcg_temp_free_i128(tmpv);
815         tcg_temp_free_i128(oldv);
816     }
817 }
818 
819 void tcg_gen_atomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
820                                  TCGv_i128 newv, TCGArg idx, MemOp memop)
821 {
822     gen_atomic_cx_i128 gen;
823 
824     if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
825         tcg_gen_nonatomic_cmpxchg_i128(retv, addr, cmpv, newv, idx, memop);
826         return;
827     }
828 
829     tcg_debug_assert((memop & MO_SIZE) == MO_128);
830     tcg_debug_assert((memop & MO_SIGN) == 0);
831     gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
832 
833     if (gen) {
834         MemOpIdx oi = make_memop_idx(memop, idx);
835         gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
836         return;
837     }
838 
839     gen_helper_exit_atomic(cpu_env);
840 
841     /*
842      * Produce a result for a well-formed opcode stream.  This satisfies
843      * liveness for set before used, which happens before this dead code
844      * is removed.
845      */
846     tcg_gen_movi_i64(TCGV128_LOW(retv), 0);
847     tcg_gen_movi_i64(TCGV128_HIGH(retv), 0);
848 }
849 
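/*
 * Expansions shared by the GEN_ATOMIC_HELPER instantiations below.
 * do_nonatomic_op_* emit a plain load / operation / store sequence for
 * serial execution, while do_atomic_op_* dispatch to the per-size
 * atomic helpers.  In do_nonatomic_op_*, @new_val selects whether the
 * value before or after the operation is returned; the helper tables
 * encode that distinction in the helper name instead.
 */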
850 static void do_nonatomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
851                                 TCGArg idx, MemOp memop, bool new_val,
852                                 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
853 {
854     TCGv_i32 t1 = tcg_temp_ebb_new_i32();
855     TCGv_i32 t2 = tcg_temp_ebb_new_i32();
856 
857     memop = tcg_canonicalize_memop(memop, 0, 0);
858 
859     tcg_gen_qemu_ld_i32(t1, addr, idx, memop);
860     tcg_gen_ext_i32(t2, val, memop);
861     gen(t2, t1, t2);
862     tcg_gen_qemu_st_i32(t2, addr, idx, memop);
863 
864     tcg_gen_ext_i32(ret, (new_val ? t2 : t1), memop);
865     tcg_temp_free_i32(t1);
866     tcg_temp_free_i32(t2);
867 }
868 
869 static void do_atomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
870                              TCGArg idx, MemOp memop, void * const table[])
871 {
872     gen_atomic_op_i32 gen;
873     MemOpIdx oi;
874 
875     memop = tcg_canonicalize_memop(memop, 0, 0);
876 
877     gen = table[memop & (MO_SIZE | MO_BSWAP)];
878     tcg_debug_assert(gen != NULL);
879 
880     oi = make_memop_idx(memop & ~MO_SIGN, idx);
881     gen(ret, cpu_env, addr, val, tcg_constant_i32(oi));
882 
883     if (memop & MO_SIGN) {
884         tcg_gen_ext_i32(ret, ret, memop);
885     }
886 }
887 
888 static void do_nonatomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
889                                 TCGArg idx, MemOp memop, bool new_val,
890                                 void (*gen)(TCGv_i64, TCGv_i64, TCGv_i64))
891 {
892     TCGv_i64 t1 = tcg_temp_ebb_new_i64();
893     TCGv_i64 t2 = tcg_temp_ebb_new_i64();
894 
895     memop = tcg_canonicalize_memop(memop, 1, 0);
896 
897     tcg_gen_qemu_ld_i64(t1, addr, idx, memop);
898     tcg_gen_ext_i64(t2, val, memop);
899     gen(t2, t1, t2);
900     tcg_gen_qemu_st_i64(t2, addr, idx, memop);
901 
902     tcg_gen_ext_i64(ret, (new_val ? t2 : t1), memop);
903     tcg_temp_free_i64(t1);
904     tcg_temp_free_i64(t2);
905 }
906 
907 static void do_atomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
908                              TCGArg idx, MemOp memop, void * const table[])
909 {
910     memop = tcg_canonicalize_memop(memop, 1, 0);
911 
912     if ((memop & MO_SIZE) == MO_64) {
913 #ifdef CONFIG_ATOMIC64
914         gen_atomic_op_i64 gen;
915         MemOpIdx oi;
916 
917         gen = table[memop & (MO_SIZE | MO_BSWAP)];
918         tcg_debug_assert(gen != NULL);
919 
920         oi = make_memop_idx(memop & ~MO_SIGN, idx);
921         gen(ret, cpu_env, addr, val, tcg_constant_i32(oi));
922 #else
923         gen_helper_exit_atomic(cpu_env);
924         /* Produce a result, so that we have a well-formed opcode stream
925            with respect to uses of the result in the (dead) code following.  */
926         tcg_gen_movi_i64(ret, 0);
927 #endif /* CONFIG_ATOMIC64 */
928     } else {
929         TCGv_i32 v32 = tcg_temp_ebb_new_i32();
930         TCGv_i32 r32 = tcg_temp_ebb_new_i32();
931 
932         tcg_gen_extrl_i64_i32(v32, val);
933         do_atomic_op_i32(r32, addr, v32, idx, memop & ~MO_SIGN, table);
934         tcg_temp_free_i32(v32);
935 
936         tcg_gen_extu_i32_i64(ret, r32);
937         tcg_temp_free_i32(r32);
938 
939         if (memop & MO_SIGN) {
940             tcg_gen_ext_i64(ret, ret, memop);
941         }
942     }
943 }
944 
945 #define GEN_ATOMIC_HELPER(NAME, OP, NEW)                                \
946 static void * const table_##NAME[(MO_SIZE | MO_BSWAP) + 1] = {          \
947     [MO_8] = gen_helper_atomic_##NAME##b,                               \
948     [MO_16 | MO_LE] = gen_helper_atomic_##NAME##w_le,                   \
949     [MO_16 | MO_BE] = gen_helper_atomic_##NAME##w_be,                   \
950     [MO_32 | MO_LE] = gen_helper_atomic_##NAME##l_le,                   \
951     [MO_32 | MO_BE] = gen_helper_atomic_##NAME##l_be,                   \
952     WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_##NAME##q_le)     \
953     WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_##NAME##q_be)     \
954 };                                                                      \
955 void tcg_gen_atomic_##NAME##_i32                                        \
956     (TCGv_i32 ret, TCGv addr, TCGv_i32 val, TCGArg idx, MemOp memop)    \
957 {                                                                       \
958     if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) {                        \
959         do_atomic_op_i32(ret, addr, val, idx, memop, table_##NAME);     \
960     } else {                                                            \
961         do_nonatomic_op_i32(ret, addr, val, idx, memop, NEW,            \
962                             tcg_gen_##OP##_i32);                        \
963     }                                                                   \
964 }                                                                       \
965 void tcg_gen_atomic_##NAME##_i64                                        \
966     (TCGv_i64 ret, TCGv addr, TCGv_i64 val, TCGArg idx, MemOp memop)    \
967 {                                                                       \
968     if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) {                        \
969         do_atomic_op_i64(ret, addr, val, idx, memop, table_##NAME);     \
970     } else {                                                            \
971         do_nonatomic_op_i64(ret, addr, val, idx, memop, NEW,            \
972                             tcg_gen_##OP##_i64);                        \
973     }                                                                   \
974 }
975 
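/*
 * For instance, GEN_ATOMIC_HELPER(fetch_add, add, 0) defines
 * tcg_gen_atomic_fetch_add_i32/_i64, which return the value held in
 * memory before the addition, whereas the NEW=1 "add_fetch" variant
 * returns the value after it.  Each generated function picks the
 * atomic helper table or the non-atomic expansion at translation time
 * based on CF_PARALLEL.
 */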
976 GEN_ATOMIC_HELPER(fetch_add, add, 0)
977 GEN_ATOMIC_HELPER(fetch_and, and, 0)
978 GEN_ATOMIC_HELPER(fetch_or, or, 0)
979 GEN_ATOMIC_HELPER(fetch_xor, xor, 0)
980 GEN_ATOMIC_HELPER(fetch_smin, smin, 0)
981 GEN_ATOMIC_HELPER(fetch_umin, umin, 0)
982 GEN_ATOMIC_HELPER(fetch_smax, smax, 0)
983 GEN_ATOMIC_HELPER(fetch_umax, umax, 0)
984 
985 GEN_ATOMIC_HELPER(add_fetch, add, 1)
986 GEN_ATOMIC_HELPER(and_fetch, and, 1)
987 GEN_ATOMIC_HELPER(or_fetch, or, 1)
988 GEN_ATOMIC_HELPER(xor_fetch, xor, 1)
989 GEN_ATOMIC_HELPER(smin_fetch, smin, 1)
990 GEN_ATOMIC_HELPER(umin_fetch, umin, 1)
991 GEN_ATOMIC_HELPER(smax_fetch, smax, 1)
992 GEN_ATOMIC_HELPER(umax_fetch, umax, 1)
993 
994 static void tcg_gen_mov2_i32(TCGv_i32 r, TCGv_i32 a, TCGv_i32 b)
995 {
996     tcg_gen_mov_i32(r, b);
997 }
998 
999 static void tcg_gen_mov2_i64(TCGv_i64 r, TCGv_i64 a, TCGv_i64 b)
1000 {
1001     tcg_gen_mov_i64(r, b);
1002 }
1003 
1004 GEN_ATOMIC_HELPER(xchg, mov2, 0)
1005 
1006 #undef GEN_ATOMIC_HELPER
1007