1 /*
2 * Optimizations for Tiny Code Generator for QEMU
3 *
4 * Copyright (c) 2010 Samsung Electronics.
5 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 * THE SOFTWARE.
24 */
25
26 #include "qemu/osdep.h"
27 #include "qemu/int128.h"
28 #include "qemu/interval-tree.h"
29 #include "tcg/tcg-op-common.h"
30 #include "tcg-internal.h"
31 #include "tcg-has.h"
32
33
34 typedef struct MemCopyInfo {
35 IntervalTreeNode itree;
36 QSIMPLEQ_ENTRY (MemCopyInfo) next;
37 TCGTemp *ts;
38 TCGType type;
39 } MemCopyInfo;
40
41 typedef struct TempOptInfo {
42 TCGTemp *prev_copy;
43 TCGTemp *next_copy;
44 QSIMPLEQ_HEAD(, MemCopyInfo) mem_copy;
45 uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */
46 uint64_t o_mask; /* mask bit is 1 if and only if value bit is 1 */
47 uint64_t s_mask; /* mask bit is 1 if value bit matches msb */
48 } TempOptInfo;
49
50 typedef struct OptContext {
51 TCGContext *tcg;
52 TCGOp *prev_mb;
53 TCGTempSet temps_used;
54
55 IntervalTreeRoot mem_copy;
56 QSIMPLEQ_HEAD(, MemCopyInfo) mem_free;
57
58 /* In flight values from optimization. */
59 TCGType type;
60 int carry_state; /* -1 = non-constant, {0,1} = constant carry-in */
61 } OptContext;
62
63 static inline TempOptInfo *ts_info(TCGTemp *ts)
64 {
65 return ts->state_ptr;
66 }
67
68 static inline TempOptInfo *arg_info(TCGArg arg)
69 {
70 return ts_info(arg_temp(arg));
71 }
72
73 static inline bool ti_is_const(TempOptInfo *ti)
74 {
75 /* If all bits that are not known zeros are known ones, it's constant. */
76 return ti->z_mask == ti->o_mask;
77 }
78
79 static inline uint64_t ti_const_val(TempOptInfo *ti)
80 {
81 /* If constant, both z_mask and o_mask contain the value. */
82 return ti->z_mask;
83 }
84
85 static inline bool ti_is_const_val(TempOptInfo *ti, uint64_t val)
86 {
87 return ti_is_const(ti) && ti_const_val(ti) == val;
88 }
89
90 static inline bool ts_is_const(TCGTemp *ts)
91 {
92 return ti_is_const(ts_info(ts));
93 }
94
95 static inline bool ts_is_const_val(TCGTemp *ts, uint64_t val)
96 {
97 return ti_is_const_val(ts_info(ts), val);
98 }
99
100 static inline bool arg_is_const(TCGArg arg)
101 {
102 return ts_is_const(arg_temp(arg));
103 }
104
105 static inline uint64_t arg_const_val(TCGArg arg)
106 {
107 return ti_const_val(arg_info(arg));
108 }
109
110 static inline bool arg_is_const_val(TCGArg arg, uint64_t val)
111 {
112 return ts_is_const_val(arg_temp(arg), val);
113 }
114
115 static inline bool ts_is_copy(TCGTemp *ts)
116 {
117 return ts_info(ts)->next_copy != ts;
118 }
119
120 static TCGTemp *cmp_better_copy(TCGTemp *a, TCGTemp *b)
121 {
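/*
 * Temp kinds are ordered so that longer-lived temps compare higher:
 * constants and globals are preferred over TB and EBB temporaries
 * as the canonical representative of a copy set.
 */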
122 return a->kind < b->kind ? b : a;
123 }
124
125 /* Initialize and activate a temporary. */
126 static void init_ts_info(OptContext *ctx, TCGTemp *ts)
127 {
128 size_t idx = temp_idx(ts);
129 TempOptInfo *ti;
130
131 if (test_bit(idx, ctx->temps_used.l)) {
132 return;
133 }
134 set_bit(idx, ctx->temps_used.l);
135
136 ti = ts->state_ptr;
137 if (ti == NULL) {
138 ti = tcg_malloc(sizeof(TempOptInfo));
139 ts->state_ptr = ti;
140 }
141
142 ti->next_copy = ts;
143 ti->prev_copy = ts;
144 QSIMPLEQ_INIT(&ti->mem_copy);
145 if (ts->kind == TEMP_CONST) {
146 ti->z_mask = ts->val;
147 ti->o_mask = ts->val;
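/* clrsb64() counts the redundant sign bits, so the arithmetic shift below sets exactly the bits known to equal the msb. */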
148 ti->s_mask = INT64_MIN >> clrsb64(ts->val);
149 } else {
150 ti->z_mask = -1;
151 ti->o_mask = 0;
152 ti->s_mask = 0;
153 }
154 }
155
156 static MemCopyInfo *mem_copy_first(OptContext *ctx, intptr_t s, intptr_t l)
157 {
158 IntervalTreeNode *r = interval_tree_iter_first(&ctx->mem_copy, s, l);
159 return r ? container_of(r, MemCopyInfo, itree) : NULL;
160 }
161
162 static MemCopyInfo *mem_copy_next(MemCopyInfo *mem, intptr_t s, intptr_t l)
163 {
164 IntervalTreeNode *r = interval_tree_iter_next(&mem->itree, s, l);
165 return r ? container_of(r, MemCopyInfo, itree) : NULL;
166 }
167
168 static void remove_mem_copy(OptContext *ctx, MemCopyInfo *mc)
169 {
170 TCGTemp *ts = mc->ts;
171 TempOptInfo *ti = ts_info(ts);
172
173 interval_tree_remove(&mc->itree, &ctx->mem_copy);
174 QSIMPLEQ_REMOVE(&ti->mem_copy, mc, MemCopyInfo, next);
175 QSIMPLEQ_INSERT_TAIL(&ctx->mem_free, mc, next);
176 }
177
178 static void remove_mem_copy_in(OptContext *ctx, intptr_t s, intptr_t l)
179 {
180 while (true) {
181 MemCopyInfo *mc = mem_copy_first(ctx, s, l);
182 if (!mc) {
183 break;
184 }
185 remove_mem_copy(ctx, mc);
186 }
187 }
188
189 static void remove_mem_copy_all(OptContext *ctx)
190 {
191 remove_mem_copy_in(ctx, 0, -1);
192 tcg_debug_assert(interval_tree_is_empty(&ctx->mem_copy));
193 }
194
195 static TCGTemp *find_better_copy(TCGTemp *ts)
196 {
197 TCGTemp *i, *ret;
198
199 /* If this is already readonly, we can't do better. */
200 if (temp_readonly(ts)) {
201 return ts;
202 }
203
204 ret = ts;
205 for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
206 ret = cmp_better_copy(ret, i);
207 }
208 return ret;
209 }
210
211 static void move_mem_copies(TCGTemp *dst_ts, TCGTemp *src_ts)
212 {
213 TempOptInfo *si = ts_info(src_ts);
214 TempOptInfo *di = ts_info(dst_ts);
215 MemCopyInfo *mc;
216
217 QSIMPLEQ_FOREACH(mc, &si->mem_copy, next) {
218 tcg_debug_assert(mc->ts == src_ts);
219 mc->ts = dst_ts;
220 }
221 QSIMPLEQ_CONCAT(&di->mem_copy, &si->mem_copy);
222 }
223
224 /* Reset TEMP's state, possibly removing the temp from the list of copies. */
225 static void reset_ts(OptContext *ctx, TCGTemp *ts)
226 {
227 TempOptInfo *ti = ts_info(ts);
228 TCGTemp *pts = ti->prev_copy;
229 TCGTemp *nts = ti->next_copy;
230 TempOptInfo *pi = ts_info(pts);
231 TempOptInfo *ni = ts_info(nts);
232
233 ni->prev_copy = ti->prev_copy;
234 pi->next_copy = ti->next_copy;
235 ti->next_copy = ts;
236 ti->prev_copy = ts;
237 ti->z_mask = -1;
238 ti->o_mask = 0;
239 ti->s_mask = 0;
240
241 if (!QSIMPLEQ_EMPTY(&ti->mem_copy)) {
242 if (ts == nts) {
243 /* Last temp copy being removed, the mem copies die. */
244 MemCopyInfo *mc;
245 QSIMPLEQ_FOREACH(mc, &ti->mem_copy, next) {
246 interval_tree_remove(&mc->itree, &ctx->mem_copy);
247 }
248 QSIMPLEQ_CONCAT(&ctx->mem_free, &ti->mem_copy);
249 } else {
250 move_mem_copies(find_better_copy(nts), ts);
251 }
252 }
253 }
254
255 static void reset_temp(OptContext *ctx, TCGArg arg)
256 {
257 reset_ts(ctx, arg_temp(arg));
258 }
259
260 static void record_mem_copy(OptContext *ctx, TCGType type,
261 TCGTemp *ts, intptr_t start, intptr_t last)
262 {
263 MemCopyInfo *mc;
264 TempOptInfo *ti;
265
266 mc = QSIMPLEQ_FIRST(&ctx->mem_free);
267 if (mc) {
268 QSIMPLEQ_REMOVE_HEAD(&ctx->mem_free, next);
269 } else {
270 mc = tcg_malloc(sizeof(*mc));
271 }
272
273 memset(mc, 0, sizeof(*mc));
274 mc->itree.start = start;
275 mc->itree.last = last;
276 mc->type = type;
277 interval_tree_insert(&mc->itree, &ctx->mem_copy);
278
279 ts = find_better_copy(ts);
280 ti = ts_info(ts);
281 mc->ts = ts;
282 QSIMPLEQ_INSERT_TAIL(&ti->mem_copy, mc, next);
283 }
284
285 static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
286 {
287 TCGTemp *i;
288
289 if (ts1 == ts2) {
290 return true;
291 }
292
293 if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) {
294 return false;
295 }
296
297 for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) {
298 if (i == ts2) {
299 return true;
300 }
301 }
302
303 return false;
304 }
305
306 static bool args_are_copies(TCGArg arg1, TCGArg arg2)
307 {
308 return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
309 }
310
311 static TCGTemp *find_mem_copy_for(OptContext *ctx, TCGType type, intptr_t s)
312 {
313 MemCopyInfo *mc;
314
315 for (mc = mem_copy_first(ctx, s, s); mc; mc = mem_copy_next(mc, s, s)) {
316 if (mc->itree.start == s && mc->type == type) {
317 return find_better_copy(mc->ts);
318 }
319 }
320 return NULL;
321 }
322
323 static TCGArg arg_new_constant(OptContext *ctx, uint64_t val)
324 {
325 TCGType type = ctx->type;
326 TCGTemp *ts;
327
328 if (type == TCG_TYPE_I32) {
329 val = (int32_t)val;
330 }
331
332 ts = tcg_constant_internal(type, val);
333 init_ts_info(ctx, ts);
334
335 return temp_arg(ts);
336 }
337
338 static TCGArg arg_new_temp(OptContext *ctx)
339 {
340 TCGTemp *ts = tcg_temp_new_internal(ctx->type, TEMP_EBB);
341 init_ts_info(ctx, ts);
342 return temp_arg(ts);
343 }
344
345 static TCGOp *opt_insert_after(OptContext *ctx, TCGOp *op,
346 TCGOpcode opc, unsigned narg)
347 {
348 return tcg_op_insert_after(ctx->tcg, op, opc, ctx->type, narg);
349 }
350
351 static TCGOp *opt_insert_before(OptContext *ctx, TCGOp *op,
352 TCGOpcode opc, unsigned narg)
353 {
354 return tcg_op_insert_before(ctx->tcg, op, opc, ctx->type, narg);
355 }
356
357 static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
358 {
359 TCGTemp *dst_ts = arg_temp(dst);
360 TCGTemp *src_ts = arg_temp(src);
361 TempOptInfo *di;
362 TempOptInfo *si;
363 TCGOpcode new_op;
364
365 if (ts_are_copies(dst_ts, src_ts)) {
366 tcg_op_remove(ctx->tcg, op);
367 return true;
368 }
369
370 reset_ts(ctx, dst_ts);
371 di = ts_info(dst_ts);
372 si = ts_info(src_ts);
373
374 switch (ctx->type) {
375 case TCG_TYPE_I32:
376 case TCG_TYPE_I64:
377 new_op = INDEX_op_mov;
378 break;
379 case TCG_TYPE_V64:
380 case TCG_TYPE_V128:
381 case TCG_TYPE_V256:
382 /* TCGOP_TYPE and TCGOP_VECE remain unchanged. */
383 new_op = INDEX_op_mov_vec;
384 break;
385 default:
386 g_assert_not_reached();
387 }
388 op->opc = new_op;
389 op->args[0] = dst;
390 op->args[1] = src;
391
392 di->z_mask = si->z_mask;
393 di->o_mask = si->o_mask;
394 di->s_mask = si->s_mask;
395
396 if (src_ts->type == dst_ts->type) {
397 TempOptInfo *ni = ts_info(si->next_copy);
398
399 di->next_copy = si->next_copy;
400 di->prev_copy = src_ts;
401 ni->prev_copy = dst_ts;
402 si->next_copy = dst_ts;
403
404 if (!QSIMPLEQ_EMPTY(&si->mem_copy)
405 && cmp_better_copy(src_ts, dst_ts) == dst_ts) {
406 move_mem_copies(dst_ts, src_ts);
407 }
408 } else if (dst_ts->type == TCG_TYPE_I32) {
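/* Narrowing a 64-bit source to 32 bits: keep the masks in the sign-extended form used for 32-bit values. */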
409 di->z_mask = (int32_t)di->z_mask;
410 di->o_mask = (int32_t)di->o_mask;
411 di->s_mask |= INT32_MIN;
412 } else {
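/* Widening a 32-bit source to 64 bits: treat the high 32 bits as unknown. */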
413 di->z_mask |= MAKE_64BIT_MASK(32, 32);
414 di->o_mask = (uint32_t)di->o_mask;
415 di->s_mask = INT64_MIN;
416 }
417 return true;
418 }
419
420 static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
421 TCGArg dst, uint64_t val)
422 {
423 /* Convert movi to mov with constant temp. */
424 return tcg_opt_gen_mov(ctx, op, dst, arg_new_constant(ctx, val));
425 }
426
427 static uint64_t do_constant_folding_2(TCGOpcode op, TCGType type,
428 uint64_t x, uint64_t y)
429 {
430 uint64_t l64, h64;
431
432 switch (op) {
433 case INDEX_op_add:
434 return x + y;
435
436 case INDEX_op_sub:
437 return x - y;
438
439 case INDEX_op_mul:
440 return x * y;
441
442 case INDEX_op_and:
443 case INDEX_op_and_vec:
444 return x & y;
445
446 case INDEX_op_or:
447 case INDEX_op_or_vec:
448 return x | y;
449
450 case INDEX_op_xor:
451 case INDEX_op_xor_vec:
452 return x ^ y;
453
454 case INDEX_op_shl:
455 if (type == TCG_TYPE_I32) {
456 return (uint32_t)x << (y & 31);
457 }
458 return (uint64_t)x << (y & 63);
459
460 case INDEX_op_shr:
461 if (type == TCG_TYPE_I32) {
462 return (uint32_t)x >> (y & 31);
463 }
464 return (uint64_t)x >> (y & 63);
465
466 case INDEX_op_sar:
467 if (type == TCG_TYPE_I32) {
468 return (int32_t)x >> (y & 31);
469 }
470 return (int64_t)x >> (y & 63);
471
472 case INDEX_op_rotr:
473 if (type == TCG_TYPE_I32) {
474 return ror32(x, y & 31);
475 }
476 return ror64(x, y & 63);
477
478 case INDEX_op_rotl:
479 if (type == TCG_TYPE_I32) {
480 return rol32(x, y & 31);
481 }
482 return rol64(x, y & 63);
483
484 case INDEX_op_not:
485 case INDEX_op_not_vec:
486 return ~x;
487
488 case INDEX_op_neg:
489 return -x;
490
491 case INDEX_op_andc:
492 case INDEX_op_andc_vec:
493 return x & ~y;
494
495 case INDEX_op_orc:
496 case INDEX_op_orc_vec:
497 return x | ~y;
498
499 case INDEX_op_eqv:
500 case INDEX_op_eqv_vec:
501 return ~(x ^ y);
502
503 case INDEX_op_nand:
504 case INDEX_op_nand_vec:
505 return ~(x & y);
506
507 case INDEX_op_nor:
508 case INDEX_op_nor_vec:
509 return ~(x | y);
510
511 case INDEX_op_clz:
512 if (type == TCG_TYPE_I32) {
513 return (uint32_t)x ? clz32(x) : y;
514 }
515 return x ? clz64(x) : y;
516
517 case INDEX_op_ctz:
518 if (type == TCG_TYPE_I32) {
519 return (uint32_t)x ? ctz32(x) : y;
520 }
521 return x ? ctz64(x) : y;
522
523 case INDEX_op_ctpop:
524 return type == TCG_TYPE_I32 ? ctpop32(x) : ctpop64(x);
525
526 case INDEX_op_bswap16:
527 x = bswap16(x);
528 return y & TCG_BSWAP_OS ? (int16_t)x : x;
529
530 case INDEX_op_bswap32:
531 x = bswap32(x);
532 return y & TCG_BSWAP_OS ? (int32_t)x : x;
533
534 case INDEX_op_bswap64:
535 return bswap64(x);
536
537 case INDEX_op_ext_i32_i64:
538 return (int32_t)x;
539
540 case INDEX_op_extu_i32_i64:
541 case INDEX_op_extrl_i64_i32:
542 return (uint32_t)x;
543
544 case INDEX_op_extrh_i64_i32:
545 return (uint64_t)x >> 32;
546
547 case INDEX_op_muluh:
548 if (type == TCG_TYPE_I32) {
549 return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
550 }
551 mulu64(&l64, &h64, x, y);
552 return h64;
553
554 case INDEX_op_mulsh:
555 if (type == TCG_TYPE_I32) {
556 return ((int64_t)(int32_t)x * (int32_t)y) >> 32;
557 }
558 muls64(&l64, &h64, x, y);
559 return h64;
560
561 case INDEX_op_divs:
562 /* Avoid crashing on divide by zero, otherwise undefined. */
563 if (type == TCG_TYPE_I32) {
564 return (int32_t)x / ((int32_t)y ? : 1);
565 }
566 return (int64_t)x / ((int64_t)y ? : 1);
567
568 case INDEX_op_divu:
569 if (type == TCG_TYPE_I32) {
570 return (uint32_t)x / ((uint32_t)y ? : 1);
571 }
572 return (uint64_t)x / ((uint64_t)y ? : 1);
573
574 case INDEX_op_rems:
575 if (type == TCG_TYPE_I32) {
576 return (int32_t)x % ((int32_t)y ? : 1);
577 }
578 return (int64_t)x % ((int64_t)y ? : 1);
579
580 case INDEX_op_remu:
581 if (type == TCG_TYPE_I32) {
582 return (uint32_t)x % ((uint32_t)y ? : 1);
583 }
584 return (uint64_t)x % ((uint64_t)y ? : 1);
585
586 default:
587 g_assert_not_reached();
588 }
589 }
590
591 static uint64_t do_constant_folding(TCGOpcode op, TCGType type,
592 uint64_t x, uint64_t y)
593 {
594 uint64_t res = do_constant_folding_2(op, type, x, y);
595 if (type == TCG_TYPE_I32) {
596 res = (int32_t)res;
597 }
598 return res;
599 }
600
601 static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
602 {
603 switch (c) {
604 case TCG_COND_EQ:
605 return x == y;
606 case TCG_COND_NE:
607 return x != y;
608 case TCG_COND_LT:
609 return (int32_t)x < (int32_t)y;
610 case TCG_COND_GE:
611 return (int32_t)x >= (int32_t)y;
612 case TCG_COND_LE:
613 return (int32_t)x <= (int32_t)y;
614 case TCG_COND_GT:
615 return (int32_t)x > (int32_t)y;
616 case TCG_COND_LTU:
617 return x < y;
618 case TCG_COND_GEU:
619 return x >= y;
620 case TCG_COND_LEU:
621 return x <= y;
622 case TCG_COND_GTU:
623 return x > y;
624 case TCG_COND_TSTEQ:
625 return (x & y) == 0;
626 case TCG_COND_TSTNE:
627 return (x & y) != 0;
628 case TCG_COND_ALWAYS:
629 case TCG_COND_NEVER:
630 break;
631 }
632 g_assert_not_reached();
633 }
634
635 static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
636 {
637 switch (c) {
638 case TCG_COND_EQ:
639 return x == y;
640 case TCG_COND_NE:
641 return x != y;
642 case TCG_COND_LT:
643 return (int64_t)x < (int64_t)y;
644 case TCG_COND_GE:
645 return (int64_t)x >= (int64_t)y;
646 case TCG_COND_LE:
647 return (int64_t)x <= (int64_t)y;
648 case TCG_COND_GT:
649 return (int64_t)x > (int64_t)y;
650 case TCG_COND_LTU:
651 return x < y;
652 case TCG_COND_GEU:
653 return x >= y;
654 case TCG_COND_LEU:
655 return x <= y;
656 case TCG_COND_GTU:
657 return x > y;
658 case TCG_COND_TSTEQ:
659 return (x & y) == 0;
660 case TCG_COND_TSTNE:
661 return (x & y) != 0;
662 case TCG_COND_ALWAYS:
663 case TCG_COND_NEVER:
664 break;
665 }
666 g_assert_not_reached();
667 }
668
669 static int do_constant_folding_cond_eq(TCGCond c)
670 {
671 switch (c) {
672 case TCG_COND_GT:
673 case TCG_COND_LTU:
674 case TCG_COND_LT:
675 case TCG_COND_GTU:
676 case TCG_COND_NE:
677 return 0;
678 case TCG_COND_GE:
679 case TCG_COND_GEU:
680 case TCG_COND_LE:
681 case TCG_COND_LEU:
682 case TCG_COND_EQ:
683 return 1;
684 case TCG_COND_TSTEQ:
685 case TCG_COND_TSTNE:
686 return -1;
687 case TCG_COND_ALWAYS:
688 case TCG_COND_NEVER:
689 break;
690 }
691 g_assert_not_reached();
692 }
693
694 /*
695 * Return -1 if the condition can't be simplified,
696 * and the result of the condition (0 or 1) if it can.
697 */
698 static int do_constant_folding_cond(TCGType type, TCGArg x,
699 TCGArg y, TCGCond c)
700 {
701 if (arg_is_const(x) && arg_is_const(y)) {
702 uint64_t xv = arg_const_val(x);
703 uint64_t yv = arg_const_val(y);
704
705 switch (type) {
706 case TCG_TYPE_I32:
707 return do_constant_folding_cond_32(xv, yv, c);
708 case TCG_TYPE_I64:
709 return do_constant_folding_cond_64(xv, yv, c);
710 default:
711 /* Only scalar comparisons are optimizable */
712 return -1;
713 }
714 } else if (args_are_copies(x, y)) {
715 return do_constant_folding_cond_eq(c);
716 } else if (arg_is_const_val(y, 0)) {
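/* Comparisons against a constant zero: x <u 0 is never true, x >=u 0 is always true, and x & 0 is always zero. */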
717 switch (c) {
718 case TCG_COND_LTU:
719 case TCG_COND_TSTNE:
720 return 0;
721 case TCG_COND_GEU:
722 case TCG_COND_TSTEQ:
723 return 1;
724 default:
725 return -1;
726 }
727 }
728 return -1;
729 }
730
731 /**
732 * swap_commutative:
733 * @dest: TCGArg of the destination argument, or NO_DEST.
734 * @p1: first paired argument
735 * @p2: second paired argument
736 *
737 * If *@p1 is a constant and *@p2 is not, swap.
738 * If *@p2 matches @dest, swap.
739 * Return true if a swap was performed.
740 */
741
742 #define NO_DEST temp_arg(NULL)
743
744 static int pref_commutative(TempOptInfo *ti)
745 {
746 /* Slight preference for non-zero constants second. */
747 return !ti_is_const(ti) ? 0 : ti_const_val(ti) ? 3 : 2;
748 }
749
750 static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
751 {
752 TCGArg a1 = *p1, a2 = *p2;
753 int sum = 0;
754 sum += pref_commutative(arg_info(a1));
755 sum -= pref_commutative(arg_info(a2));
756
757 /* Prefer the constant in second argument, and then the form
758 op a, a, b, which is better handled on non-RISC hosts. */
759 if (sum > 0 || (sum == 0 && dest == a2)) {
760 *p1 = a2;
761 *p2 = a1;
762 return true;
763 }
764 return false;
765 }
766
767 static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
768 {
769 int sum = 0;
770 sum += pref_commutative(arg_info(p1[0]));
771 sum += pref_commutative(arg_info(p1[1]));
772 sum -= pref_commutative(arg_info(p2[0]));
773 sum -= pref_commutative(arg_info(p2[1]));
774 if (sum > 0) {
775 TCGArg t;
776 t = p1[0], p1[0] = p2[0], p2[0] = t;
777 t = p1[1], p1[1] = p2[1], p2[1] = t;
778 return true;
779 }
780 return false;
781 }
782
783 /*
784 * Return -1 if the condition can't be simplified,
785 * and the result of the condition (0 or 1) if it can.
786 */
787 static bool fold_and(OptContext *ctx, TCGOp *op);
788 static int do_constant_folding_cond1(OptContext *ctx, TCGOp *op, TCGArg dest,
789 TCGArg *p1, TCGArg *p2, TCGArg *pcond)
790 {
791 TCGCond cond;
792 TempOptInfo *i1;
793 bool swap;
794 int r;
795
796 swap = swap_commutative(dest, p1, p2);
797 cond = *pcond;
798 if (swap) {
799 *pcond = cond = tcg_swap_cond(cond);
800 }
801
802 r = do_constant_folding_cond(ctx->type, *p1, *p2, cond);
803 if (r >= 0) {
804 return r;
805 }
806 if (!is_tst_cond(cond)) {
807 return -1;
808 }
809
810 i1 = arg_info(*p1);
811
812 /*
813 * TSTNE x,x -> NE x,0
814 * TSTNE x,i -> NE x,0 if i includes all nonzero bits of x
815 */
816 if (args_are_copies(*p1, *p2) ||
817 (arg_is_const(*p2) && (i1->z_mask & ~arg_const_val(*p2)) == 0)) {
818 *p2 = arg_new_constant(ctx, 0);
819 *pcond = tcg_tst_eqne_cond(cond);
820 return -1;
821 }
822
823 /* TSTNE x,i -> LT x,0 if i only includes sign bit copies */
824 if (arg_is_const(*p2) && (arg_const_val(*p2) & ~i1->s_mask) == 0) {
825 *p2 = arg_new_constant(ctx, 0);
826 *pcond = tcg_tst_ltge_cond(cond);
827 return -1;
828 }
829
830 /* Expand to AND with a temporary if no backend support. */
831 if (!TCG_TARGET_HAS_tst) {
832 TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_and, 3);
833 TCGArg tmp = arg_new_temp(ctx);
834
835 op2->args[0] = tmp;
836 op2->args[1] = *p1;
837 op2->args[2] = *p2;
838 fold_and(ctx, op2);
839
840 *p1 = tmp;
841 *p2 = arg_new_constant(ctx, 0);
842 *pcond = tcg_tst_eqne_cond(cond);
843 }
844 return -1;
845 }
846
847 static int do_constant_folding_cond2(OptContext *ctx, TCGOp *op, TCGArg *args)
848 {
849 TCGArg al, ah, bl, bh;
850 TCGCond c;
851 bool swap;
852 int r;
853
854 swap = swap_commutative2(args, args + 2);
855 c = args[4];
856 if (swap) {
857 args[4] = c = tcg_swap_cond(c);
858 }
859
860 al = args[0];
861 ah = args[1];
862 bl = args[2];
863 bh = args[3];
864
865 if (arg_is_const(bl) && arg_is_const(bh)) {
866 tcg_target_ulong blv = arg_const_val(bl);
867 tcg_target_ulong bhv = arg_const_val(bh);
868 uint64_t b = deposit64(blv, 32, 32, bhv);
869
870 if (arg_is_const(al) && arg_is_const(ah)) {
871 tcg_target_ulong alv = arg_const_val(al);
872 tcg_target_ulong ahv = arg_const_val(ah);
873 uint64_t a = deposit64(alv, 32, 32, ahv);
874
875 r = do_constant_folding_cond_64(a, b, c);
876 if (r >= 0) {
877 return r;
878 }
879 }
880
881 if (b == 0) {
882 switch (c) {
883 case TCG_COND_LTU:
884 case TCG_COND_TSTNE:
885 return 0;
886 case TCG_COND_GEU:
887 case TCG_COND_TSTEQ:
888 return 1;
889 default:
890 break;
891 }
892 }
893
894 /* TSTNE x,-1 -> NE x,0 */
895 if (b == -1 && is_tst_cond(c)) {
896 args[3] = args[2] = arg_new_constant(ctx, 0);
897 args[4] = tcg_tst_eqne_cond(c);
898 return -1;
899 }
900
901 /* TSTNE x,sign -> LT x,0 */
902 if (b == INT64_MIN && is_tst_cond(c)) {
903 /* bl must be 0, so copy that to bh */
904 args[3] = bl;
905 args[4] = tcg_tst_ltge_cond(c);
906 return -1;
907 }
908 }
909
910 if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
911 r = do_constant_folding_cond_eq(c);
912 if (r >= 0) {
913 return r;
914 }
915
916 /* TSTNE x,x -> NE x,0 */
917 if (is_tst_cond(c)) {
918 args[3] = args[2] = arg_new_constant(ctx, 0);
919 args[4] = tcg_tst_eqne_cond(c);
920 return -1;
921 }
922 }
923
924 /* Expand to AND with a temporary if no backend support. */
925 if (!TCG_TARGET_HAS_tst && is_tst_cond(c)) {
926 TCGOp *op1 = opt_insert_before(ctx, op, INDEX_op_and, 3);
927 TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_and, 3);
928 TCGArg t1 = arg_new_temp(ctx);
929 TCGArg t2 = arg_new_temp(ctx);
930
931 op1->args[0] = t1;
932 op1->args[1] = al;
933 op1->args[2] = bl;
934 fold_and(ctx, op1);
935
936 op2->args[0] = t2;
937 op2->args[1] = ah;
938 op2->args[2] = bh;
939 fold_and(ctx, op2);
940
941 args[0] = t1;
942 args[1] = t2;
943 args[3] = args[2] = arg_new_constant(ctx, 0);
944 args[4] = tcg_tst_eqne_cond(c);
945 }
946 return -1;
947 }
948
949 static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args)
950 {
951 for (int i = 0; i < nb_args; i++) {
952 TCGTemp *ts = arg_temp(op->args[i]);
953 init_ts_info(ctx, ts);
954 }
955 }
956
957 static void copy_propagate(OptContext *ctx, TCGOp *op,
958 int nb_oargs, int nb_iargs)
959 {
960 for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
961 TCGTemp *ts = arg_temp(op->args[i]);
962 if (ts_is_copy(ts)) {
963 op->args[i] = temp_arg(find_better_copy(ts));
964 }
965 }
966 }
967
968 static void finish_bb(OptContext *ctx)
969 {
970 /* Memory barriers are only merged within a single basic block. */
971 ctx->prev_mb = NULL;
972 }
973
974 static void finish_ebb(OptContext *ctx)
975 {
976 finish_bb(ctx);
977 /* Copy and constant information is only tracked within an extended basic block. */
978 memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
979 remove_mem_copy_all(ctx);
980 }
981
982 static bool finish_folding(OptContext *ctx, TCGOp *op)
983 {
984 const TCGOpDef *def = &tcg_op_defs[op->opc];
985 int i, nb_oargs;
986
987 nb_oargs = def->nb_oargs;
988 for (i = 0; i < nb_oargs; i++) {
989 TCGTemp *ts = arg_temp(op->args[i]);
990 reset_ts(ctx, ts);
991 }
992 return true;
993 }
994
995 /*
996 * The fold_* functions return true when processing is complete,
997 * usually by folding the operation to a constant or to a copy,
998 * and calling tcg_opt_gen_{mov,movi}. They may do other things,
999 * like collect information about the value produced, for use in
1000 * optimizing a subsequent operation.
1001 *
1002 * These first fold_* functions are all helpers, used by other
1003 * folders for more specific operations.
1004 */
1005
1006 static bool fold_const1(OptContext *ctx, TCGOp *op)
1007 {
1008 if (arg_is_const(op->args[1])) {
1009 uint64_t t = arg_const_val(op->args[1]);
1010
1011 t = do_constant_folding(op->opc, ctx->type, t, 0);
1012 return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1013 }
1014 return false;
1015 }
1016
1017 static bool fold_const2(OptContext *ctx, TCGOp *op)
1018 {
1019 if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1020 uint64_t t1 = arg_const_val(op->args[1]);
1021 uint64_t t2 = arg_const_val(op->args[2]);
1022
1023 t1 = do_constant_folding(op->opc, ctx->type, t1, t2);
1024 return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
1025 }
1026 return false;
1027 }
1028
1029 static bool fold_commutative(OptContext *ctx, TCGOp *op)
1030 {
1031 swap_commutative(op->args[0], &op->args[1], &op->args[2]);
1032 return false;
1033 }
1034
1035 static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
1036 {
1037 swap_commutative(op->args[0], &op->args[1], &op->args[2]);
1038 return fold_const2(ctx, op);
1039 }
1040
1041 /*
1042 * Record "zero", "one", and "sign" masks for the single output of @op.
1043 * See TempOptInfo definition of z_mask, o_mask and s_mask.
1044 * If the masks allow, fold the output to a constant or to a copy.
1045 * The passed s_mask may be augmented by z_mask and o_mask.
1046 */
1047 static bool fold_masks_zosa_int(OptContext *ctx, TCGOp *op,
1048 uint64_t z_mask, uint64_t o_mask,
1049 int64_t s_mask, uint64_t a_mask)
1050 {
1051 const TCGOpDef *def = &tcg_op_defs[op->opc];
1052 TCGTemp *ts;
1053 TempOptInfo *ti;
1054 int rep;
1055
1056 /* Only single-output opcodes are supported here. */
1057 tcg_debug_assert(def->nb_oargs == 1);
1058
1059 /*
1060 * 32-bit ops generate 32-bit results, which for the purpose of
1061 * simplifying tcg are sign-extended. Certainly that's how we
1062 * represent our constants elsewhere. Note that the bits will
1063 * be reset properly for a 64-bit value when encountering the
1064 * type changing opcodes.
1065 */
1066 if (ctx->type == TCG_TYPE_I32) {
1067 z_mask = (int32_t)z_mask;
1068 o_mask = (int32_t)o_mask;
1069 s_mask |= INT32_MIN;
1070 a_mask = (uint32_t)a_mask;
1071 }
1072
1073 /* Bits that are known 1 and bits that are known 0 must not overlap. */
1074 tcg_debug_assert((o_mask & ~z_mask) == 0);
1075
1076 /* If all bits that are not known zero are known one, the value is a constant. */
1077 if (z_mask == o_mask) {
1078 return tcg_opt_gen_movi(ctx, op, op->args[0], o_mask);
1079 }
1080
1081 /* If no bits are affected, the operation devolves to a copy. */
1082 if (a_mask == 0) {
1083 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1084 }
1085
1086 ts = arg_temp(op->args[0]);
1087 reset_ts(ctx, ts);
1088
1089 ti = ts_info(ts);
1090 ti->z_mask = z_mask;
ti->o_mask = o_mask;
1091
1092 /* Canonicalize s_mask and incorporate data from z_mask and o_mask. */
1093 rep = clz64(~s_mask);
1094 rep = MAX(rep, clz64(z_mask));
1095 rep = MAX(rep, clz64(~o_mask));
1096 rep = MAX(rep - 1, 0);
1097 ti->s_mask = INT64_MIN >> rep;
1098
1099 return false;
1100 }
1101
1102 static bool fold_masks_zosa(OptContext *ctx, TCGOp *op, uint64_t z_mask,
1103 uint64_t o_mask, int64_t s_mask, uint64_t a_mask)
1104 {
1105 fold_masks_zosa_int(ctx, op, z_mask, o_mask, s_mask, a_mask);
1106 return true;
1107 }
1108
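/*
 * The fold_masks_* helpers below name only the masks they supply:
 * (z)ero, (o)nes, (s)ign, (a)ffected.  Omitted masks default to their
 * most conservative values: z_mask = -1, o_mask = 0, s_mask = 0, a_mask = -1.
 */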
1109 static bool fold_masks_zos(OptContext *ctx, TCGOp *op,
1110 uint64_t z_mask, uint64_t o_mask, uint64_t s_mask)
1111 {
1112 return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, -1);
1113 }
1114
1115 static bool fold_masks_zo(OptContext *ctx, TCGOp *op,
1116 uint64_t z_mask, uint64_t o_mask)
1117 {
1118 return fold_masks_zosa(ctx, op, z_mask, o_mask, 0, -1);
1119 }
1120
1121 static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
1122 uint64_t z_mask, uint64_t s_mask)
1123 {
1124 return fold_masks_zosa(ctx, op, z_mask, 0, s_mask, -1);
1125 }
1126
1127 static bool fold_masks_z(OptContext *ctx, TCGOp *op, uint64_t z_mask)
1128 {
1129 return fold_masks_zosa(ctx, op, z_mask, 0, 0, -1);
1130 }
1131
1132 static bool fold_masks_s(OptContext *ctx, TCGOp *op, uint64_t s_mask)
1133 {
1134 return fold_masks_zosa(ctx, op, -1, 0, s_mask, -1);
1135 }
1136
1137 /*
1138 * Convert @op to NOT, if NOT is supported by the host.
1139 * Return true if the conversion is successful, which will still
1140 * indicate that the processing is complete.
1141 */
1142 static bool fold_not(OptContext *ctx, TCGOp *op);
1143 static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx)
1144 {
1145 TCGOpcode not_op;
1146 bool have_not;
1147
1148 switch (ctx->type) {
1149 case TCG_TYPE_I32:
1150 case TCG_TYPE_I64:
1151 not_op = INDEX_op_not;
1152 have_not = tcg_op_supported(INDEX_op_not, ctx->type, 0);
1153 break;
1154 case TCG_TYPE_V64:
1155 case TCG_TYPE_V128:
1156 case TCG_TYPE_V256:
1157 not_op = INDEX_op_not_vec;
1158 have_not = TCG_TARGET_HAS_not_vec;
1159 break;
1160 default:
1161 g_assert_not_reached();
1162 }
1163 if (have_not) {
1164 op->opc = not_op;
1165 op->args[1] = op->args[idx];
1166 return fold_not(ctx, op);
1167 }
1168 return false;
1169 }
1170
1171 /* If the binary operation has first argument @i, fold to @i. */
1172 static bool fold_ix_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
1173 {
1174 if (arg_is_const_val(op->args[1], i)) {
1175 return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1176 }
1177 return false;
1178 }
1179
1180 /* If the binary operation has first argument @i, fold to NOT. */
1181 static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
1182 {
1183 if (arg_is_const_val(op->args[1], i)) {
1184 return fold_to_not(ctx, op, 2);
1185 }
1186 return false;
1187 }
1188
1189 /* If the binary operation has second argument @i, fold to @i. */
1190 static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
1191 {
1192 if (arg_is_const_val(op->args[2], i)) {
1193 return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1194 }
1195 return false;
1196 }
1197
1198 /* If the binary operation has second argument @i, fold to identity. */
1199 static bool fold_xi_to_x(OptContext *ctx, TCGOp *op, uint64_t i)
1200 {
1201 if (arg_is_const_val(op->args[2], i)) {
1202 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1203 }
1204 return false;
1205 }
1206
1207 /* If the binary operation has second argument @i, fold to NOT. */
1208 static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
1209 {
1210 if (arg_is_const_val(op->args[2], i)) {
1211 return fold_to_not(ctx, op, 1);
1212 }
1213 return false;
1214 }
1215
1216 /* If the binary operation has both arguments equal, fold to @i. */
1217 static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
1218 {
1219 if (args_are_copies(op->args[1], op->args[2])) {
1220 return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1221 }
1222 return false;
1223 }
1224
1225 /* If the binary operation has both arguments equal, fold to identity. */
1226 static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
1227 {
1228 if (args_are_copies(op->args[1], op->args[2])) {
1229 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1230 }
1231 return false;
1232 }
1233
1234 /*
1235 * These outermost fold_<op> functions are sorted alphabetically.
1236 *
1237 * The ordering of the transformations should be:
1238 * 1) those that produce a constant
1239 * 2) those that produce a copy
1240 * 3) those that produce information about the result value.
1241 */
1242
1243 static bool fold_addco(OptContext *ctx, TCGOp *op);
1244 static bool fold_or(OptContext *ctx, TCGOp *op);
1245 static bool fold_orc(OptContext *ctx, TCGOp *op);
1246 static bool fold_subbo(OptContext *ctx, TCGOp *op);
1247 static bool fold_xor(OptContext *ctx, TCGOp *op);
1248
1249 static bool fold_add(OptContext *ctx, TCGOp *op)
1250 {
1251 if (fold_const2_commutative(ctx, op) ||
1252 fold_xi_to_x(ctx, op, 0)) {
1253 return true;
1254 }
1255 return finish_folding(ctx, op);
1256 }
1257
1258 /* We cannot as yet do_constant_folding with vectors. */
1259 static bool fold_add_vec(OptContext *ctx, TCGOp *op)
1260 {
1261 if (fold_commutative(ctx, op) ||
1262 fold_xi_to_x(ctx, op, 0)) {
1263 return true;
1264 }
1265 return finish_folding(ctx, op);
1266 }
1267
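/*
 * The carry-in consumed by the current op has been resolved to a constant,
 * so the op that produced it no longer needs to generate a carry-out:
 * rewrite addco -> add, addcio -> addci, addc1o -> add plus 1.
 */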
1268 static void squash_prev_carryout(OptContext *ctx, TCGOp *op)
1269 {
1270 TempOptInfo *t2;
1271
1272 op = QTAILQ_PREV(op, link);
1273 switch (op->opc) {
1274 case INDEX_op_addco:
1275 op->opc = INDEX_op_add;
1276 fold_add(ctx, op);
1277 break;
1278 case INDEX_op_addcio:
1279 op->opc = INDEX_op_addci;
1280 break;
1281 case INDEX_op_addc1o:
1282 op->opc = INDEX_op_add;
1283 t2 = arg_info(op->args[2]);
1284 if (ti_is_const(t2)) {
1285 op->args[2] = arg_new_constant(ctx, ti_const_val(t2) + 1);
1286 /* Perform other constant folding, if needed. */
1287 fold_add(ctx, op);
1288 } else {
1289 TCGArg ret = op->args[0];
1290 op = opt_insert_after(ctx, op, INDEX_op_add, 3);
1291 op->args[0] = ret;
1292 op->args[1] = ret;
1293 op->args[2] = arg_new_constant(ctx, 1);
1294 }
1295 break;
1296 default:
1297 g_assert_not_reached();
1298 }
1299 }
1300
1301 static bool fold_addci(OptContext *ctx, TCGOp *op)
1302 {
1303 fold_commutative(ctx, op);
1304
1305 if (ctx->carry_state < 0) {
1306 return finish_folding(ctx, op);
1307 }
1308
1309 squash_prev_carryout(ctx, op);
1310 op->opc = INDEX_op_add;
1311
1312 if (ctx->carry_state > 0) {
1313 TempOptInfo *t2 = arg_info(op->args[2]);
1314
1315 /*
1316 * Propagate the known carry-in into a constant, if possible.
1317 * Otherwise emit a second add +1.
1318 */
1319 if (ti_is_const(t2)) {
1320 op->args[2] = arg_new_constant(ctx, ti_const_val(t2) + 1);
1321 } else {
1322 TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_add, 3);
1323
1324 op2->args[0] = op->args[0];
1325 op2->args[1] = op->args[1];
1326 op2->args[2] = op->args[2];
1327 fold_add(ctx, op2);
1328
1329 op->args[1] = op->args[0];
1330 op->args[2] = arg_new_constant(ctx, 1);
1331 }
1332 }
1333
1334 ctx->carry_state = -1;
1335 return fold_add(ctx, op);
1336 }
1337
1338 static bool fold_addcio(OptContext *ctx, TCGOp *op)
1339 {
1340 TempOptInfo *t1, *t2;
1341 int carry_out = -1;
1342 uint64_t sum, max;
1343
1344 fold_commutative(ctx, op);
1345 t1 = arg_info(op->args[1]);
1346 t2 = arg_info(op->args[2]);
1347
1348 /*
1349 * The z_mask value is >= the maximum value that can be represented
1350 * with the known zero bits. So adding the z_mask values will not
1351 * overflow if and only if the true values cannot overflow.
1352 */
1353 if (!uadd64_overflow(t1->z_mask, t2->z_mask, &sum) &&
1354 !uadd64_overflow(sum, ctx->carry_state != 0, &sum)) {
1355 carry_out = 0;
1356 }
1357
1358 if (ctx->carry_state < 0) {
1359 ctx->carry_state = carry_out;
1360 return finish_folding(ctx, op);
1361 }
1362
1363 squash_prev_carryout(ctx, op);
1364 if (ctx->carry_state == 0) {
1365 goto do_addco;
1366 }
1367
1368 /* Propagate the known carry-in into a constant, if possible. */
1369 max = ctx->type == TCG_TYPE_I32 ? UINT32_MAX : UINT64_MAX;
1370 if (ti_is_const(t2)) {
1371 uint64_t v = ti_const_val(t2) & max;
1372 if (v < max) {
1373 op->args[2] = arg_new_constant(ctx, v + 1);
1374 goto do_addco;
1375 }
1376 /* max + known carry in produces known carry out. */
1377 carry_out = 1;
1378 }
1379 if (ti_is_const(t1)) {
1380 uint64_t v = ti_const_val(t1) & max;
1381 if (v < max) {
1382 op->args[1] = arg_new_constant(ctx, v + 1);
1383 goto do_addco;
1384 }
1385 carry_out = 1;
1386 }
1387
1388 /* Adjust the opcode to remember the known carry-in. */
1389 op->opc = INDEX_op_addc1o;
1390 ctx->carry_state = carry_out;
1391 return finish_folding(ctx, op);
1392
1393 do_addco:
1394 op->opc = INDEX_op_addco;
1395 return fold_addco(ctx, op);
1396 }
1397
1398 static bool fold_addco(OptContext *ctx, TCGOp *op)
1399 {
1400 TempOptInfo *t1, *t2;
1401 int carry_out = -1;
1402 uint64_t ign;
1403
1404 fold_commutative(ctx, op);
1405 t1 = arg_info(op->args[1]);
1406 t2 = arg_info(op->args[2]);
1407
1408 if (ti_is_const(t2)) {
1409 uint64_t v2 = ti_const_val(t2);
1410
1411 if (ti_is_const(t1)) {
1412 uint64_t v1 = ti_const_val(t1);
1413 /* Given sign-extension of z_mask for I32, we need not truncate. */
1414 carry_out = uadd64_overflow(v1, v2, &ign);
1415 } else if (v2 == 0) {
1416 carry_out = 0;
1417 }
1418 } else {
1419 /*
1420 * The z_mask value is >= the maximum value that can be represented
1421 * with the known zero bits. So adding the z_mask values will not
1422 * overflow if and only if the true values cannot overflow.
1423 */
1424 if (!uadd64_overflow(t1->z_mask, t2->z_mask, &ign)) {
1425 carry_out = 0;
1426 }
1427 }
1428 ctx->carry_state = carry_out;
1429 return finish_folding(ctx, op);
1430 }
1431
1432 static bool fold_and(OptContext *ctx, TCGOp *op)
1433 {
1434 uint64_t z_mask, o_mask, s_mask, a_mask;
1435 TempOptInfo *t1, *t2;
1436
1437 if (fold_const2_commutative(ctx, op)) {
1438 return true;
1439 }
1440
1441 t1 = arg_info(op->args[1]);
1442 t2 = arg_info(op->args[2]);
1443
1444 z_mask = t1->z_mask & t2->z_mask;
1445 o_mask = t1->o_mask & t2->o_mask;
1446
1447 /*
1448 * Sign repetitions are perforce all identical, whether they are 1 or 0.
1449 * Bitwise operations preserve the relative quantity of the repetitions.
1450 */
1451 s_mask = t1->s_mask & t2->s_mask;
1452
1453 /* Affected bits are those not known zero, masked by those known one. */
1454 a_mask = t1->z_mask & ~t2->o_mask;
1455
1456 if (!fold_masks_zosa_int(ctx, op, z_mask, o_mask, s_mask, a_mask)) {
1457 if (ti_is_const(t2)) {
1458 /*
1459 * Canonicalize on extract, if valid. This aids x86 with its
1460 * 2 operand MOVZBL and 2 operand AND, selecting the TCGOpcode
1461 * which does not require matching operands. Other backends can
1462 * trivially expand the extract to AND during code generation.
1463 */
1464 uint64_t val = ti_const_val(t2);
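/* val & (val + 1) == 0 iff val is a contiguous mask of low bits. */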
1465 if (!(val & (val + 1))) {
1466 unsigned len = ctz64(~val);
1467 if (TCG_TARGET_extract_valid(ctx->type, 0, len)) {
1468 op->opc = INDEX_op_extract;
1469 op->args[2] = 0;
1470 op->args[3] = len;
1471 }
1472 }
1473 } else {
1474 fold_xx_to_x(ctx, op);
1475 }
1476 }
1477 return true;
1478 }
1479
1480 static bool fold_andc(OptContext *ctx, TCGOp *op)
1481 {
1482 uint64_t z_mask, o_mask, s_mask, a_mask;
1483 TempOptInfo *t1, *t2;
1484
1485 if (fold_const2(ctx, op)) {
1486 return true;
1487 }
1488
1489 t1 = arg_info(op->args[1]);
1490 t2 = arg_info(op->args[2]);
1491
1492 if (ti_is_const(t2)) {
1493 /* Fold andc r,x,i to and r,x,~i. */
1494 switch (ctx->type) {
1495 case TCG_TYPE_I32:
1496 case TCG_TYPE_I64:
1497 op->opc = INDEX_op_and;
1498 break;
1499 case TCG_TYPE_V64:
1500 case TCG_TYPE_V128:
1501 case TCG_TYPE_V256:
1502 op->opc = INDEX_op_and_vec;
1503 break;
1504 default:
1505 g_assert_not_reached();
1506 }
1507 op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2));
1508 return fold_and(ctx, op);
1509 }
1510 if (fold_xx_to_i(ctx, op, 0) ||
1511 fold_ix_to_not(ctx, op, -1)) {
1512 return true;
1513 }
1514
1515 z_mask = t1->z_mask & ~t2->o_mask;
1516 o_mask = t1->o_mask & ~t2->z_mask;
1517 s_mask = t1->s_mask & t2->s_mask;
1518
1519 /* Affected bits are those not known zero, masked by those known zero. */
1520 a_mask = t1->z_mask & t2->z_mask;
1521
1522 return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask);
1523 }
1524
1525 static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op)
1526 {
1527 /* If the true and false values are the same, eliminate the select. */
1528 if (args_are_copies(op->args[2], op->args[3])) {
1529 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
1530 }
1531
1532 if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
1533 uint64_t tv = arg_const_val(op->args[2]);
1534 uint64_t fv = arg_const_val(op->args[3]);
1535
1536 if (tv == -1 && fv == 0) {
1537 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1538 }
1539 if (tv == 0 && fv == -1) {
1540 if (TCG_TARGET_HAS_not_vec) {
1541 op->opc = INDEX_op_not_vec;
1542 return fold_not(ctx, op);
1543 } else {
1544 op->opc = INDEX_op_xor_vec;
1545 op->args[2] = arg_new_constant(ctx, -1);
1546 return fold_xor(ctx, op);
1547 }
1548 }
1549 }
1550 if (arg_is_const(op->args[2])) {
1551 uint64_t tv = arg_const_val(op->args[2]);
1552 if (tv == -1) {
1553 op->opc = INDEX_op_or_vec;
1554 op->args[2] = op->args[3];
1555 return fold_or(ctx, op);
1556 }
1557 if (tv == 0 && TCG_TARGET_HAS_andc_vec) {
1558 op->opc = INDEX_op_andc_vec;
1559 op->args[2] = op->args[1];
1560 op->args[1] = op->args[3];
1561 return fold_andc(ctx, op);
1562 }
1563 }
1564 if (arg_is_const(op->args[3])) {
1565 uint64_t fv = arg_const_val(op->args[3]);
1566 if (fv == 0) {
1567 op->opc = INDEX_op_and_vec;
1568 return fold_and(ctx, op);
1569 }
1570 if (fv == -1 && TCG_TARGET_HAS_orc_vec) {
/* With f == -1, bitsel computes (t & sel) | ~sel = t | ~sel, i.e. orc(t, sel). */
TCGArg sel = op->args[1];
1571 op->opc = INDEX_op_orc_vec;
1572 op->args[1] = op->args[2];
1573 op->args[2] = sel;
1574 return fold_orc(ctx, op);
1575 }
1576 }
1577 return finish_folding(ctx, op);
1578 }
1579
1580 static bool fold_brcond(OptContext *ctx, TCGOp *op)
1581 {
1582 int i = do_constant_folding_cond1(ctx, op, NO_DEST, &op->args[0],
1583 &op->args[1], &op->args[2]);
1584 if (i == 0) {
1585 tcg_op_remove(ctx->tcg, op);
1586 return true;
1587 }
1588 if (i > 0) {
1589 op->opc = INDEX_op_br;
1590 op->args[0] = op->args[3];
1591 finish_ebb(ctx);
1592 } else {
1593 finish_bb(ctx);
1594 }
1595 return true;
1596 }
1597
1598 static bool fold_brcond2(OptContext *ctx, TCGOp *op)
1599 {
1600 TCGCond cond;
1601 TCGArg label;
1602 int i, inv = 0;
1603
1604 i = do_constant_folding_cond2(ctx, op, &op->args[0]);
1605 cond = op->args[4];
1606 label = op->args[5];
1607 if (i >= 0) {
1608 goto do_brcond_const;
1609 }
1610
1611 switch (cond) {
1612 case TCG_COND_LT:
1613 case TCG_COND_GE:
1614 /*
1615 * Simplify LT/GE comparisons vs zero to a single compare
1616 * vs the high word of the input.
1617 */
1618 if (arg_is_const_val(op->args[2], 0) &&
1619 arg_is_const_val(op->args[3], 0)) {
1620 goto do_brcond_high;
1621 }
1622 break;
1623
1624 case TCG_COND_NE:
1625 inv = 1;
1626 QEMU_FALLTHROUGH;
1627 case TCG_COND_EQ:
1628 /*
1629 * Simplify EQ/NE comparisons where one of the pairs
1630 * can be simplified.
1631 */
1632 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[0],
1633 op->args[2], cond);
1634 switch (i ^ inv) {
1635 case 0:
1636 goto do_brcond_const;
1637 case 1:
1638 goto do_brcond_high;
1639 }
1640
1641 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
1642 op->args[3], cond);
1643 switch (i ^ inv) {
1644 case 0:
1645 goto do_brcond_const;
1646 case 1:
1647 goto do_brcond_low;
1648 }
1649 break;
1650
1651 case TCG_COND_TSTEQ:
1652 case TCG_COND_TSTNE:
1653 if (arg_is_const_val(op->args[2], 0)) {
1654 goto do_brcond_high;
1655 }
1656 if (arg_is_const_val(op->args[3], 0)) {
1657 goto do_brcond_low;
1658 }
1659 break;
1660
1661 default:
1662 break;
1663
1664 do_brcond_low:
1665 op->opc = INDEX_op_brcond;
1666 op->args[1] = op->args[2];
1667 op->args[2] = cond;
1668 op->args[3] = label;
1669 return fold_brcond(ctx, op);
1670
1671 do_brcond_high:
1672 op->opc = INDEX_op_brcond;
1673 op->args[0] = op->args[1];
1674 op->args[1] = op->args[3];
1675 op->args[2] = cond;
1676 op->args[3] = label;
1677 return fold_brcond(ctx, op);
1678
1679 do_brcond_const:
1680 if (i == 0) {
1681 tcg_op_remove(ctx->tcg, op);
1682 return true;
1683 }
1684 op->opc = INDEX_op_br;
1685 op->args[0] = label;
1686 finish_ebb(ctx);
1687 return true;
1688 }
1689
1690 finish_bb(ctx);
1691 return true;
1692 }
1693
1694 static bool fold_bswap(OptContext *ctx, TCGOp *op)
1695 {
1696 uint64_t z_mask, o_mask, s_mask;
1697 TempOptInfo *t1 = arg_info(op->args[1]);
1698 int flags = op->args[2];
1699
1700 if (ti_is_const(t1)) {
1701 return tcg_opt_gen_movi(ctx, op, op->args[0],
1702 do_constant_folding(op->opc, ctx->type,
1703 ti_const_val(t1), flags));
1704 }
1705
1706 z_mask = t1->z_mask;
1707 o_mask = t1->o_mask;
1708 s_mask = 0;
1709
1710 switch (op->opc) {
1711 case INDEX_op_bswap16:
1712 z_mask = bswap16(z_mask);
1713 o_mask = bswap16(o_mask);
1714 if (flags & TCG_BSWAP_OS) {
1715 z_mask = (int16_t)z_mask;
1716 o_mask = (int16_t)o_mask;
1717 s_mask = INT16_MIN;
1718 } else if (!(flags & TCG_BSWAP_OZ)) {
1719 z_mask |= MAKE_64BIT_MASK(16, 48);
1720 }
1721 break;
1722 case INDEX_op_bswap32:
1723 z_mask = bswap32(z_mask);
1724 o_mask = bswap32(o_mask);
1725 if (flags & TCG_BSWAP_OS) {
1726 z_mask = (int32_t)z_mask;
1727 o_mask = (int32_t)o_mask;
1728 s_mask = INT32_MIN;
1729 } else if (!(flags & TCG_BSWAP_OZ)) {
1730 z_mask |= MAKE_64BIT_MASK(32, 32);
1731 }
1732 break;
1733 case INDEX_op_bswap64:
1734 z_mask = bswap64(z_mask);
1735 o_mask = bswap64(o_mask);
1736 break;
1737 default:
1738 g_assert_not_reached();
1739 }
1740
1741 return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
1742 }
1743
1744 static bool fold_call(OptContext *ctx, TCGOp *op)
1745 {
1746 TCGContext *s = ctx->tcg;
1747 int nb_oargs = TCGOP_CALLO(op);
1748 int nb_iargs = TCGOP_CALLI(op);
1749 int flags, i;
1750
1751 init_arguments(ctx, op, nb_oargs + nb_iargs);
1752 copy_propagate(ctx, op, nb_oargs, nb_iargs);
1753
1754 /* If the function reads or writes globals, reset temp data. */
1755 flags = tcg_call_flags(op);
1756 if (!(flags & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
1757 int nb_globals = s->nb_globals;
1758
1759 for (i = 0; i < nb_globals; i++) {
1760 if (test_bit(i, ctx->temps_used.l)) {
1761 reset_ts(ctx, &ctx->tcg->temps[i]);
1762 }
1763 }
1764 }
1765
1766 /* If the function has side effects, reset mem data. */
1767 if (!(flags & TCG_CALL_NO_SIDE_EFFECTS)) {
1768 remove_mem_copy_all(ctx);
1769 }
1770
1771 /* Reset temp data for outputs. */
1772 for (i = 0; i < nb_oargs; i++) {
1773 reset_temp(ctx, op->args[i]);
1774 }
1775
1776 /* Stop optimizing MB across calls. */
1777 ctx->prev_mb = NULL;
1778 return true;
1779 }
1780
1781 static bool fold_cmp_vec(OptContext *ctx, TCGOp *op)
1782 {
1783 /* Canonicalize the comparison to put immediate second. */
1784 if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
1785 op->args[3] = tcg_swap_cond(op->args[3]);
1786 }
1787 return finish_folding(ctx, op);
1788 }
1789
1790 static bool fold_cmpsel_vec(OptContext *ctx, TCGOp *op)
1791 {
1792 /* If true and false values are the same, eliminate the cmp. */
1793 if (args_are_copies(op->args[3], op->args[4])) {
1794 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]);
1795 }
1796
1797 /* Canonicalize the comparison to put immediate second. */
1798 if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
1799 op->args[5] = tcg_swap_cond(op->args[5]);
1800 }
1801 /*
1802 * Canonicalize the "false" input reg to match the destination,
1803 * so that the tcg backend can implement "move if true".
1804 */
1805 if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
1806 op->args[5] = tcg_invert_cond(op->args[5]);
1807 }
1808 return finish_folding(ctx, op);
1809 }
1810
1811 static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
1812 {
1813 uint64_t z_mask, s_mask;
1814 TempOptInfo *t1 = arg_info(op->args[1]);
1815 TempOptInfo *t2 = arg_info(op->args[2]);
1816
1817 if (ti_is_const(t1)) {
1818 uint64_t t = ti_const_val(t1);
1819
1820 if (t != 0) {
1821 t = do_constant_folding(op->opc, ctx->type, t, 0);
1822 return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1823 }
1824 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
1825 }
1826
1827 switch (ctx->type) {
1828 case TCG_TYPE_I32:
1829 z_mask = 31;
1830 break;
1831 case TCG_TYPE_I64:
1832 z_mask = 63;
1833 break;
1834 default:
1835 g_assert_not_reached();
1836 }
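/* The result is either a bit index (bounded by z_mask above) or the fallback value from arg2; merge in arg2's masks. */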
1837 s_mask = ~z_mask;
1838 z_mask |= t2->z_mask;
1839 s_mask &= t2->s_mask;
1840
1841 return fold_masks_zs(ctx, op, z_mask, s_mask);
1842 }
1843
1844 static bool fold_ctpop(OptContext *ctx, TCGOp *op)
1845 {
1846 uint64_t z_mask;
1847
1848 if (fold_const1(ctx, op)) {
1849 return true;
1850 }
1851
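/* The population count can reach 32 (resp. 64), which needs one more bit than 31 (resp. 63). */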
1852 switch (ctx->type) {
1853 case TCG_TYPE_I32:
1854 z_mask = 32 | 31;
1855 break;
1856 case TCG_TYPE_I64:
1857 z_mask = 64 | 63;
1858 break;
1859 default:
1860 g_assert_not_reached();
1861 }
1862 return fold_masks_z(ctx, op, z_mask);
1863 }
1864
1865 static bool fold_deposit(OptContext *ctx, TCGOp *op)
1866 {
1867 TempOptInfo *t1 = arg_info(op->args[1]);
1868 TempOptInfo *t2 = arg_info(op->args[2]);
1869 int ofs = op->args[3];
1870 int len = op->args[4];
1871 int width = 8 * tcg_type_size(ctx->type);
1872 uint64_t z_mask, o_mask, s_mask;
1873
1874 if (ti_is_const(t1) && ti_is_const(t2)) {
1875 return tcg_opt_gen_movi(ctx, op, op->args[0],
1876 deposit64(ti_const_val(t1), ofs, len,
1877 ti_const_val(t2)));
1878 }
1879
1880 /* Inserting a value into zero at offset 0. */
1881 if (ti_is_const_val(t1, 0) && ofs == 0) {
1882 uint64_t mask = MAKE_64BIT_MASK(0, len);
1883
1884 op->opc = INDEX_op_and;
1885 op->args[1] = op->args[2];
1886 op->args[2] = arg_new_constant(ctx, mask);
1887 return fold_and(ctx, op);
1888 }
1889
1890 /* Inserting zero into a value. */
1891 if (ti_is_const_val(t2, 0)) {
1892 uint64_t mask = deposit64(-1, ofs, len, 0);
1893
1894 op->opc = INDEX_op_and;
1895 op->args[2] = arg_new_constant(ctx, mask);
1896 return fold_and(ctx, op);
1897 }
1898
1899 /* The s_mask from the top portion of the deposit is still valid. */
1900 if (ofs + len == width) {
1901 s_mask = t2->s_mask << ofs;
1902 } else {
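/* Bits covered by, or below, the deposited field can no longer be assumed to match the msb. */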
1903 s_mask = t1->s_mask & ~MAKE_64BIT_MASK(0, ofs + len);
1904 }
1905
1906 z_mask = deposit64(t1->z_mask, ofs, len, t2->z_mask);
1907 o_mask = deposit64(t1->o_mask, ofs, len, t2->o_mask);
1908
1909 return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
1910 }
1911
1912 static bool fold_divide(OptContext *ctx, TCGOp *op)
1913 {
1914 if (fold_const2(ctx, op) ||
1915 fold_xi_to_x(ctx, op, 1)) {
1916 return true;
1917 }
1918 return finish_folding(ctx, op);
1919 }
1920
1921 static bool fold_dup(OptContext *ctx, TCGOp *op)
1922 {
1923 if (arg_is_const(op->args[1])) {
1924 uint64_t t = arg_const_val(op->args[1]);
1925 t = dup_const(TCGOP_VECE(op), t);
1926 return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1927 }
1928 return finish_folding(ctx, op);
1929 }
1930
1931 static bool fold_dup2(OptContext *ctx, TCGOp *op)
1932 {
1933 if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1934 uint64_t t = deposit64(arg_const_val(op->args[1]), 32, 32,
1935 arg_const_val(op->args[2]));
1936 return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1937 }
1938
1939 if (args_are_copies(op->args[1], op->args[2])) {
1940 op->opc = INDEX_op_dup_vec;
1941 TCGOP_VECE(op) = MO_32;
1942 }
1943 return finish_folding(ctx, op);
1944 }
1945
1946 static bool fold_eqv(OptContext *ctx, TCGOp *op)
1947 {
1948 uint64_t z_mask, o_mask, s_mask;
1949 TempOptInfo *t1, *t2;
1950
1951 if (fold_const2_commutative(ctx, op)) {
1952 return true;
1953 }
1954
1955 t2 = arg_info(op->args[2]);
1956 if (ti_is_const(t2)) {
1957 /* Fold eqv r,x,i to xor r,x,~i. */
1958 switch (ctx->type) {
1959 case TCG_TYPE_I32:
1960 case TCG_TYPE_I64:
1961 op->opc = INDEX_op_xor;
1962 break;
1963 case TCG_TYPE_V64:
1964 case TCG_TYPE_V128:
1965 case TCG_TYPE_V256:
1966 op->opc = INDEX_op_xor_vec;
1967 break;
1968 default:
1969 g_assert_not_reached();
1970 }
1971 op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2));
1972 return fold_xor(ctx, op);
1973 }
1974
1975 t1 = arg_info(op->args[1]);
1976
1977 z_mask = (t1->z_mask | ~t2->o_mask) & (t2->z_mask | ~t1->o_mask);
1978 o_mask = ~(t1->z_mask | t2->z_mask) | (t1->o_mask & t2->o_mask);
1979 s_mask = t1->s_mask & t2->s_mask;
1980
1981 return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
1982 }
1983
1984 static bool fold_extract(OptContext *ctx, TCGOp *op)
1985 {
1986 uint64_t z_mask, o_mask, a_mask;
1987 TempOptInfo *t1 = arg_info(op->args[1]);
1988 int pos = op->args[2];
1989 int len = op->args[3];
1990
1991 if (ti_is_const(t1)) {
1992 return tcg_opt_gen_movi(ctx, op, op->args[0],
1993 extract64(ti_const_val(t1), pos, len));
1994 }
1995
1996 z_mask = extract64(t1->z_mask, pos, len);
1997 o_mask = extract64(t1->o_mask, pos, len);
1998 a_mask = pos ? -1 : t1->z_mask ^ z_mask;
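/*
 * The extract is a no-op when pos is 0 and no possibly-set bit of t1
 * lies above the field; a_mask collects exactly those stray bits, so
 * a zero a_mask lets this be replaced by a simple copy of t1.
 */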
1999
2000 return fold_masks_zosa(ctx, op, z_mask, o_mask, 0, a_mask);
2001 }
2002
2003 static bool fold_extract2(OptContext *ctx, TCGOp *op)
2004 {
2005 TempOptInfo *t1 = arg_info(op->args[1]);
2006 TempOptInfo *t2 = arg_info(op->args[2]);
2007 uint64_t z1 = t1->z_mask;
2008 uint64_t z2 = t2->z_mask;
2009 uint64_t o1 = t1->o_mask;
2010 uint64_t o2 = t2->o_mask;
2011 int shr = op->args[3];
2012
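/*
 * extract2 computes (t2:t1 >> shr) truncated to the operation width,
 * i.e. (t1 >> shr) | (t2 << (width - shr)), so the known-zero and
 * known-one masks of the inputs combine in exactly the same way.
 */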
2013 if (ctx->type == TCG_TYPE_I32) {
2014 z1 = (uint32_t)z1 >> shr;
2015 o1 = (uint32_t)o1 >> shr;
2016 z2 = (uint64_t)((int32_t)z2 << (32 - shr));
2017 o2 = (uint64_t)((int32_t)o2 << (32 - shr));
2018 } else {
2019 z1 >>= shr;
2020 o1 >>= shr;
2021 z2 <<= 64 - shr;
2022 o2 <<= 64 - shr;
2023 }
2024
2025 return fold_masks_zo(ctx, op, z1 | z2, o1 | o2);
2026 }
2027
2028 static bool fold_exts(OptContext *ctx, TCGOp *op)
2029 {
2030 uint64_t z_mask, o_mask, s_mask;
2031 TempOptInfo *t1;
2032
2033 if (fold_const1(ctx, op)) {
2034 return true;
2035 }
2036
2037 t1 = arg_info(op->args[1]);
2038 z_mask = t1->z_mask;
2039 o_mask = t1->o_mask;
2040 s_mask = t1->s_mask;
2041
2042 switch (op->opc) {
2043 case INDEX_op_ext_i32_i64:
2044 s_mask |= INT32_MIN;
2045 z_mask = (int32_t)z_mask;
2046 o_mask = (int32_t)o_mask;
2047 break;
2048 default:
2049 g_assert_not_reached();
2050 }
2051 return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
2052 }
2053
2054 static bool fold_extu(OptContext *ctx, TCGOp *op)
2055 {
2056 uint64_t z_mask, o_mask;
2057 TempOptInfo *t1;
2058
2059 if (fold_const1(ctx, op)) {
2060 return true;
2061 }
2062
2063 t1 = arg_info(op->args[1]);
2064 z_mask = t1->z_mask;
2065 o_mask = t1->o_mask;
2066
2067 switch (op->opc) {
2068 case INDEX_op_extrl_i64_i32:
2069 case INDEX_op_extu_i32_i64:
2070 z_mask = (uint32_t)z_mask;
2071 o_mask = (uint32_t)o_mask;
2072 break;
2073 case INDEX_op_extrh_i64_i32:
2074 z_mask >>= 32;
2075 o_mask >>= 32;
2076 break;
2077 default:
2078 g_assert_not_reached();
2079 }
2080 return fold_masks_zo(ctx, op, z_mask, o_mask);
2081 }
2082
2083 static bool fold_mb(OptContext *ctx, TCGOp *op)
2084 {
2085 /* Eliminate duplicate and redundant fence instructions. */
2086 if (ctx->prev_mb) {
2087 /*
2088 * Merge two barriers of the same type into one,
2089 * or a weaker barrier into a stronger one,
2090 * or two weaker barriers into a stronger one.
2091 * mb X; mb Y => mb X|Y
2092 * mb; strl => mb; st
2093 * ldaq; mb => ld; mb
2094 * ldaq; strl => ld; mb; st
2095 * Other combinations are also merged into a strong
2096 * barrier. This is stricter than specified but for
2097 * the purposes of TCG is better than not optimizing.
2098 */
2099 ctx->prev_mb->args[0] |= op->args[0];
2100 tcg_op_remove(ctx->tcg, op);
2101 } else {
2102 ctx->prev_mb = op;
2103 }
2104 return true;
2105 }
2106
2107 static bool fold_mov(OptContext *ctx, TCGOp *op)
2108 {
2109 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
2110 }
2111
2112 static bool fold_movcond(OptContext *ctx, TCGOp *op)
2113 {
2114 uint64_t z_mask, o_mask, s_mask;
2115 TempOptInfo *tt, *ft;
2116 int i;
2117
2118 /* If true and false values are the same, eliminate the cmp. */
2119 if (args_are_copies(op->args[3], op->args[4])) {
2120 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]);
2121 }
2122
2123 /*
2124 * Canonicalize the "false" input reg to match the destination reg so
2125 * that the tcg backend can implement a "move if true" operation.
2126 */
2127 if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
2128 op->args[5] = tcg_invert_cond(op->args[5]);
2129 }
2130
2131 i = do_constant_folding_cond1(ctx, op, NO_DEST, &op->args[1],
2132 &op->args[2], &op->args[5]);
2133 if (i >= 0) {
2134 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
2135 }
2136
2137 tt = arg_info(op->args[3]);
2138 ft = arg_info(op->args[4]);
2139 z_mask = tt->z_mask | ft->z_mask;
2140 o_mask = tt->o_mask & ft->o_mask;
2141 s_mask = tt->s_mask & ft->s_mask;
2142
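/*
 * With both data inputs constant, a movcond producing {1,0} or {-1,0}
 * is really a setcond or negsetcond on the same comparison; swapped
 * constants only invert the condition.
 */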
2143 if (ti_is_const(tt) && ti_is_const(ft)) {
2144 uint64_t tv = ti_const_val(tt);
2145 uint64_t fv = ti_const_val(ft);
2146 TCGCond cond = op->args[5];
2147
2148 if (tv == 1 && fv == 0) {
2149 op->opc = INDEX_op_setcond;
2150 op->args[3] = cond;
2151 } else if (fv == 1 && tv == 0) {
2152 op->opc = INDEX_op_setcond;
2153 op->args[3] = tcg_invert_cond(cond);
2154 } else if (tv == -1 && fv == 0) {
2155 op->opc = INDEX_op_negsetcond;
2156 op->args[3] = cond;
2157 } else if (fv == -1 && tv == 0) {
2158 op->opc = INDEX_op_negsetcond;
2159 op->args[3] = tcg_invert_cond(cond);
2160 }
2161 }
2162
2163 return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
2164 }
2165
2166 static bool fold_mul(OptContext *ctx, TCGOp *op)
2167 {
2168 if (fold_const2(ctx, op) ||
2169 fold_xi_to_i(ctx, op, 0) ||
2170 fold_xi_to_x(ctx, op, 1)) {
2171 return true;
2172 }
2173 return finish_folding(ctx, op);
2174 }
2175
2176 static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
2177 {
2178 if (fold_const2_commutative(ctx, op) ||
2179 fold_xi_to_i(ctx, op, 0)) {
2180 return true;
2181 }
2182 return finish_folding(ctx, op);
2183 }
2184
2185 static bool fold_multiply2(OptContext *ctx, TCGOp *op)
2186 {
2187 swap_commutative(op->args[0], &op->args[2], &op->args[3]);
2188
2189 if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
2190 uint64_t a = arg_const_val(op->args[2]);
2191 uint64_t b = arg_const_val(op->args[3]);
2192 uint64_t h, l;
2193 TCGArg rl, rh;
2194 TCGOp *op2;
2195
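/*
 * Both inputs constant: compute the double-width product at
 * optimization time and replace the op with two movi.  In the I32
 * case the 64-bit host product is split into halves, with int32_t
 * casts keeping each half sign-extended as a 32-bit value.
 */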
2196 switch (op->opc) {
2197 case INDEX_op_mulu2:
2198 if (ctx->type == TCG_TYPE_I32) {
2199 l = (uint64_t)(uint32_t)a * (uint32_t)b;
2200 h = (int32_t)(l >> 32);
2201 l = (int32_t)l;
2202 } else {
2203 mulu64(&l, &h, a, b);
2204 }
2205 break;
2206 case INDEX_op_muls2:
2207 if (ctx->type == TCG_TYPE_I32) {
2208 l = (int64_t)(int32_t)a * (int32_t)b;
2209 h = l >> 32;
2210 l = (int32_t)l;
2211 } else {
2212 muls64(&l, &h, a, b);
2213 }
2214 break;
2215 default:
2216 g_assert_not_reached();
2217 }
2218
2219 rl = op->args[0];
2220 rh = op->args[1];
2221
2222 /* The proper opcode is supplied by tcg_opt_gen_mov. */
2223 op2 = opt_insert_before(ctx, op, 0, 2);
2224
2225 tcg_opt_gen_movi(ctx, op, rl, l);
2226 tcg_opt_gen_movi(ctx, op2, rh, h);
2227 return true;
2228 }
2229 return finish_folding(ctx, op);
2230 }
2231
2232 static bool fold_nand(OptContext *ctx, TCGOp *op)
2233 {
2234 uint64_t z_mask, o_mask, s_mask;
2235 TempOptInfo *t1, *t2;
2236
2237 if (fold_const2_commutative(ctx, op) ||
2238 fold_xi_to_not(ctx, op, -1)) {
2239 return true;
2240 }
2241
2242 t1 = arg_info(op->args[1]);
2243 t2 = arg_info(op->args[2]);
2244
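/*
 * Nand: the result can be zero only where both inputs are known one,
 * and is known one wherever at least one input is known zero.
 */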
2245 z_mask = ~(t1->o_mask & t2->o_mask);
2246 o_mask = ~(t1->z_mask & t2->z_mask);
2247 s_mask = t1->s_mask & t2->s_mask;
2248
2249 return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
2250 }
2251
2252 static bool fold_neg_no_const(OptContext *ctx, TCGOp *op)
2253 {
2254 /* Set to 1 all bits to the left of the rightmost. */
2255 uint64_t z_mask = arg_info(op->args[1])->z_mask;
2256 z_mask = -(z_mask & -z_mask);
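/*
 * For example, if only bits 4 and 5 of the input may be nonzero, the
 * value is one of {0, 16, 32, 48} and its negation always has bits
 * 0..3 clear: z & -z isolates bit 4, and -(z & -z) sets bit 4 and up.
 */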
2257
2258 return fold_masks_z(ctx, op, z_mask);
2259 }
2260
2261 static bool fold_neg(OptContext *ctx, TCGOp *op)
2262 {
2263 return fold_const1(ctx, op) || fold_neg_no_const(ctx, op);
2264 }
2265
2266 static bool fold_nor(OptContext *ctx, TCGOp *op)
2267 {
2268 uint64_t z_mask, o_mask, s_mask;
2269 TempOptInfo *t1, *t2;
2270
2271 if (fold_const2_commutative(ctx, op) ||
2272 fold_xi_to_not(ctx, op, 0)) {
2273 return true;
2274 }
2275
2276 t1 = arg_info(op->args[1]);
2277 t2 = arg_info(op->args[2]);
2278
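/*
 * Nor: the result can be one only where neither input is known one,
 * and is known one wherever both inputs are known zero.
 */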
2279 z_mask = ~(t1->o_mask | t2->o_mask);
2280 o_mask = ~(t1->z_mask | t2->z_mask);
2281 s_mask = t1->s_mask & t2->s_mask;
2282
2283 return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
2284 }
2285
2286 static bool fold_not(OptContext *ctx, TCGOp *op)
2287 {
2288 TempOptInfo *t1;
2289
2290 if (fold_const1(ctx, op)) {
2291 return true;
2292 }
2293
2294 t1 = arg_info(op->args[1]);
2295 return fold_masks_zos(ctx, op, ~t1->o_mask, ~t1->z_mask, t1->s_mask);
2296 }
2297
2298 static bool fold_or(OptContext *ctx, TCGOp *op)
2299 {
2300 uint64_t z_mask, o_mask, s_mask, a_mask;
2301 TempOptInfo *t1, *t2;
2302
2303 if (fold_const2_commutative(ctx, op) ||
2304 fold_xi_to_x(ctx, op, 0) ||
2305 fold_xx_to_x(ctx, op)) {
2306 return true;
2307 }
2308
2309 t1 = arg_info(op->args[1]);
2310 t2 = arg_info(op->args[2]);
2311
2312 z_mask = t1->z_mask | t2->z_mask;
2313 o_mask = t1->o_mask | t2->o_mask;
2314 s_mask = t1->s_mask & t2->s_mask;
2315
2316 /* Affected bits are those not known one, masked by those known zero. */
2317 a_mask = ~t1->o_mask & t2->z_mask;
2318
2319 return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask);
2320 }
2321
2322 static bool fold_orc(OptContext *ctx, TCGOp *op)
2323 {
2324 uint64_t z_mask, o_mask, s_mask, a_mask;
2325 TempOptInfo *t1, *t2;
2326
2327 if (fold_const2(ctx, op)) {
2328 return true;
2329 }
2330
2331 t2 = arg_info(op->args[2]);
2332 if (ti_is_const(t2)) {
2333 /* Fold orc r,x,i to or r,x,~i. */
2334 switch (ctx->type) {
2335 case TCG_TYPE_I32:
2336 case TCG_TYPE_I64:
2337 op->opc = INDEX_op_or;
2338 break;
2339 case TCG_TYPE_V64:
2340 case TCG_TYPE_V128:
2341 case TCG_TYPE_V256:
2342 op->opc = INDEX_op_or_vec;
2343 break;
2344 default:
2345 g_assert_not_reached();
2346 }
2347 op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2));
2348 return fold_or(ctx, op);
2349 }
2350 if (fold_xx_to_i(ctx, op, -1) ||
2351 fold_ix_to_not(ctx, op, 0)) {
2352 return true;
2353 }
2354 t1 = arg_info(op->args[1]);
2355
2356 z_mask = t1->z_mask | ~t2->o_mask;
2357 o_mask = t1->o_mask | ~t2->z_mask;
2358 s_mask = t1->s_mask & t2->s_mask;
2359
2360 /* Affected bits are those not known one in either input. */
2361 a_mask = ~t1->o_mask & ~t2->o_mask;
2362
2363 return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask);
2364 }
2365
2366 static bool fold_qemu_ld_1reg(OptContext *ctx, TCGOp *op)
2367 {
2368 const TCGOpDef *def = &tcg_op_defs[op->opc];
2369 MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
2370 MemOp mop = get_memop(oi);
2371 int width = 8 * memop_size(mop);
2372 uint64_t z_mask = -1, s_mask = 0;
2373
2374 if (width < 64) {
2375 if (mop & MO_SIGN) {
2376 s_mask = MAKE_64BIT_MASK(width - 1, 64 - (width - 1));
2377 } else {
2378 z_mask = MAKE_64BIT_MASK(0, width);
2379 }
2380 }
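/*
 * E.g. an 8-bit unsigned load leaves only the low 8 bits possibly set,
 * while a 16-bit signed load makes bits 15..63 copies of the sign bit.
 */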
2381
2382 /* Opcodes that touch guest memory stop the mb optimization. */
2383 ctx->prev_mb = NULL;
2384
2385 return fold_masks_zs(ctx, op, z_mask, s_mask);
2386 }
2387
2388 static bool fold_qemu_ld_2reg(OptContext *ctx, TCGOp *op)
2389 {
2390 /* Opcodes that touch guest memory stop the mb optimization. */
2391 ctx->prev_mb = NULL;
2392 return finish_folding(ctx, op);
2393 }
2394
2395 static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
2396 {
2397 /* Opcodes that touch guest memory stop the mb optimization. */
2398 ctx->prev_mb = NULL;
2399 return true;
2400 }
2401
2402 static bool fold_remainder(OptContext *ctx, TCGOp *op)
2403 {
2404 if (fold_const2(ctx, op) ||
2405 fold_xx_to_i(ctx, op, 0)) {
2406 return true;
2407 }
2408 return finish_folding(ctx, op);
2409 }
2410
2411 /* Return 1 if finished, -1 if simplified, 0 if unchanged. */
2412 static int fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg)
2413 {
2414 uint64_t a_zmask, b_val;
2415 TCGCond cond;
2416
2417 if (!arg_is_const(op->args[2])) {
2418 return false;
2419 }
2420
2421 a_zmask = arg_info(op->args[1])->z_mask;
2422 b_val = arg_const_val(op->args[2]);
2423 cond = op->args[3];
2424
2425 if (ctx->type == TCG_TYPE_I32) {
2426 a_zmask = (uint32_t)a_zmask;
2427 b_val = (uint32_t)b_val;
2428 }
2429
2430 /*
2431 * A with only low bits set vs B with high bits set means that A < B.
2432 */
2433 if (a_zmask < b_val) {
2434 bool inv = false;
2435
2436 switch (cond) {
2437 case TCG_COND_NE:
2438 case TCG_COND_LEU:
2439 case TCG_COND_LTU:
2440 inv = true;
2441 /* fall through */
2442 case TCG_COND_GTU:
2443 case TCG_COND_GEU:
2444 case TCG_COND_EQ:
2445 return tcg_opt_gen_movi(ctx, op, op->args[0], neg ? -inv : inv);
2446 default:
2447 break;
2448 }
2449 }
2450
2451 /*
2452 * A with only lsb set is already boolean.
2453 */
2454 if (a_zmask <= 1) {
2455 bool convert = false;
2456 bool inv = false;
2457
2458 switch (cond) {
2459 case TCG_COND_EQ:
2460 inv = true;
2461 /* fall through */
2462 case TCG_COND_NE:
2463 convert = (b_val == 0);
2464 break;
2465 case TCG_COND_LTU:
2466 case TCG_COND_TSTEQ:
2467 inv = true;
2468 /* fall through */
2469 case TCG_COND_GEU:
2470 case TCG_COND_TSTNE:
2471 convert = (b_val == 1);
2472 break;
2473 default:
2474 break;
2475 }
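/*
 * A is already a boolean here, so the setcond reduces to A, !A, -A
 * or -!A: plain A is a copy, -A is a neg, !A is xor 1, and -!A is
 * equal to A - 1 (0 -> -1, 1 -> 0), hence the add of -1 below.
 */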
2476 if (convert) {
2477 if (!inv && !neg) {
2478 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
2479 }
2480
2481 if (!inv) {
2482 op->opc = INDEX_op_neg;
2483 } else if (neg) {
2484 op->opc = INDEX_op_add;
2485 op->args[2] = arg_new_constant(ctx, -1);
2486 } else {
2487 op->opc = INDEX_op_xor;
2488 op->args[2] = arg_new_constant(ctx, 1);
2489 }
2490 return -1;
2491 }
2492 }
2493 return 0;
2494 }
2495
2496 static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg)
2497 {
2498 TCGCond cond = op->args[3];
2499 TCGArg ret, src1, src2;
2500 TCGOp *op2;
2501 uint64_t val;
2502 int sh;
2503 bool inv;
2504
2505 if (!is_tst_cond(cond) || !arg_is_const(op->args[2])) {
2506 return;
2507 }
2508
2509 src2 = op->args[2];
2510 val = arg_const_val(src2);
2511 if (!is_power_of_2(val)) {
2512 return;
2513 }
2514 sh = ctz64(val);
2515
2516 ret = op->args[0];
2517 src1 = op->args[1];
2518 inv = cond == TCG_COND_TSTEQ;
2519
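/*
 * A test of a single bit can be computed directly: extract (or
 * sextract for the negated form) of one bit at position sh, or a
 * shift right by sh followed by and 1; the TSTEQ and negation
 * variants are then fixed up with xor 1, neg or add -1 below.
 */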
2520 if (sh && neg && !inv && TCG_TARGET_sextract_valid(ctx->type, sh, 1)) {
2521 op->opc = INDEX_op_sextract;
2522 op->args[1] = src1;
2523 op->args[2] = sh;
2524 op->args[3] = 1;
2525 return;
2526 } else if (sh && TCG_TARGET_extract_valid(ctx->type, sh, 1)) {
2527 op->opc = INDEX_op_extract;
2528 op->args[1] = src1;
2529 op->args[2] = sh;
2530 op->args[3] = 1;
2531 } else {
2532 if (sh) {
2533 op2 = opt_insert_before(ctx, op, INDEX_op_shr, 3);
2534 op2->args[0] = ret;
2535 op2->args[1] = src1;
2536 op2->args[2] = arg_new_constant(ctx, sh);
2537 src1 = ret;
2538 }
2539 op->opc = INDEX_op_and;
2540 op->args[1] = src1;
2541 op->args[2] = arg_new_constant(ctx, 1);
2542 }
2543
2544 if (neg && inv) {
2545 op2 = opt_insert_after(ctx, op, INDEX_op_add, 3);
2546 op2->args[0] = ret;
2547 op2->args[1] = ret;
2548 op2->args[2] = arg_new_constant(ctx, -1);
2549 } else if (inv) {
2550 op2 = opt_insert_after(ctx, op, INDEX_op_xor, 3);
2551 op2->args[0] = ret;
2552 op2->args[1] = ret;
2553 op2->args[2] = arg_new_constant(ctx, 1);
2554 } else if (neg) {
2555 op2 = opt_insert_after(ctx, op, INDEX_op_neg, 2);
2556 op2->args[0] = ret;
2557 op2->args[1] = ret;
2558 }
2559 }
2560
2561 static bool fold_setcond(OptContext *ctx, TCGOp *op)
2562 {
2563 int i = do_constant_folding_cond1(ctx, op, op->args[0], &op->args[1],
2564 &op->args[2], &op->args[3]);
2565 if (i >= 0) {
2566 return tcg_opt_gen_movi(ctx, op, op->args[0], i);
2567 }
2568
2569 i = fold_setcond_zmask(ctx, op, false);
2570 if (i > 0) {
2571 return true;
2572 }
2573 if (i == 0) {
2574 fold_setcond_tst_pow2(ctx, op, false);
2575 }
2576
2577 return fold_masks_z(ctx, op, 1);
2578 }
2579
2580 static bool fold_negsetcond(OptContext *ctx, TCGOp *op)
2581 {
2582 int i = do_constant_folding_cond1(ctx, op, op->args[0], &op->args[1],
2583 &op->args[2], &op->args[3]);
2584 if (i >= 0) {
2585 return tcg_opt_gen_movi(ctx, op, op->args[0], -i);
2586 }
2587
2588 i = fold_setcond_zmask(ctx, op, true);
2589 if (i > 0) {
2590 return true;
2591 }
2592 if (i == 0) {
2593 fold_setcond_tst_pow2(ctx, op, true);
2594 }
2595
2596 /* Value is {0,-1} so all bits are repetitions of the sign. */
2597 return fold_masks_s(ctx, op, -1);
2598 }
2599
2600 static bool fold_setcond2(OptContext *ctx, TCGOp *op)
2601 {
2602 TCGCond cond;
2603 int i, inv = 0;
2604
2605 i = do_constant_folding_cond2(ctx, op, &op->args[1]);
2606 cond = op->args[5];
2607 if (i >= 0) {
2608 goto do_setcond_const;
2609 }
2610
2611 switch (cond) {
2612 case TCG_COND_LT:
2613 case TCG_COND_GE:
2614 /*
2615 * Simplify LT/GE comparisons vs zero to a single compare
2616 * vs the high word of the input.
2617 */
2618 if (arg_is_const_val(op->args[3], 0) &&
2619 arg_is_const_val(op->args[4], 0)) {
2620 goto do_setcond_high;
2621 }
2622 break;
2623
2624 case TCG_COND_NE:
2625 inv = 1;
2626 QEMU_FALLTHROUGH;
2627 case TCG_COND_EQ:
2628 /*
2629 * Simplify EQ/NE comparisons where one of the pairs
2630 * can be simplified.
2631 */
2632 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
2633 op->args[3], cond);
2634 switch (i ^ inv) {
2635 case 0:
2636 goto do_setcond_const;
2637 case 1:
2638 goto do_setcond_high;
2639 }
2640
2641 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[2],
2642 op->args[4], cond);
2643 switch (i ^ inv) {
2644 case 0:
2645 goto do_setcond_const;
2646 case 1:
2647 goto do_setcond_low;
2648 }
2649 break;
2650
2651 case TCG_COND_TSTEQ:
2652 case TCG_COND_TSTNE:
2653 if (arg_is_const_val(op->args[3], 0)) {
2654 goto do_setcond_high;
2655 }
2656 if (arg_is_const_val(op->args[4], 0)) {
2657 goto do_setcond_low;
2658 }
2659 break;
2660
2661 default:
2662 break;
2663
2664 do_setcond_low:
2665 op->args[2] = op->args[3];
2666 op->args[3] = cond;
2667 op->opc = INDEX_op_setcond;
2668 return fold_setcond(ctx, op);
2669
2670 do_setcond_high:
2671 op->args[1] = op->args[2];
2672 op->args[2] = op->args[4];
2673 op->args[3] = cond;
2674 op->opc = INDEX_op_setcond;
2675 return fold_setcond(ctx, op);
2676 }
2677
2678 return fold_masks_z(ctx, op, 1);
2679
2680 do_setcond_const:
2681 return tcg_opt_gen_movi(ctx, op, op->args[0], i);
2682 }
2683
2684 static bool fold_sextract(OptContext *ctx, TCGOp *op)
2685 {
2686 uint64_t z_mask, o_mask, s_mask, a_mask;
2687 TempOptInfo *t1 = arg_info(op->args[1]);
2688 int pos = op->args[2];
2689 int len = op->args[3];
2690
2691 if (ti_is_const(t1)) {
2692 return tcg_opt_gen_movi(ctx, op, op->args[0],
2693 sextract64(ti_const_val(t1), pos, len));
2694 }
2695
2696 s_mask = t1->s_mask >> pos;
2697 s_mask |= -1ull << (len - 1);
2698 a_mask = pos ? -1 : s_mask & ~t1->s_mask;
2699
2700 z_mask = sextract64(t1->z_mask, pos, len);
2701 o_mask = sextract64(t1->o_mask, pos, len);
2702
2703 return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask);
2704 }
2705
2706 static bool fold_shift(OptContext *ctx, TCGOp *op)
2707 {
2708 uint64_t s_mask, z_mask, o_mask;
2709 TempOptInfo *t1, *t2;
2710
2711 if (fold_const2(ctx, op) ||
2712 fold_ix_to_i(ctx, op, 0) ||
2713 fold_xi_to_x(ctx, op, 0)) {
2714 return true;
2715 }
2716
2717 t1 = arg_info(op->args[1]);
2718 t2 = arg_info(op->args[2]);
2719 s_mask = t1->s_mask;
2720 z_mask = t1->z_mask;
2721 o_mask = t1->o_mask;
2722
2723 if (ti_is_const(t2)) {
2724 int sh = ti_const_val(t2);
2725
2726 z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
2727 o_mask = do_constant_folding(op->opc, ctx->type, o_mask, sh);
2728 s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);
2729
2730 return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
2731 }
2732
2733 switch (op->opc) {
2734 case INDEX_op_sar:
2735 /*
2736 * Arithmetic right shift will not reduce the number of
2737 * input sign repetitions.
2738 */
2739 return fold_masks_s(ctx, op, s_mask);
2740 case INDEX_op_shr:
2741 /*
2742 * If the sign bit is known zero, then logical right shift
2743 * will not reduce the number of input sign repetitions.
2744 */
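/*
 * s_mask is kept as a run of high bits, so -s_mask isolates the
 * lowest bit of that run; if that bit cannot be set, the sign bit
 * is known zero and the repetitions survive the logical shift.
 */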
2745 if (~z_mask & -s_mask) {
2746 return fold_masks_s(ctx, op, s_mask);
2747 }
2748 break;
2749 default:
2750 break;
2751 }
2752
2753 return finish_folding(ctx, op);
2754 }
2755
2756 static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
2757 {
2758 TCGOpcode neg_op;
2759 bool have_neg;
2760
2761 if (!arg_is_const_val(op->args[1], 0)) {
2762 return false;
2763 }
2764
2765 switch (ctx->type) {
2766 case TCG_TYPE_I32:
2767 case TCG_TYPE_I64:
2768 neg_op = INDEX_op_neg;
2769 have_neg = true;
2770 break;
2771 case TCG_TYPE_V64:
2772 case TCG_TYPE_V128:
2773 case TCG_TYPE_V256:
2774 neg_op = INDEX_op_neg_vec;
2775 have_neg = (TCG_TARGET_HAS_neg_vec &&
2776 tcg_can_emit_vec_op(neg_op, ctx->type, TCGOP_VECE(op)) > 0);
2777 break;
2778 default:
2779 g_assert_not_reached();
2780 }
2781 if (have_neg) {
2782 op->opc = neg_op;
2783 op->args[1] = op->args[2];
2784 return fold_neg_no_const(ctx, op);
2785 }
2786 return false;
2787 }
2788
2789 /* We cannot as yet use do_constant_folding with vectors. */
2790 static bool fold_sub_vec(OptContext *ctx, TCGOp *op)
2791 {
2792 if (fold_xx_to_i(ctx, op, 0) ||
2793 fold_xi_to_x(ctx, op, 0) ||
2794 fold_sub_to_neg(ctx, op)) {
2795 return true;
2796 }
2797 return finish_folding(ctx, op);
2798 }
2799
2800 static bool fold_sub(OptContext *ctx, TCGOp *op)
2801 {
2802 if (fold_const2(ctx, op) ||
2803 fold_xx_to_i(ctx, op, 0) ||
2804 fold_xi_to_x(ctx, op, 0) ||
2805 fold_sub_to_neg(ctx, op)) {
2806 return true;
2807 }
2808
2809 /* Fold sub r,x,i to add r,x,-i */
2810 if (arg_is_const(op->args[2])) {
2811 uint64_t val = arg_const_val(op->args[2]);
2812
2813 op->opc = INDEX_op_add;
2814 op->args[2] = arg_new_constant(ctx, -val);
2815 }
2816 return finish_folding(ctx, op);
2817 }
2818
2819 static void squash_prev_borrowout(OptContext *ctx, TCGOp *op)
2820 {
2821 TempOptInfo *t2;
2822
2823 op = QTAILQ_PREV(op, link);
2824 switch (op->opc) {
2825 case INDEX_op_subbo:
2826 op->opc = INDEX_op_sub;
2827 fold_sub(ctx, op);
2828 break;
2829 case INDEX_op_subbio:
2830 op->opc = INDEX_op_subbi;
2831 break;
2832 case INDEX_op_subb1o:
2833 t2 = arg_info(op->args[2]);
2834 if (ti_is_const(t2)) {
2835 op->opc = INDEX_op_add;
2836 op->args[2] = arg_new_constant(ctx, -(ti_const_val(t2) + 1));
2837 /* Perform other constant folding, if needed. */
2838 fold_add(ctx, op);
2839 } else {
2840 TCGArg ret = op->args[0];
2841 op->opc = INDEX_op_sub;
2842 op = opt_insert_after(ctx, op, INDEX_op_add, 3);
2843 op->args[0] = ret;
2844 op->args[1] = ret;
2845 op->args[2] = arg_new_constant(ctx, -1);
2846 }
2847 break;
2848 default:
2849 g_assert_not_reached();
2850 }
2851 }
2852
2853 static bool fold_subbi(OptContext *ctx, TCGOp *op)
2854 {
2855 TempOptInfo *t2;
2856 int borrow_in = ctx->carry_state;
2857
2858 if (borrow_in < 0) {
2859 return finish_folding(ctx, op);
2860 }
2861 ctx->carry_state = -1;
2862
2863 squash_prev_borrowout(ctx, op);
2864 if (borrow_in == 0) {
2865 op->opc = INDEX_op_sub;
2866 return fold_sub(ctx, op);
2867 }
2868
2869 /*
2870 * Propagate the known carry-in into any constant, then negate to
2871 * transform from sub to add. If there is no constant, emit a
2872 * separate add -1.
2873 */
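/*
 * E.g. with a constant b, a - b - 1 becomes a + (-(b + 1)); otherwise
 * the subtract is emitted as-is and a separate add of -1 follows.
 */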
2874 t2 = arg_info(op->args[2]);
2875 if (ti_is_const(t2)) {
2876 op->args[2] = arg_new_constant(ctx, -(ti_const_val(t2) + 1));
2877 } else {
2878 TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_sub, 3);
2879
2880 op2->args[0] = op->args[0];
2881 op2->args[1] = op->args[1];
2882 op2->args[2] = op->args[2];
2883 fold_sub(ctx, op2);
2884
2885 op->args[1] = op->args[0];
2886 op->args[2] = arg_new_constant(ctx, -1);
2887 }
2888 op->opc = INDEX_op_add;
2889 return fold_add(ctx, op);
2890 }
2891
2892 static bool fold_subbio(OptContext *ctx, TCGOp *op)
2893 {
2894 TempOptInfo *t1, *t2;
2895 int borrow_out = -1;
2896
2897 if (ctx->carry_state < 0) {
2898 return finish_folding(ctx, op);
2899 }
2900
2901 squash_prev_borrowout(ctx, op);
2902 if (ctx->carry_state == 0) {
2903 goto do_subbo;
2904 }
2905
2906 t1 = arg_info(op->args[1]);
2907 t2 = arg_info(op->args[2]);
2908
2909 /* Propagate the known borrow-in into a constant, if possible. */
2910 if (ti_is_const(t2)) {
2911 uint64_t max = ctx->type == TCG_TYPE_I32 ? UINT32_MAX : UINT64_MAX;
2912 uint64_t v = ti_const_val(t2) & max;
2913
2914 if (v < max) {
2915 op->args[2] = arg_new_constant(ctx, v + 1);
2916 goto do_subbo;
2917 }
2918 /* subtracting max + 1 produces known borrow out. */
2919 borrow_out = 1;
2920 }
2921 if (ti_is_const(t1)) {
2922 uint64_t v = ti_const_val(t1);
2923 if (v != 0) {
2924 op->args[1] = arg_new_constant(ctx, v - 1);
2925 goto do_subbo;
2926 }
2927 }
2928
2929 /* Adjust the opcode to remember the known carry-in. */
2930 op->opc = INDEX_op_subb1o;
2931 ctx->carry_state = borrow_out;
2932 return finish_folding(ctx, op);
2933
2934 do_subbo:
2935 op->opc = INDEX_op_subbo;
2936 return fold_subbo(ctx, op);
2937 }
2938
2939 static bool fold_subbo(OptContext *ctx, TCGOp *op)
2940 {
2941 TempOptInfo *t1 = arg_info(op->args[1]);
2942 TempOptInfo *t2 = arg_info(op->args[2]);
2943 int borrow_out = -1;
2944
2945 if (ti_is_const(t2)) {
2946 uint64_t v2 = ti_const_val(t2);
2947 if (v2 == 0) {
2948 borrow_out = 0;
2949 } else if (ti_is_const(t1)) {
2950 uint64_t v1 = ti_const_val(t1);
2951 borrow_out = v1 < v2;
2952 }
2953 }
2954 ctx->carry_state = borrow_out;
2955 return finish_folding(ctx, op);
2956 }
2957
2958 static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
2959 {
2960 uint64_t z_mask = -1, s_mask = 0;
2961
2962 /* We can't do any folding with a load, but we can record bits. */
2963 switch (op->opc) {
2964 case INDEX_op_ld8s:
2965 s_mask = INT8_MIN;
2966 break;
2967 case INDEX_op_ld8u:
2968 z_mask = MAKE_64BIT_MASK(0, 8);
2969 break;
2970 case INDEX_op_ld16s:
2971 s_mask = INT16_MIN;
2972 break;
2973 case INDEX_op_ld16u:
2974 z_mask = MAKE_64BIT_MASK(0, 16);
2975 break;
2976 case INDEX_op_ld32s:
2977 s_mask = INT32_MIN;
2978 break;
2979 case INDEX_op_ld32u:
2980 z_mask = MAKE_64BIT_MASK(0, 32);
2981 break;
2982 default:
2983 g_assert_not_reached();
2984 }
2985 return fold_masks_zs(ctx, op, z_mask, s_mask);
2986 }
2987
2988 static bool fold_tcg_ld_memcopy(OptContext *ctx, TCGOp *op)
2989 {
2990 TCGTemp *dst, *src;
2991 intptr_t ofs;
2992 TCGType type;
2993
2994 if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
2995 return finish_folding(ctx, op);
2996 }
2997
2998 type = ctx->type;
2999 ofs = op->args[2];
3000 dst = arg_temp(op->args[0]);
3001 src = find_mem_copy_for(ctx, type, ofs);
3002 if (src && src->base_type == type) {
3003 return tcg_opt_gen_mov(ctx, op, temp_arg(dst), temp_arg(src));
3004 }
3005
3006 reset_ts(ctx, dst);
3007 record_mem_copy(ctx, type, dst, ofs, ofs + tcg_type_size(type) - 1);
3008 return true;
3009 }
3010
3011 static bool fold_tcg_st(OptContext *ctx, TCGOp *op)
3012 {
3013 intptr_t ofs = op->args[2];
3014 intptr_t lm1;
3015
3016 if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
3017 remove_mem_copy_all(ctx);
3018 return true;
3019 }
3020
3021 switch (op->opc) {
3022 case INDEX_op_st8:
3023 lm1 = 0;
3024 break;
3025 case INDEX_op_st16:
3026 lm1 = 1;
3027 break;
3028 case INDEX_op_st32:
3029 lm1 = 3;
3030 break;
3031 case INDEX_op_st:
3032 case INDEX_op_st_vec:
3033 lm1 = tcg_type_size(ctx->type) - 1;
3034 break;
3035 default:
3036 g_assert_not_reached();
3037 }
3038 remove_mem_copy_in(ctx, ofs, ofs + lm1);
3039 return true;
3040 }
3041
3042 static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op)
3043 {
3044 TCGTemp *src;
3045 intptr_t ofs, last;
3046 TCGType type;
3047
3048 if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
3049 return fold_tcg_st(ctx, op);
3050 }
3051
3052 src = arg_temp(op->args[0]);
3053 ofs = op->args[2];
3054 type = ctx->type;
3055
3056 /*
3057 * Eliminate duplicate stores of a constant.
3058 * This happens frequently when the target ISA zero-extends.
3059 */
3060 if (ts_is_const(src)) {
3061 TCGTemp *prev = find_mem_copy_for(ctx, type, ofs);
3062 if (src == prev) {
3063 tcg_op_remove(ctx->tcg, op);
3064 return true;
3065 }
3066 }
3067
3068 last = ofs + tcg_type_size(type) - 1;
3069 remove_mem_copy_in(ctx, ofs, last);
3070 record_mem_copy(ctx, type, src, ofs, last);
3071 return true;
3072 }
3073
3074 static bool fold_xor(OptContext *ctx, TCGOp *op)
3075 {
3076 uint64_t z_mask, o_mask, s_mask;
3077 TempOptInfo *t1, *t2;
3078
3079 if (fold_const2_commutative(ctx, op) ||
3080 fold_xx_to_i(ctx, op, 0) ||
3081 fold_xi_to_x(ctx, op, 0) ||
3082 fold_xi_to_not(ctx, op, -1)) {
3083 return true;
3084 }
3085
3086 t1 = arg_info(op->args[1]);
3087 t2 = arg_info(op->args[2]);
3088
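/*
 * A xor bit is known one where one input is known one and the other
 * known zero, and known zero where both are known zero or both are
 * known one; the masks below encode exactly that.
 */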
3089 z_mask = (t1->z_mask | t2->z_mask) & ~(t1->o_mask & t2->o_mask);
3090 o_mask = (t1->o_mask & ~t2->z_mask) | (t2->o_mask & ~t1->z_mask);
3091 s_mask = t1->s_mask & t2->s_mask;
3092
3093 return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
3094 }
3095
3096 /* Propagate constants and copies, fold constant expressions. */
3097 void tcg_optimize(TCGContext *s)
3098 {
3099 int nb_temps, i;
3100 TCGOp *op, *op_next;
3101 OptContext ctx = { .tcg = s };
3102
3103 QSIMPLEQ_INIT(&ctx.mem_free);
3104
3105 /* Each temp's state_ptr points to its TempOptInfo.
3106 If the temp holds a constant then that value is recorded there.
3107 If the temp is a copy of other ones then the other copies are
3108 available through the doubly linked circular list. */
3109
3110 nb_temps = s->nb_temps;
3111 for (i = 0; i < nb_temps; ++i) {
3112 s->temps[i].state_ptr = NULL;
3113 }
3114
3115 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3116 TCGOpcode opc = op->opc;
3117 const TCGOpDef *def;
3118 bool done = false;
3119
3120 /* Calls are special. */
3121 if (opc == INDEX_op_call) {
3122 fold_call(&ctx, op);
3123 continue;
3124 }
3125
3126 def = &tcg_op_defs[opc];
3127 init_arguments(&ctx, op, def->nb_oargs + def->nb_iargs);
3128 copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs);
3129
3130 /* Pre-compute the type of the operation. */
3131 ctx.type = TCGOP_TYPE(op);
3132
3133 /*
3134 * Process each opcode.
3135 * Sorted alphabetically by opcode as much as possible.
3136 */
3137 switch (opc) {
3138 case INDEX_op_add:
3139 done = fold_add(&ctx, op);
3140 break;
3141 case INDEX_op_add_vec:
3142 done = fold_add_vec(&ctx, op);
3143 break;
3144 case INDEX_op_addci:
3145 done = fold_addci(&ctx, op);
3146 break;
3147 case INDEX_op_addcio:
3148 done = fold_addcio(&ctx, op);
3149 break;
3150 case INDEX_op_addco:
3151 done = fold_addco(&ctx, op);
3152 break;
3153 case INDEX_op_and:
3154 case INDEX_op_and_vec:
3155 done = fold_and(&ctx, op);
3156 break;
3157 case INDEX_op_andc:
3158 case INDEX_op_andc_vec:
3159 done = fold_andc(&ctx, op);
3160 break;
3161 case INDEX_op_brcond:
3162 done = fold_brcond(&ctx, op);
3163 break;
3164 case INDEX_op_brcond2_i32:
3165 done = fold_brcond2(&ctx, op);
3166 break;
3167 case INDEX_op_bswap16:
3168 case INDEX_op_bswap32:
3169 case INDEX_op_bswap64:
3170 done = fold_bswap(&ctx, op);
3171 break;
3172 case INDEX_op_clz:
3173 case INDEX_op_ctz:
3174 done = fold_count_zeros(&ctx, op);
3175 break;
3176 case INDEX_op_ctpop:
3177 done = fold_ctpop(&ctx, op);
3178 break;
3179 case INDEX_op_deposit:
3180 done = fold_deposit(&ctx, op);
3181 break;
3182 case INDEX_op_divs:
3183 case INDEX_op_divu:
3184 done = fold_divide(&ctx, op);
3185 break;
3186 case INDEX_op_dup_vec:
3187 done = fold_dup(&ctx, op);
3188 break;
3189 case INDEX_op_dup2_vec:
3190 done = fold_dup2(&ctx, op);
3191 break;
3192 case INDEX_op_eqv:
3193 case INDEX_op_eqv_vec:
3194 done = fold_eqv(&ctx, op);
3195 break;
3196 case INDEX_op_extract:
3197 done = fold_extract(&ctx, op);
3198 break;
3199 case INDEX_op_extract2:
3200 done = fold_extract2(&ctx, op);
3201 break;
3202 case INDEX_op_ext_i32_i64:
3203 done = fold_exts(&ctx, op);
3204 break;
3205 case INDEX_op_extu_i32_i64:
3206 case INDEX_op_extrl_i64_i32:
3207 case INDEX_op_extrh_i64_i32:
3208 done = fold_extu(&ctx, op);
3209 break;
3210 case INDEX_op_ld8s:
3211 case INDEX_op_ld8u:
3212 case INDEX_op_ld16s:
3213 case INDEX_op_ld16u:
3214 case INDEX_op_ld32s:
3215 case INDEX_op_ld32u:
3216 done = fold_tcg_ld(&ctx, op);
3217 break;
3218 case INDEX_op_ld:
3219 case INDEX_op_ld_vec:
3220 done = fold_tcg_ld_memcopy(&ctx, op);
3221 break;
3222 case INDEX_op_st8:
3223 case INDEX_op_st16:
3224 case INDEX_op_st32:
3225 done = fold_tcg_st(&ctx, op);
3226 break;
3227 case INDEX_op_st:
3228 case INDEX_op_st_vec:
3229 done = fold_tcg_st_memcopy(&ctx, op);
3230 break;
3231 case INDEX_op_mb:
3232 done = fold_mb(&ctx, op);
3233 break;
3234 case INDEX_op_mov:
3235 case INDEX_op_mov_vec:
3236 done = fold_mov(&ctx, op);
3237 break;
3238 case INDEX_op_movcond:
3239 done = fold_movcond(&ctx, op);
3240 break;
3241 case INDEX_op_mul:
3242 done = fold_mul(&ctx, op);
3243 break;
3244 case INDEX_op_mulsh:
3245 case INDEX_op_muluh:
3246 done = fold_mul_highpart(&ctx, op);
3247 break;
3248 case INDEX_op_muls2:
3249 case INDEX_op_mulu2:
3250 done = fold_multiply2(&ctx, op);
3251 break;
3252 case INDEX_op_nand:
3253 case INDEX_op_nand_vec:
3254 done = fold_nand(&ctx, op);
3255 break;
3256 case INDEX_op_neg:
3257 done = fold_neg(&ctx, op);
3258 break;
3259 case INDEX_op_nor:
3260 case INDEX_op_nor_vec:
3261 done = fold_nor(&ctx, op);
3262 break;
3263 case INDEX_op_not:
3264 case INDEX_op_not_vec:
3265 done = fold_not(&ctx, op);
3266 break;
3267 case INDEX_op_or:
3268 case INDEX_op_or_vec:
3269 done = fold_or(&ctx, op);
3270 break;
3271 case INDEX_op_orc:
3272 case INDEX_op_orc_vec:
3273 done = fold_orc(&ctx, op);
3274 break;
3275 case INDEX_op_qemu_ld:
3276 done = fold_qemu_ld_1reg(&ctx, op);
3277 break;
3278 case INDEX_op_qemu_ld2:
3279 done = fold_qemu_ld_2reg(&ctx, op);
3280 break;
3281 case INDEX_op_qemu_st:
3282 case INDEX_op_qemu_st2:
3283 done = fold_qemu_st(&ctx, op);
3284 break;
3285 case INDEX_op_rems:
3286 case INDEX_op_remu:
3287 done = fold_remainder(&ctx, op);
3288 break;
3289 case INDEX_op_rotl:
3290 case INDEX_op_rotr:
3291 case INDEX_op_sar:
3292 case INDEX_op_shl:
3293 case INDEX_op_shr:
3294 done = fold_shift(&ctx, op);
3295 break;
3296 case INDEX_op_setcond:
3297 done = fold_setcond(&ctx, op);
3298 break;
3299 case INDEX_op_negsetcond:
3300 done = fold_negsetcond(&ctx, op);
3301 break;
3302 case INDEX_op_setcond2_i32:
3303 done = fold_setcond2(&ctx, op);
3304 break;
3305 case INDEX_op_cmp_vec:
3306 done = fold_cmp_vec(&ctx, op);
3307 break;
3308 case INDEX_op_cmpsel_vec:
3309 done = fold_cmpsel_vec(&ctx, op);
3310 break;
3311 case INDEX_op_bitsel_vec:
3312 done = fold_bitsel_vec(&ctx, op);
3313 break;
3314 case INDEX_op_sextract:
3315 done = fold_sextract(&ctx, op);
3316 break;
3317 case INDEX_op_sub:
3318 done = fold_sub(&ctx, op);
3319 break;
3320 case INDEX_op_subbi:
3321 done = fold_subbi(&ctx, op);
3322 break;
3323 case INDEX_op_subbio:
3324 done = fold_subbio(&ctx, op);
3325 break;
3326 case INDEX_op_subbo:
3327 done = fold_subbo(&ctx, op);
3328 break;
3329 case INDEX_op_sub_vec:
3330 done = fold_sub_vec(&ctx, op);
3331 break;
3332 case INDEX_op_xor:
3333 case INDEX_op_xor_vec:
3334 done = fold_xor(&ctx, op);
3335 break;
3336 case INDEX_op_set_label:
3337 case INDEX_op_br:
3338 case INDEX_op_exit_tb:
3339 case INDEX_op_goto_tb:
3340 case INDEX_op_goto_ptr:
3341 finish_ebb(&ctx);
3342 done = true;
3343 break;
3344 default:
3345 done = finish_folding(&ctx, op);
3346 break;
3347 }
3348 tcg_debug_assert(done);
3349 }
3350 }
3351