1 /*
2 * Optimizations for Tiny Code Generator for QEMU
3 *
4 * Copyright (c) 2010 Samsung Electronics.
5 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 * THE SOFTWARE.
24 */
25
26 #include "qemu/osdep.h"
27 #include "qemu/int128.h"
28 #include "qemu/interval-tree.h"
29 #include "tcg/tcg-op-common.h"
30 #include "tcg-internal.h"
31 #include "tcg-has.h"
32
33
34 typedef struct MemCopyInfo {
35 IntervalTreeNode itree;
36     QSIMPLEQ_ENTRY(MemCopyInfo) next;
37 TCGTemp *ts;
38 TCGType type;
39 } MemCopyInfo;
40
41 typedef struct TempOptInfo {
42 bool is_const;
43 TCGTemp *prev_copy;
44 TCGTemp *next_copy;
45 QSIMPLEQ_HEAD(, MemCopyInfo) mem_copy;
46 uint64_t val;
47 uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */
48 uint64_t s_mask; /* mask bit is 1 if value bit matches msb */
49 } TempOptInfo;
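
/*
 * Illustration of the two masks: for the constant 0x00ff, only the low
 * eight bits can be 1, so z_mask = 0x00ff; bits 63..8 all match the
 * (zero) msb, so s_mask = 0xffffffffffffff00.  For a value about which
 * nothing is known, z_mask = -1 and s_mask = 0.
 */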
50
51 typedef struct OptContext {
52 TCGContext *tcg;
53 TCGOp *prev_mb;
54 TCGTempSet temps_used;
55
56 IntervalTreeRoot mem_copy;
57 QSIMPLEQ_HEAD(, MemCopyInfo) mem_free;
58
59 /* In flight values from optimization. */
60 TCGType type;
61 int carry_state; /* -1 = non-constant, {0,1} = constant carry-in */
62 } OptContext;
63
64 static inline TempOptInfo *ts_info(TCGTemp *ts)
65 {
66 return ts->state_ptr;
67 }
68
69 static inline TempOptInfo *arg_info(TCGArg arg)
70 {
71 return ts_info(arg_temp(arg));
72 }
73
74 static inline bool ti_is_const(TempOptInfo *ti)
75 {
76 return ti->is_const;
77 }
78
79 static inline uint64_t ti_const_val(TempOptInfo *ti)
80 {
81 return ti->val;
82 }
83
84 static inline bool ti_is_const_val(TempOptInfo *ti, uint64_t val)
85 {
86 return ti_is_const(ti) && ti_const_val(ti) == val;
87 }
88
89 static inline bool ts_is_const(TCGTemp *ts)
90 {
91 return ti_is_const(ts_info(ts));
92 }
93
94 static inline bool ts_is_const_val(TCGTemp *ts, uint64_t val)
95 {
96 return ti_is_const_val(ts_info(ts), val);
97 }
98
99 static inline bool arg_is_const(TCGArg arg)
100 {
101 return ts_is_const(arg_temp(arg));
102 }
103
104 static inline bool arg_is_const_val(TCGArg arg, uint64_t val)
105 {
106 return ts_is_const_val(arg_temp(arg), val);
107 }
108
109 static inline bool ts_is_copy(TCGTemp *ts)
110 {
111 return ts_info(ts)->next_copy != ts;
112 }
113
114 static TCGTemp *cmp_better_copy(TCGTemp *a, TCGTemp *b)
115 {
116 return a->kind < b->kind ? b : a;
117 }
118
119 /* Initialize and activate a temporary. */
120 static void init_ts_info(OptContext *ctx, TCGTemp *ts)
121 {
122 size_t idx = temp_idx(ts);
123 TempOptInfo *ti;
124
125 if (test_bit(idx, ctx->temps_used.l)) {
126 return;
127 }
128 set_bit(idx, ctx->temps_used.l);
129
130 ti = ts->state_ptr;
131 if (ti == NULL) {
132 ti = tcg_malloc(sizeof(TempOptInfo));
133 ts->state_ptr = ti;
134 }
135
136 ti->next_copy = ts;
137 ti->prev_copy = ts;
138 QSIMPLEQ_INIT(&ti->mem_copy);
139 if (ts->kind == TEMP_CONST) {
140 ti->is_const = true;
141 ti->val = ts->val;
142 ti->z_mask = ts->val;
143 ti->s_mask = INT64_MIN >> clrsb64(ts->val);
144 } else {
145 ti->is_const = false;
146 ti->z_mask = -1;
147 ti->s_mask = 0;
148 }
149 }
150
151 static MemCopyInfo *mem_copy_first(OptContext *ctx, intptr_t s, intptr_t l)
152 {
153 IntervalTreeNode *r = interval_tree_iter_first(&ctx->mem_copy, s, l);
154 return r ? container_of(r, MemCopyInfo, itree) : NULL;
155 }
156
157 static MemCopyInfo *mem_copy_next(MemCopyInfo *mem, intptr_t s, intptr_t l)
158 {
159 IntervalTreeNode *r = interval_tree_iter_next(&mem->itree, s, l);
160 return r ? container_of(r, MemCopyInfo, itree) : NULL;
161 }
162
163 static void remove_mem_copy(OptContext *ctx, MemCopyInfo *mc)
164 {
165 TCGTemp *ts = mc->ts;
166 TempOptInfo *ti = ts_info(ts);
167
168 interval_tree_remove(&mc->itree, &ctx->mem_copy);
169 QSIMPLEQ_REMOVE(&ti->mem_copy, mc, MemCopyInfo, next);
170 QSIMPLEQ_INSERT_TAIL(&ctx->mem_free, mc, next);
171 }
172
173 static void remove_mem_copy_in(OptContext *ctx, intptr_t s, intptr_t l)
174 {
175 while (true) {
176 MemCopyInfo *mc = mem_copy_first(ctx, s, l);
177 if (!mc) {
178 break;
179 }
180 remove_mem_copy(ctx, mc);
181 }
182 }
183
184 static void remove_mem_copy_all(OptContext *ctx)
185 {
186 remove_mem_copy_in(ctx, 0, -1);
187 tcg_debug_assert(interval_tree_is_empty(&ctx->mem_copy));
188 }
189
190 static TCGTemp *find_better_copy(TCGTemp *ts)
191 {
192 TCGTemp *i, *ret;
193
194 /* If this is already readonly, we can't do better. */
195 if (temp_readonly(ts)) {
196 return ts;
197 }
198
199 ret = ts;
200 for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
201 ret = cmp_better_copy(ret, i);
202 }
203 return ret;
204 }
205
206 static void move_mem_copies(TCGTemp *dst_ts, TCGTemp *src_ts)
207 {
208 TempOptInfo *si = ts_info(src_ts);
209 TempOptInfo *di = ts_info(dst_ts);
210 MemCopyInfo *mc;
211
212 QSIMPLEQ_FOREACH(mc, &si->mem_copy, next) {
213 tcg_debug_assert(mc->ts == src_ts);
214 mc->ts = dst_ts;
215 }
216 QSIMPLEQ_CONCAT(&di->mem_copy, &si->mem_copy);
217 }
218
219 /* Reset TEMP's state, possibly removing the temp from the list of copies. */
220 static void reset_ts(OptContext *ctx, TCGTemp *ts)
221 {
222 TempOptInfo *ti = ts_info(ts);
223 TCGTemp *pts = ti->prev_copy;
224 TCGTemp *nts = ti->next_copy;
225 TempOptInfo *pi = ts_info(pts);
226 TempOptInfo *ni = ts_info(nts);
227
228 ni->prev_copy = ti->prev_copy;
229 pi->next_copy = ti->next_copy;
230 ti->next_copy = ts;
231 ti->prev_copy = ts;
232 ti->is_const = false;
233 ti->z_mask = -1;
234 ti->s_mask = 0;
235
236 if (!QSIMPLEQ_EMPTY(&ti->mem_copy)) {
237 if (ts == nts) {
238 /* Last temp copy being removed, the mem copies die. */
239 MemCopyInfo *mc;
240 QSIMPLEQ_FOREACH(mc, &ti->mem_copy, next) {
241 interval_tree_remove(&mc->itree, &ctx->mem_copy);
242 }
243 QSIMPLEQ_CONCAT(&ctx->mem_free, &ti->mem_copy);
244 } else {
245 move_mem_copies(find_better_copy(nts), ts);
246 }
247 }
248 }
249
250 static void reset_temp(OptContext *ctx, TCGArg arg)
251 {
252 reset_ts(ctx, arg_temp(arg));
253 }
254
255 static void record_mem_copy(OptContext *ctx, TCGType type,
256 TCGTemp *ts, intptr_t start, intptr_t last)
257 {
258 MemCopyInfo *mc;
259 TempOptInfo *ti;
260
261 mc = QSIMPLEQ_FIRST(&ctx->mem_free);
262 if (mc) {
263 QSIMPLEQ_REMOVE_HEAD(&ctx->mem_free, next);
264 } else {
265 mc = tcg_malloc(sizeof(*mc));
266 }
267
268 memset(mc, 0, sizeof(*mc));
269 mc->itree.start = start;
270 mc->itree.last = last;
271 mc->type = type;
272 interval_tree_insert(&mc->itree, &ctx->mem_copy);
273
274 ts = find_better_copy(ts);
275 ti = ts_info(ts);
276 mc->ts = ts;
277 QSIMPLEQ_INSERT_TAIL(&ti->mem_copy, mc, next);
278 }
279
280 static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
281 {
282 TCGTemp *i;
283
284 if (ts1 == ts2) {
285 return true;
286 }
287
288 if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) {
289 return false;
290 }
291
292 for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) {
293 if (i == ts2) {
294 return true;
295 }
296 }
297
298 return false;
299 }
300
301 static bool args_are_copies(TCGArg arg1, TCGArg arg2)
302 {
303 return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
304 }
305
306 static TCGTemp *find_mem_copy_for(OptContext *ctx, TCGType type, intptr_t s)
307 {
308 MemCopyInfo *mc;
309
310 for (mc = mem_copy_first(ctx, s, s); mc; mc = mem_copy_next(mc, s, s)) {
311 if (mc->itree.start == s && mc->type == type) {
312 return find_better_copy(mc->ts);
313 }
314 }
315 return NULL;
316 }
317
318 static TCGArg arg_new_constant(OptContext *ctx, uint64_t val)
319 {
320 TCGType type = ctx->type;
321 TCGTemp *ts;
322
323 if (type == TCG_TYPE_I32) {
324 val = (int32_t)val;
325 }
326
327 ts = tcg_constant_internal(type, val);
328 init_ts_info(ctx, ts);
329
330 return temp_arg(ts);
331 }
332
333 static TCGArg arg_new_temp(OptContext *ctx)
334 {
335 TCGTemp *ts = tcg_temp_new_internal(ctx->type, TEMP_EBB);
336 init_ts_info(ctx, ts);
337 return temp_arg(ts);
338 }
339
340 static TCGOp *opt_insert_after(OptContext *ctx, TCGOp *op,
341 TCGOpcode opc, unsigned narg)
342 {
343 return tcg_op_insert_after(ctx->tcg, op, opc, ctx->type, narg);
344 }
345
346 static TCGOp *opt_insert_before(OptContext *ctx, TCGOp *op,
347 TCGOpcode opc, unsigned narg)
348 {
349 return tcg_op_insert_before(ctx->tcg, op, opc, ctx->type, narg);
350 }
351
352 static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
353 {
354 TCGTemp *dst_ts = arg_temp(dst);
355 TCGTemp *src_ts = arg_temp(src);
356 TempOptInfo *di;
357 TempOptInfo *si;
358 TCGOpcode new_op;
359
360 if (ts_are_copies(dst_ts, src_ts)) {
361 tcg_op_remove(ctx->tcg, op);
362 return true;
363 }
364
365 reset_ts(ctx, dst_ts);
366 di = ts_info(dst_ts);
367 si = ts_info(src_ts);
368
369 switch (ctx->type) {
370 case TCG_TYPE_I32:
371 case TCG_TYPE_I64:
372 new_op = INDEX_op_mov;
373 break;
374 case TCG_TYPE_V64:
375 case TCG_TYPE_V128:
376 case TCG_TYPE_V256:
377 /* TCGOP_TYPE and TCGOP_VECE remain unchanged. */
378 new_op = INDEX_op_mov_vec;
379 break;
380 default:
381 g_assert_not_reached();
382 }
383 op->opc = new_op;
384 op->args[0] = dst;
385 op->args[1] = src;
386
387 di->z_mask = si->z_mask;
388 di->s_mask = si->s_mask;
389
390 if (src_ts->type == dst_ts->type) {
391 TempOptInfo *ni = ts_info(si->next_copy);
392
393 di->next_copy = si->next_copy;
394 di->prev_copy = src_ts;
395 ni->prev_copy = dst_ts;
396 si->next_copy = dst_ts;
397 di->is_const = si->is_const;
398 di->val = si->val;
399
400 if (!QSIMPLEQ_EMPTY(&si->mem_copy)
401 && cmp_better_copy(src_ts, dst_ts) == dst_ts) {
402 move_mem_copies(dst_ts, src_ts);
403 }
404 }
405 return true;
406 }
407
408 static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
409 TCGArg dst, uint64_t val)
410 {
411 /* Convert movi to mov with constant temp. */
412 return tcg_opt_gen_mov(ctx, op, dst, arg_new_constant(ctx, val));
413 }
414
415 static uint64_t do_constant_folding_2(TCGOpcode op, TCGType type,
416 uint64_t x, uint64_t y)
417 {
418 uint64_t l64, h64;
419
420 switch (op) {
421 case INDEX_op_add:
422 return x + y;
423
424 case INDEX_op_sub:
425 return x - y;
426
427 case INDEX_op_mul:
428 return x * y;
429
430 case INDEX_op_and:
431 case INDEX_op_and_vec:
432 return x & y;
433
434 case INDEX_op_or:
435 case INDEX_op_or_vec:
436 return x | y;
437
438 case INDEX_op_xor:
439 case INDEX_op_xor_vec:
440 return x ^ y;
441
442 case INDEX_op_shl:
443 if (type == TCG_TYPE_I32) {
444 return (uint32_t)x << (y & 31);
445 }
446 return (uint64_t)x << (y & 63);
447
448 case INDEX_op_shr:
449 if (type == TCG_TYPE_I32) {
450 return (uint32_t)x >> (y & 31);
451 }
452 return (uint64_t)x >> (y & 63);
453
454 case INDEX_op_sar:
455 if (type == TCG_TYPE_I32) {
456 return (int32_t)x >> (y & 31);
457 }
458 return (int64_t)x >> (y & 63);
459
460 case INDEX_op_rotr:
461 if (type == TCG_TYPE_I32) {
462 return ror32(x, y & 31);
463 }
464 return ror64(x, y & 63);
465
466 case INDEX_op_rotl:
467 if (type == TCG_TYPE_I32) {
468 return rol32(x, y & 31);
469 }
470 return rol64(x, y & 63);
471
472 case INDEX_op_not:
473 case INDEX_op_not_vec:
474 return ~x;
475
476 case INDEX_op_neg:
477 return -x;
478
479 case INDEX_op_andc:
480 case INDEX_op_andc_vec:
481 return x & ~y;
482
483 case INDEX_op_orc:
484 case INDEX_op_orc_vec:
485 return x | ~y;
486
487 case INDEX_op_eqv:
488 case INDEX_op_eqv_vec:
489 return ~(x ^ y);
490
491 case INDEX_op_nand:
492 case INDEX_op_nand_vec:
493 return ~(x & y);
494
495 case INDEX_op_nor:
496 case INDEX_op_nor_vec:
497 return ~(x | y);
498
499 case INDEX_op_clz:
500 if (type == TCG_TYPE_I32) {
501 return (uint32_t)x ? clz32(x) : y;
502 }
503 return x ? clz64(x) : y;
504
505 case INDEX_op_ctz:
506 if (type == TCG_TYPE_I32) {
507 return (uint32_t)x ? ctz32(x) : y;
508 }
509 return x ? ctz64(x) : y;
510
511 case INDEX_op_ctpop:
512 return type == TCG_TYPE_I32 ? ctpop32(x) : ctpop64(x);
513
514 case INDEX_op_bswap16:
515 x = bswap16(x);
516 return y & TCG_BSWAP_OS ? (int16_t)x : x;
517
518 case INDEX_op_bswap32:
519 x = bswap32(x);
520 return y & TCG_BSWAP_OS ? (int32_t)x : x;
521
522 case INDEX_op_bswap64:
523 return bswap64(x);
524
525 case INDEX_op_ext_i32_i64:
526 return (int32_t)x;
527
528 case INDEX_op_extu_i32_i64:
529 case INDEX_op_extrl_i64_i32:
530 return (uint32_t)x;
531
532 case INDEX_op_extrh_i64_i32:
533 return (uint64_t)x >> 32;
534
535 case INDEX_op_muluh:
536 if (type == TCG_TYPE_I32) {
537 return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
538 }
539 mulu64(&l64, &h64, x, y);
540 return h64;
541
542 case INDEX_op_mulsh:
543 if (type == TCG_TYPE_I32) {
544 return ((int64_t)(int32_t)x * (int32_t)y) >> 32;
545 }
546 muls64(&l64, &h64, x, y);
547 return h64;
548
549 case INDEX_op_divs:
550 /* Avoid crashing on divide by zero, otherwise undefined. */
551 if (type == TCG_TYPE_I32) {
552 return (int32_t)x / ((int32_t)y ? : 1);
553 }
554 return (int64_t)x / ((int64_t)y ? : 1);
555
556 case INDEX_op_divu:
557 if (type == TCG_TYPE_I32) {
558 return (uint32_t)x / ((uint32_t)y ? : 1);
559 }
560 return (uint64_t)x / ((uint64_t)y ? : 1);
561
562 case INDEX_op_rems:
563 if (type == TCG_TYPE_I32) {
564 return (int32_t)x % ((int32_t)y ? : 1);
565 }
566 return (int64_t)x % ((int64_t)y ? : 1);
567
568 case INDEX_op_remu:
569 if (type == TCG_TYPE_I32) {
570 return (uint32_t)x % ((uint32_t)y ? : 1);
571 }
572 return (uint64_t)x % ((uint64_t)y ? : 1);
573
574 default:
575 g_assert_not_reached();
576 }
577 }
578
579 static uint64_t do_constant_folding(TCGOpcode op, TCGType type,
580 uint64_t x, uint64_t y)
581 {
582 uint64_t res = do_constant_folding_2(op, type, x, y);
583 if (type == TCG_TYPE_I32) {
584 res = (int32_t)res;
585 }
586 return res;
587 }
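
/*
 * For example, folding a 32-bit add of 0x7fffffff and 1 gives
 * 0x80000000, which the (int32_t) cast above stores as
 * 0xffffffff80000000, matching how 32-bit constants are represented
 * throughout this pass.
 */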
588
589 static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
590 {
591 switch (c) {
592 case TCG_COND_EQ:
593 return x == y;
594 case TCG_COND_NE:
595 return x != y;
596 case TCG_COND_LT:
597 return (int32_t)x < (int32_t)y;
598 case TCG_COND_GE:
599 return (int32_t)x >= (int32_t)y;
600 case TCG_COND_LE:
601 return (int32_t)x <= (int32_t)y;
602 case TCG_COND_GT:
603 return (int32_t)x > (int32_t)y;
604 case TCG_COND_LTU:
605 return x < y;
606 case TCG_COND_GEU:
607 return x >= y;
608 case TCG_COND_LEU:
609 return x <= y;
610 case TCG_COND_GTU:
611 return x > y;
612 case TCG_COND_TSTEQ:
613 return (x & y) == 0;
614 case TCG_COND_TSTNE:
615 return (x & y) != 0;
616 case TCG_COND_ALWAYS:
617 case TCG_COND_NEVER:
618 break;
619 }
620 g_assert_not_reached();
621 }
622
623 static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
624 {
625 switch (c) {
626 case TCG_COND_EQ:
627 return x == y;
628 case TCG_COND_NE:
629 return x != y;
630 case TCG_COND_LT:
631 return (int64_t)x < (int64_t)y;
632 case TCG_COND_GE:
633 return (int64_t)x >= (int64_t)y;
634 case TCG_COND_LE:
635 return (int64_t)x <= (int64_t)y;
636 case TCG_COND_GT:
637 return (int64_t)x > (int64_t)y;
638 case TCG_COND_LTU:
639 return x < y;
640 case TCG_COND_GEU:
641 return x >= y;
642 case TCG_COND_LEU:
643 return x <= y;
644 case TCG_COND_GTU:
645 return x > y;
646 case TCG_COND_TSTEQ:
647 return (x & y) == 0;
648 case TCG_COND_TSTNE:
649 return (x & y) != 0;
650 case TCG_COND_ALWAYS:
651 case TCG_COND_NEVER:
652 break;
653 }
654 g_assert_not_reached();
655 }
656
657 static int do_constant_folding_cond_eq(TCGCond c)
658 {
659 switch (c) {
660 case TCG_COND_GT:
661 case TCG_COND_LTU:
662 case TCG_COND_LT:
663 case TCG_COND_GTU:
664 case TCG_COND_NE:
665 return 0;
666 case TCG_COND_GE:
667 case TCG_COND_GEU:
668 case TCG_COND_LE:
669 case TCG_COND_LEU:
670 case TCG_COND_EQ:
671 return 1;
672 case TCG_COND_TSTEQ:
673 case TCG_COND_TSTNE:
674 return -1;
675 case TCG_COND_ALWAYS:
676 case TCG_COND_NEVER:
677 break;
678 }
679 g_assert_not_reached();
680 }
681
682 /*
683 * Return -1 if the condition can't be simplified,
684 * and the result of the condition (0 or 1) if it can.
685 */
686 static int do_constant_folding_cond(TCGType type, TCGArg x,
687 TCGArg y, TCGCond c)
688 {
689 if (arg_is_const(x) && arg_is_const(y)) {
690 uint64_t xv = arg_info(x)->val;
691 uint64_t yv = arg_info(y)->val;
692
693 switch (type) {
694 case TCG_TYPE_I32:
695 return do_constant_folding_cond_32(xv, yv, c);
696 case TCG_TYPE_I64:
697 return do_constant_folding_cond_64(xv, yv, c);
698 default:
699 /* Only scalar comparisons are optimizable */
700 return -1;
701 }
702 } else if (args_are_copies(x, y)) {
703 return do_constant_folding_cond_eq(c);
704 } else if (arg_is_const_val(y, 0)) {
705 switch (c) {
706 case TCG_COND_LTU:
707 case TCG_COND_TSTNE:
708 return 0;
709 case TCG_COND_GEU:
710 case TCG_COND_TSTEQ:
711 return 1;
712 default:
713 return -1;
714 }
715 }
716 return -1;
717 }
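
/*
 * The zero special case above means e.g. "ltu x, 0" folds to 0 even
 * when x is not constant, since no unsigned value is below zero, and
 * likewise "geu x, 0" folds to 1.
 */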
718
719 /**
720 * swap_commutative:
721 * @dest: TCGArg of the destination argument, or NO_DEST.
722 * @p1: first paired argument
723 * @p2: second paired argument
724 *
725 * If *@p1 is a constant and *@p2 is not, swap.
726 * If *@p2 matches @dest, swap.
727 * Return true if a swap was performed.
728 */
729
730 #define NO_DEST temp_arg(NULL)
731
732 static int pref_commutative(TempOptInfo *ti)
733 {
734 /* Slight preference for non-zero constants second. */
735 return !ti_is_const(ti) ? 0 : ti_const_val(ti) ? 3 : 2;
736 }
737
738 static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
739 {
740 TCGArg a1 = *p1, a2 = *p2;
741 int sum = 0;
742 sum += pref_commutative(arg_info(a1));
743 sum -= pref_commutative(arg_info(a2));
744
745 /* Prefer the constant in second argument, and then the form
746 op a, a, b, which is better handled on non-RISC hosts. */
747 if (sum > 0 || (sum == 0 && dest == a2)) {
748 *p1 = a2;
749 *p2 = a1;
750 return true;
751 }
752 return false;
753 }
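
/*
 * For example (in pseudo-notation), "add t0, $5, t1" scores +3 for the
 * constant and 0 for t1, so the operands are swapped and the op becomes
 * "add t0, t1, $5", the canonical form with the constant second.
 */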
754
755 static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
756 {
757 int sum = 0;
758 sum += pref_commutative(arg_info(p1[0]));
759 sum += pref_commutative(arg_info(p1[1]));
760 sum -= pref_commutative(arg_info(p2[0]));
761 sum -= pref_commutative(arg_info(p2[1]));
762 if (sum > 0) {
763 TCGArg t;
764 t = p1[0], p1[0] = p2[0], p2[0] = t;
765 t = p1[1], p1[1] = p2[1], p2[1] = t;
766 return true;
767 }
768 return false;
769 }
770
771 /*
772 * Return -1 if the condition can't be simplified,
773 * and the result of the condition (0 or 1) if it can.
774 */
775 static int do_constant_folding_cond1(OptContext *ctx, TCGOp *op, TCGArg dest,
776 TCGArg *p1, TCGArg *p2, TCGArg *pcond)
777 {
778 TCGCond cond;
779 TempOptInfo *i1;
780 bool swap;
781 int r;
782
783 swap = swap_commutative(dest, p1, p2);
784 cond = *pcond;
785 if (swap) {
786 *pcond = cond = tcg_swap_cond(cond);
787 }
788
789 r = do_constant_folding_cond(ctx->type, *p1, *p2, cond);
790 if (r >= 0) {
791 return r;
792 }
793 if (!is_tst_cond(cond)) {
794 return -1;
795 }
796
797 i1 = arg_info(*p1);
798
799 /*
800 * TSTNE x,x -> NE x,0
801 * TSTNE x,i -> NE x,0 if i includes all nonzero bits of x
802 */
803 if (args_are_copies(*p1, *p2) ||
804 (arg_is_const(*p2) && (i1->z_mask & ~arg_info(*p2)->val) == 0)) {
805 *p2 = arg_new_constant(ctx, 0);
806 *pcond = tcg_tst_eqne_cond(cond);
807 return -1;
808 }
809
810 /* TSTNE x,i -> LT x,0 if i only includes sign bit copies */
811 if (arg_is_const(*p2) && (arg_info(*p2)->val & ~i1->s_mask) == 0) {
812 *p2 = arg_new_constant(ctx, 0);
813 *pcond = tcg_tst_ltge_cond(cond);
814 return -1;
815 }
816
817 /* Expand to AND with a temporary if no backend support. */
818 if (!TCG_TARGET_HAS_tst) {
819 TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_and, 3);
820 TCGArg tmp = arg_new_temp(ctx);
821
822 op2->args[0] = tmp;
823 op2->args[1] = *p1;
824 op2->args[2] = *p2;
825
826 *p1 = tmp;
827 *p2 = arg_new_constant(ctx, 0);
828 *pcond = tcg_tst_eqne_cond(cond);
829 }
830 return -1;
831 }
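
/*
 * Example of the TSTNE rewrites above: if x has z_mask 0x0f, then
 * "tstne x, $0xff" tests every bit that could possibly be set in x and
 * becomes "ne x, $0"; if instead the constant lies entirely within x's
 * sign-bit copies, the test becomes "lt x, $0".
 */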
832
833 static int do_constant_folding_cond2(OptContext *ctx, TCGOp *op, TCGArg *args)
834 {
835 TCGArg al, ah, bl, bh;
836 TCGCond c;
837 bool swap;
838 int r;
839
840 swap = swap_commutative2(args, args + 2);
841 c = args[4];
842 if (swap) {
843 args[4] = c = tcg_swap_cond(c);
844 }
845
846 al = args[0];
847 ah = args[1];
848 bl = args[2];
849 bh = args[3];
850
851 if (arg_is_const(bl) && arg_is_const(bh)) {
852 tcg_target_ulong blv = arg_info(bl)->val;
853 tcg_target_ulong bhv = arg_info(bh)->val;
854 uint64_t b = deposit64(blv, 32, 32, bhv);
855
856 if (arg_is_const(al) && arg_is_const(ah)) {
857 tcg_target_ulong alv = arg_info(al)->val;
858 tcg_target_ulong ahv = arg_info(ah)->val;
859 uint64_t a = deposit64(alv, 32, 32, ahv);
860
861 r = do_constant_folding_cond_64(a, b, c);
862 if (r >= 0) {
863 return r;
864 }
865 }
866
867 if (b == 0) {
868 switch (c) {
869 case TCG_COND_LTU:
870 case TCG_COND_TSTNE:
871 return 0;
872 case TCG_COND_GEU:
873 case TCG_COND_TSTEQ:
874 return 1;
875 default:
876 break;
877 }
878 }
879
880 /* TSTNE x,-1 -> NE x,0 */
881 if (b == -1 && is_tst_cond(c)) {
882 args[3] = args[2] = arg_new_constant(ctx, 0);
883 args[4] = tcg_tst_eqne_cond(c);
884 return -1;
885 }
886
887 /* TSTNE x,sign -> LT x,0 */
888 if (b == INT64_MIN && is_tst_cond(c)) {
889 /* bl must be 0, so copy that to bh */
890 args[3] = bl;
891 args[4] = tcg_tst_ltge_cond(c);
892 return -1;
893 }
894 }
895
896 if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
897 r = do_constant_folding_cond_eq(c);
898 if (r >= 0) {
899 return r;
900 }
901
902 /* TSTNE x,x -> NE x,0 */
903 if (is_tst_cond(c)) {
904 args[3] = args[2] = arg_new_constant(ctx, 0);
905 args[4] = tcg_tst_eqne_cond(c);
906 return -1;
907 }
908 }
909
910 /* Expand to AND with a temporary if no backend support. */
911 if (!TCG_TARGET_HAS_tst && is_tst_cond(c)) {
912 TCGOp *op1 = opt_insert_before(ctx, op, INDEX_op_and, 3);
913 TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_and, 3);
914 TCGArg t1 = arg_new_temp(ctx);
915 TCGArg t2 = arg_new_temp(ctx);
916
917 op1->args[0] = t1;
918 op1->args[1] = al;
919 op1->args[2] = bl;
920 op2->args[0] = t2;
921 op2->args[1] = ah;
922 op2->args[2] = bh;
923
924 args[0] = t1;
925 args[1] = t2;
926 args[3] = args[2] = arg_new_constant(ctx, 0);
927 args[4] = tcg_tst_eqne_cond(c);
928 }
929 return -1;
930 }
931
932 static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args)
933 {
934 for (int i = 0; i < nb_args; i++) {
935 TCGTemp *ts = arg_temp(op->args[i]);
936 init_ts_info(ctx, ts);
937 }
938 }
939
940 static void copy_propagate(OptContext *ctx, TCGOp *op,
941 int nb_oargs, int nb_iargs)
942 {
943 for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
944 TCGTemp *ts = arg_temp(op->args[i]);
945 if (ts_is_copy(ts)) {
946 op->args[i] = temp_arg(find_better_copy(ts));
947 }
948 }
949 }
950
951 static void finish_bb(OptContext *ctx)
952 {
953     /* We do not optimize memory barriers across basic blocks. */
954 ctx->prev_mb = NULL;
955 }
956
957 static void finish_ebb(OptContext *ctx)
958 {
959 finish_bb(ctx);
960     /* We do not optimize across extended basic blocks. */
961 memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
962 remove_mem_copy_all(ctx);
963 }
964
965 static bool finish_folding(OptContext *ctx, TCGOp *op)
966 {
967 const TCGOpDef *def = &tcg_op_defs[op->opc];
968 int i, nb_oargs;
969
970 nb_oargs = def->nb_oargs;
971 for (i = 0; i < nb_oargs; i++) {
972 TCGTemp *ts = arg_temp(op->args[i]);
973 reset_ts(ctx, ts);
974 }
975 return true;
976 }
977
978 /*
979 * The fold_* functions return true when processing is complete,
980 * usually by folding the operation to a constant or to a copy,
981 * and calling tcg_opt_gen_{mov,movi}. They may do other things,
982 * like collect information about the value produced, for use in
983 * optimizing a subsequent operation.
984 *
985 * These first fold_* functions are all helpers, used by other
986 * folders for more specific operations.
987 */
988
989 static bool fold_const1(OptContext *ctx, TCGOp *op)
990 {
991 if (arg_is_const(op->args[1])) {
992 uint64_t t;
993
994 t = arg_info(op->args[1])->val;
995 t = do_constant_folding(op->opc, ctx->type, t, 0);
996 return tcg_opt_gen_movi(ctx, op, op->args[0], t);
997 }
998 return false;
999 }
1000
1001 static bool fold_const2(OptContext *ctx, TCGOp *op)
1002 {
1003 if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1004 uint64_t t1 = arg_info(op->args[1])->val;
1005 uint64_t t2 = arg_info(op->args[2])->val;
1006
1007 t1 = do_constant_folding(op->opc, ctx->type, t1, t2);
1008 return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
1009 }
1010 return false;
1011 }
1012
1013 static bool fold_commutative(OptContext *ctx, TCGOp *op)
1014 {
1015 swap_commutative(op->args[0], &op->args[1], &op->args[2]);
1016 return false;
1017 }
1018
1019 static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
1020 {
1021 swap_commutative(op->args[0], &op->args[1], &op->args[2]);
1022 return fold_const2(ctx, op);
1023 }
1024
1025 /*
1026 * Record "zero" and "sign" masks for the single output of @op.
1027 * See TempOptInfo definition of z_mask and s_mask.
1028 * If z_mask allows, fold the output to constant zero.
1029 * The passed s_mask may be augmented by z_mask.
1030 */
1031 static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
1032 uint64_t z_mask, int64_t s_mask)
1033 {
1034 const TCGOpDef *def = &tcg_op_defs[op->opc];
1035 TCGTemp *ts;
1036 TempOptInfo *ti;
1037 int rep;
1038
1039 /* Only single-output opcodes are supported here. */
1040 tcg_debug_assert(def->nb_oargs == 1);
1041
1042 /*
1043 * 32-bit ops generate 32-bit results, which for the purpose of
1044 * simplifying tcg are sign-extended. Certainly that's how we
1045 * represent our constants elsewhere. Note that the bits will
1046 * be reset properly for a 64-bit value when encountering the
1047 * type changing opcodes.
1048 */
1049 if (ctx->type == TCG_TYPE_I32) {
1050 z_mask = (int32_t)z_mask;
1051 s_mask |= INT32_MIN;
1052 }
1053
1054 if (z_mask == 0) {
1055 return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
1056 }
1057
1058 ts = arg_temp(op->args[0]);
1059 reset_ts(ctx, ts);
1060
1061 ti = ts_info(ts);
1062 ti->z_mask = z_mask;
1063
1064 /* Canonicalize s_mask and incorporate data from z_mask. */
1065 rep = clz64(~s_mask);
1066 rep = MAX(rep, clz64(z_mask));
1067 rep = MAX(rep - 1, 0);
1068 ti->s_mask = INT64_MIN >> rep;
1069
1070 return true;
1071 }
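
/*
 * Example of the canonicalization above: with s_mask 0xffff000000000000
 * and z_mask 0x00000000ffffffff, clz64(~s_mask) = 16 and
 * clz64(z_mask) = 32, so rep = 31 and the stored s_mask becomes
 * 0xffffffff00000000: the top 32 bits are known copies of the msb.
 */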
1072
1073 static bool fold_masks_z(OptContext *ctx, TCGOp *op, uint64_t z_mask)
1074 {
1075 return fold_masks_zs(ctx, op, z_mask, 0);
1076 }
1077
1078 static bool fold_masks_s(OptContext *ctx, TCGOp *op, uint64_t s_mask)
1079 {
1080 return fold_masks_zs(ctx, op, -1, s_mask);
1081 }
1082
1083 /*
1084 * An "affected" mask bit is 0 if and only if the result is identical
1085 * to the first input. Thus if the entire mask is 0, the operation
1086 * is equivalent to a copy.
1087 */
1088 static bool fold_affected_mask(OptContext *ctx, TCGOp *op, uint64_t a_mask)
1089 {
1090 if (ctx->type == TCG_TYPE_I32) {
1091 a_mask = (uint32_t)a_mask;
1092 }
1093 if (a_mask == 0) {
1094 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1095 }
1096 return false;
1097 }
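
/*
 * For example, "and r, x, $0xff" when x's z_mask is already 0xff gives
 * a_mask = 0xff & ~0xff = 0: the operation cannot change x and is
 * replaced by a plain copy.
 */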
1098
1099 /*
1100 * Convert @op to NOT, if NOT is supported by the host.
1101  * Return true if the conversion is successful, which will still
1102 * indicate that the processing is complete.
1103 */
1104 static bool fold_not(OptContext *ctx, TCGOp *op);
1105 static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx)
1106 {
1107 TCGOpcode not_op;
1108 bool have_not;
1109
1110 switch (ctx->type) {
1111 case TCG_TYPE_I32:
1112 case TCG_TYPE_I64:
1113 not_op = INDEX_op_not;
1114 have_not = tcg_op_supported(INDEX_op_not, ctx->type, 0);
1115 break;
1116 case TCG_TYPE_V64:
1117 case TCG_TYPE_V128:
1118 case TCG_TYPE_V256:
1119 not_op = INDEX_op_not_vec;
1120 have_not = TCG_TARGET_HAS_not_vec;
1121 break;
1122 default:
1123 g_assert_not_reached();
1124 }
1125 if (have_not) {
1126 op->opc = not_op;
1127 op->args[1] = op->args[idx];
1128 return fold_not(ctx, op);
1129 }
1130 return false;
1131 }
1132
1133 /* If the binary operation has first argument @i, fold to @i. */
1134 static bool fold_ix_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
1135 {
1136 if (arg_is_const_val(op->args[1], i)) {
1137 return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1138 }
1139 return false;
1140 }
1141
1142 /* If the binary operation has first argument @i, fold to NOT. */
1143 static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
1144 {
1145 if (arg_is_const_val(op->args[1], i)) {
1146 return fold_to_not(ctx, op, 2);
1147 }
1148 return false;
1149 }
1150
1151 /* If the binary operation has second argument @i, fold to @i. */
1152 static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
1153 {
1154 if (arg_is_const_val(op->args[2], i)) {
1155 return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1156 }
1157 return false;
1158 }
1159
1160 /* If the binary operation has second argument @i, fold to identity. */
1161 static bool fold_xi_to_x(OptContext *ctx, TCGOp *op, uint64_t i)
1162 {
1163 if (arg_is_const_val(op->args[2], i)) {
1164 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1165 }
1166 return false;
1167 }
1168
1169 /* If the binary operation has second argument @i, fold to NOT. */
1170 static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
1171 {
1172 if (arg_is_const_val(op->args[2], i)) {
1173 return fold_to_not(ctx, op, 1);
1174 }
1175 return false;
1176 }
1177
1178 /* If the binary operation has both arguments equal, fold to @i. */
1179 static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
1180 {
1181 if (args_are_copies(op->args[1], op->args[2])) {
1182 return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1183 }
1184 return false;
1185 }
1186
1187 /* If the binary operation has both arguments equal, fold to identity. */
1188 static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
1189 {
1190 if (args_are_copies(op->args[1], op->args[2])) {
1191 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1192 }
1193 return false;
1194 }
1195
1196 /*
1197 * These outermost fold_<op> functions are sorted alphabetically.
1198 *
1199 * The ordering of the transformations should be:
1200 * 1) those that produce a constant
1201 * 2) those that produce a copy
1202 * 3) those that produce information about the result value.
1203 */
1204
1205 static bool fold_addco(OptContext *ctx, TCGOp *op);
1206 static bool fold_or(OptContext *ctx, TCGOp *op);
1207 static bool fold_orc(OptContext *ctx, TCGOp *op);
1208 static bool fold_subbo(OptContext *ctx, TCGOp *op);
1209 static bool fold_xor(OptContext *ctx, TCGOp *op);
1210
1211 static bool fold_add(OptContext *ctx, TCGOp *op)
1212 {
1213 if (fold_const2_commutative(ctx, op) ||
1214 fold_xi_to_x(ctx, op, 0)) {
1215 return true;
1216 }
1217 return finish_folding(ctx, op);
1218 }
1219
1220 /* We cannot as yet do_constant_folding with vectors. */
1221 static bool fold_add_vec(OptContext *ctx, TCGOp *op)
1222 {
1223 if (fold_commutative(ctx, op) ||
1224 fold_xi_to_x(ctx, op, 0)) {
1225 return true;
1226 }
1227 return finish_folding(ctx, op);
1228 }
1229
1230 static void squash_prev_carryout(OptContext *ctx, TCGOp *op)
1231 {
1232 TempOptInfo *t2;
1233
1234 op = QTAILQ_PREV(op, link);
1235 switch (op->opc) {
1236 case INDEX_op_addco:
1237 op->opc = INDEX_op_add;
1238 fold_add(ctx, op);
1239 break;
1240 case INDEX_op_addcio:
1241 op->opc = INDEX_op_addci;
1242 break;
1243 case INDEX_op_addc1o:
1244 op->opc = INDEX_op_add;
1245 t2 = arg_info(op->args[2]);
1246 if (ti_is_const(t2)) {
1247 op->args[2] = arg_new_constant(ctx, ti_const_val(t2) + 1);
1248 /* Perform other constant folding, if needed. */
1249 fold_add(ctx, op);
1250 } else {
1251 TCGArg ret = op->args[0];
1252 op = opt_insert_after(ctx, op, INDEX_op_add, 3);
1253 op->args[0] = ret;
1254 op->args[1] = ret;
1255 op->args[2] = arg_new_constant(ctx, 1);
1256 }
1257 break;
1258 default:
1259 g_assert_not_reached();
1260 }
1261 }
1262
1263 static bool fold_addci(OptContext *ctx, TCGOp *op)
1264 {
1265 fold_commutative(ctx, op);
1266
1267 if (ctx->carry_state < 0) {
1268 return finish_folding(ctx, op);
1269 }
1270
1271 squash_prev_carryout(ctx, op);
1272 op->opc = INDEX_op_add;
1273
1274 if (ctx->carry_state > 0) {
1275 TempOptInfo *t2 = arg_info(op->args[2]);
1276
1277 /*
1278 * Propagate the known carry-in into a constant, if possible.
1279 * Otherwise emit a second add +1.
1280 */
1281 if (ti_is_const(t2)) {
1282 op->args[2] = arg_new_constant(ctx, ti_const_val(t2) + 1);
1283 } else {
1284 TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_add, 3);
1285
1286 op2->args[0] = op->args[0];
1287 op2->args[1] = op->args[1];
1288 op2->args[2] = op->args[2];
1289 fold_add(ctx, op2);
1290
1291 op->args[1] = op->args[0];
1292 op->args[2] = arg_new_constant(ctx, 1);
1293 }
1294 }
1295
1296 ctx->carry_state = -1;
1297 return fold_add(ctx, op);
1298 }
1299
1300 static bool fold_addcio(OptContext *ctx, TCGOp *op)
1301 {
1302 TempOptInfo *t1, *t2;
1303 int carry_out = -1;
1304 uint64_t sum, max;
1305
1306 fold_commutative(ctx, op);
1307 t1 = arg_info(op->args[1]);
1308 t2 = arg_info(op->args[2]);
1309
1310 /*
1311 * The z_mask value is >= the maximum value that can be represented
1312 * with the known zero bits. So adding the z_mask values will not
1313 * overflow if and only if the true values cannot overflow.
1314 */
1315 if (!uadd64_overflow(t1->z_mask, t2->z_mask, &sum) &&
1316 !uadd64_overflow(sum, ctx->carry_state != 0, &sum)) {
1317 carry_out = 0;
1318 }
1319
1320 if (ctx->carry_state < 0) {
1321 ctx->carry_state = carry_out;
1322 return finish_folding(ctx, op);
1323 }
1324
1325 squash_prev_carryout(ctx, op);
1326 if (ctx->carry_state == 0) {
1327 goto do_addco;
1328 }
1329
1330 /* Propagate the known carry-in into a constant, if possible. */
1331 max = ctx->type == TCG_TYPE_I32 ? UINT32_MAX : UINT64_MAX;
1332 if (ti_is_const(t2)) {
1333 uint64_t v = ti_const_val(t2) & max;
1334 if (v < max) {
1335 op->args[2] = arg_new_constant(ctx, v + 1);
1336 goto do_addco;
1337 }
1338 /* max + known carry in produces known carry out. */
1339 carry_out = 1;
1340 }
1341 if (ti_is_const(t1)) {
1342 uint64_t v = ti_const_val(t1) & max;
1343 if (v < max) {
1344 op->args[1] = arg_new_constant(ctx, v + 1);
1345 goto do_addco;
1346 }
1347 carry_out = 1;
1348 }
1349
1350 /* Adjust the opcode to remember the known carry-in. */
1351 op->opc = INDEX_op_addc1o;
1352 ctx->carry_state = carry_out;
1353 return finish_folding(ctx, op);
1354
1355 do_addco:
1356 op->opc = INDEX_op_addco;
1357 return fold_addco(ctx, op);
1358 }
1359
1360 static bool fold_addco(OptContext *ctx, TCGOp *op)
1361 {
1362 TempOptInfo *t1, *t2;
1363 int carry_out = -1;
1364 uint64_t ign;
1365
1366 fold_commutative(ctx, op);
1367 t1 = arg_info(op->args[1]);
1368 t2 = arg_info(op->args[2]);
1369
1370 if (ti_is_const(t2)) {
1371 uint64_t v2 = ti_const_val(t2);
1372
1373 if (ti_is_const(t1)) {
1374 uint64_t v1 = ti_const_val(t1);
1375 /* Given sign-extension of z_mask for I32, we need not truncate. */
1376 carry_out = uadd64_overflow(v1, v2, &ign);
1377 } else if (v2 == 0) {
1378 carry_out = 0;
1379 }
1380 } else {
1381 /*
1382 * The z_mask value is >= the maximum value that can be represented
1383 * with the known zero bits. So adding the z_mask values will not
1384 * overflow if and only if the true values cannot overflow.
1385 */
1386 if (!uadd64_overflow(t1->z_mask, t2->z_mask, &ign)) {
1387 carry_out = 0;
1388 }
1389 }
1390 ctx->carry_state = carry_out;
1391 return finish_folding(ctx, op);
1392 }
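
/*
 * For example, if both addends have z_mask 0x7fffffffffffffff, the sum
 * of the masks is 0xfffffffffffffffe with no 64-bit overflow, so the
 * carry-out is known to be 0 even though neither value is constant.
 */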
1393
1394 static bool fold_and(OptContext *ctx, TCGOp *op)
1395 {
1396 uint64_t z1, z2, z_mask, s_mask;
1397 TempOptInfo *t1, *t2;
1398
1399 if (fold_const2_commutative(ctx, op) ||
1400 fold_xi_to_i(ctx, op, 0) ||
1401 fold_xi_to_x(ctx, op, -1) ||
1402 fold_xx_to_x(ctx, op)) {
1403 return true;
1404 }
1405
1406 t1 = arg_info(op->args[1]);
1407 t2 = arg_info(op->args[2]);
1408 z1 = t1->z_mask;
1409 z2 = t2->z_mask;
1410
1411 /*
1412 * Known-zeros does not imply known-ones. Therefore unless
1413 * arg2 is constant, we can't infer affected bits from it.
1414 */
1415 if (ti_is_const(t2) && fold_affected_mask(ctx, op, z1 & ~z2)) {
1416 return true;
1417 }
1418
1419 z_mask = z1 & z2;
1420
1421 /*
1422 * Sign repetitions are perforce all identical, whether they are 1 or 0.
1423 * Bitwise operations preserve the relative quantity of the repetitions.
1424 */
1425 s_mask = t1->s_mask & t2->s_mask;
1426
1427 return fold_masks_zs(ctx, op, z_mask, s_mask);
1428 }
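
/*
 * Worked example: "and r, x, $0x0f00" with x's z_mask = 0x00ff.  The
 * affected mask 0x00ff & ~0x0f00 is nonzero, so no copy is possible,
 * but the result's z_mask is 0x00ff & 0x0f00 = 0 and fold_masks_zs()
 * turns the op into "mov r, $0".
 */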
1429
1430 static bool fold_andc(OptContext *ctx, TCGOp *op)
1431 {
1432 uint64_t z_mask, s_mask;
1433 TempOptInfo *t1, *t2;
1434
1435 if (fold_const2(ctx, op) ||
1436 fold_xx_to_i(ctx, op, 0) ||
1437 fold_xi_to_x(ctx, op, 0) ||
1438 fold_ix_to_not(ctx, op, -1)) {
1439 return true;
1440 }
1441
1442 t1 = arg_info(op->args[1]);
1443 t2 = arg_info(op->args[2]);
1444 z_mask = t1->z_mask;
1445
1446 if (ti_is_const(t2)) {
1447 /* Fold andc r,x,i to and r,x,~i. */
1448 switch (ctx->type) {
1449 case TCG_TYPE_I32:
1450 case TCG_TYPE_I64:
1451 op->opc = INDEX_op_and;
1452 break;
1453 case TCG_TYPE_V64:
1454 case TCG_TYPE_V128:
1455 case TCG_TYPE_V256:
1456 op->opc = INDEX_op_and_vec;
1457 break;
1458 default:
1459 g_assert_not_reached();
1460 }
1461 op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2));
1462 return fold_and(ctx, op);
1463 }
1464
1465 /*
1466 * Known-zeros does not imply known-ones. Therefore unless
1467 * arg2 is constant, we can't infer anything from it.
1468 */
1469 if (ti_is_const(t2)) {
1470 uint64_t v2 = ti_const_val(t2);
1471 if (fold_affected_mask(ctx, op, z_mask & v2)) {
1472 return true;
1473 }
1474 z_mask &= ~v2;
1475 }
1476
1477 s_mask = t1->s_mask & t2->s_mask;
1478 return fold_masks_zs(ctx, op, z_mask, s_mask);
1479 }
1480
1481 static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op)
1482 {
1483 /* If true and false values are the same, eliminate the cmp. */
1484 if (args_are_copies(op->args[2], op->args[3])) {
1485 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
1486 }
1487
1488 if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
1489 uint64_t tv = arg_info(op->args[2])->val;
1490 uint64_t fv = arg_info(op->args[3])->val;
1491
1492 if (tv == -1 && fv == 0) {
1493 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1494 }
1495 if (tv == 0 && fv == -1) {
1496 if (TCG_TARGET_HAS_not_vec) {
1497 op->opc = INDEX_op_not_vec;
1498 return fold_not(ctx, op);
1499 } else {
1500 op->opc = INDEX_op_xor_vec;
1501 op->args[2] = arg_new_constant(ctx, -1);
1502 return fold_xor(ctx, op);
1503 }
1504 }
1505 }
1506 if (arg_is_const(op->args[2])) {
1507 uint64_t tv = arg_info(op->args[2])->val;
1508 if (tv == -1) {
1509 op->opc = INDEX_op_or_vec;
1510 op->args[2] = op->args[3];
1511 return fold_or(ctx, op);
1512 }
1513 if (tv == 0 && TCG_TARGET_HAS_andc_vec) {
1514 op->opc = INDEX_op_andc_vec;
1515 op->args[2] = op->args[1];
1516 op->args[1] = op->args[3];
1517 return fold_andc(ctx, op);
1518 }
1519 }
1520 if (arg_is_const(op->args[3])) {
1521 uint64_t fv = arg_info(op->args[3])->val;
1522 if (fv == 0) {
1523 op->opc = INDEX_op_and_vec;
1524 return fold_and(ctx, op);
1525 }
1526 if (fv == -1 && TCG_TARGET_HAS_orc_vec) {
1527 op->opc = INDEX_op_orc_vec;
1528 op->args[2] = op->args[1];
1529 op->args[1] = op->args[3];
1530 return fold_orc(ctx, op);
1531 }
1532 }
1533 return finish_folding(ctx, op);
1534 }
1535
1536 static bool fold_brcond(OptContext *ctx, TCGOp *op)
1537 {
1538 int i = do_constant_folding_cond1(ctx, op, NO_DEST, &op->args[0],
1539 &op->args[1], &op->args[2]);
1540 if (i == 0) {
1541 tcg_op_remove(ctx->tcg, op);
1542 return true;
1543 }
1544 if (i > 0) {
1545 op->opc = INDEX_op_br;
1546 op->args[0] = op->args[3];
1547 finish_ebb(ctx);
1548 } else {
1549 finish_bb(ctx);
1550 }
1551 return true;
1552 }
1553
1554 static bool fold_brcond2(OptContext *ctx, TCGOp *op)
1555 {
1556 TCGCond cond;
1557 TCGArg label;
1558 int i, inv = 0;
1559
1560 i = do_constant_folding_cond2(ctx, op, &op->args[0]);
1561 cond = op->args[4];
1562 label = op->args[5];
1563 if (i >= 0) {
1564 goto do_brcond_const;
1565 }
1566
1567 switch (cond) {
1568 case TCG_COND_LT:
1569 case TCG_COND_GE:
1570 /*
1571 * Simplify LT/GE comparisons vs zero to a single compare
1572 * vs the high word of the input.
1573 */
1574 if (arg_is_const_val(op->args[2], 0) &&
1575 arg_is_const_val(op->args[3], 0)) {
1576 goto do_brcond_high;
1577 }
1578 break;
1579
1580 case TCG_COND_NE:
1581 inv = 1;
1582 QEMU_FALLTHROUGH;
1583 case TCG_COND_EQ:
1584 /*
1585 * Simplify EQ/NE comparisons where one of the pairs
1586 * can be simplified.
1587 */
1588 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[0],
1589 op->args[2], cond);
1590 switch (i ^ inv) {
1591 case 0:
1592 goto do_brcond_const;
1593 case 1:
1594 goto do_brcond_high;
1595 }
1596
1597 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
1598 op->args[3], cond);
1599 switch (i ^ inv) {
1600 case 0:
1601 goto do_brcond_const;
1602 case 1:
1603 goto do_brcond_low;
1604 }
1605 break;
1606
1607 case TCG_COND_TSTEQ:
1608 case TCG_COND_TSTNE:
1609 if (arg_is_const_val(op->args[2], 0)) {
1610 goto do_brcond_high;
1611 }
1612 if (arg_is_const_val(op->args[3], 0)) {
1613 goto do_brcond_low;
1614 }
1615 break;
1616
1617 default:
1618 break;
1619
1620 do_brcond_low:
1621 op->opc = INDEX_op_brcond;
1622 op->args[1] = op->args[2];
1623 op->args[2] = cond;
1624 op->args[3] = label;
1625 return fold_brcond(ctx, op);
1626
1627 do_brcond_high:
1628 op->opc = INDEX_op_brcond;
1629 op->args[0] = op->args[1];
1630 op->args[1] = op->args[3];
1631 op->args[2] = cond;
1632 op->args[3] = label;
1633 return fold_brcond(ctx, op);
1634
1635 do_brcond_const:
1636 if (i == 0) {
1637 tcg_op_remove(ctx->tcg, op);
1638 return true;
1639 }
1640 op->opc = INDEX_op_br;
1641 op->args[0] = label;
1642 finish_ebb(ctx);
1643 return true;
1644 }
1645
1646 finish_bb(ctx);
1647 return true;
1648 }
1649
1650 static bool fold_bswap(OptContext *ctx, TCGOp *op)
1651 {
1652 uint64_t z_mask, s_mask, sign;
1653 TempOptInfo *t1 = arg_info(op->args[1]);
1654
1655 if (ti_is_const(t1)) {
1656 return tcg_opt_gen_movi(ctx, op, op->args[0],
1657 do_constant_folding(op->opc, ctx->type,
1658 ti_const_val(t1),
1659 op->args[2]));
1660 }
1661
1662 z_mask = t1->z_mask;
1663 switch (op->opc) {
1664 case INDEX_op_bswap16:
1665 z_mask = bswap16(z_mask);
1666 sign = INT16_MIN;
1667 break;
1668 case INDEX_op_bswap32:
1669 z_mask = bswap32(z_mask);
1670 sign = INT32_MIN;
1671 break;
1672 case INDEX_op_bswap64:
1673 z_mask = bswap64(z_mask);
1674 sign = INT64_MIN;
1675 break;
1676 default:
1677 g_assert_not_reached();
1678 }
1679
1680 s_mask = 0;
1681 switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
1682 case TCG_BSWAP_OZ:
1683 break;
1684 case TCG_BSWAP_OS:
1685 /* If the sign bit may be 1, force all the bits above to 1. */
1686 if (z_mask & sign) {
1687 z_mask |= sign;
1688 }
1689 /* The value and therefore s_mask is explicitly sign-extended. */
1690 s_mask = sign;
1691 break;
1692 default:
1693 /* The high bits are undefined: force all bits above the sign to 1. */
1694 z_mask |= sign << 1;
1695 break;
1696 }
1697
1698 return fold_masks_zs(ctx, op, z_mask, s_mask);
1699 }
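
/*
 * For example, bswap32 with TCG_BSWAP_OS on an input whose z_mask is
 * 0x000000ff produces a swapped z_mask of 0xff000000; the sign bit
 * (bit 31) may be 1, so all bits above it are forced on and s_mask
 * records the sign extension.
 */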
1700
1701 static bool fold_call(OptContext *ctx, TCGOp *op)
1702 {
1703 TCGContext *s = ctx->tcg;
1704 int nb_oargs = TCGOP_CALLO(op);
1705 int nb_iargs = TCGOP_CALLI(op);
1706 int flags, i;
1707
1708 init_arguments(ctx, op, nb_oargs + nb_iargs);
1709 copy_propagate(ctx, op, nb_oargs, nb_iargs);
1710
1711 /* If the function reads or writes globals, reset temp data. */
1712 flags = tcg_call_flags(op);
1713 if (!(flags & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
1714 int nb_globals = s->nb_globals;
1715
1716 for (i = 0; i < nb_globals; i++) {
1717 if (test_bit(i, ctx->temps_used.l)) {
1718 reset_ts(ctx, &ctx->tcg->temps[i]);
1719 }
1720 }
1721 }
1722
1723 /* If the function has side effects, reset mem data. */
1724 if (!(flags & TCG_CALL_NO_SIDE_EFFECTS)) {
1725 remove_mem_copy_all(ctx);
1726 }
1727
1728 /* Reset temp data for outputs. */
1729 for (i = 0; i < nb_oargs; i++) {
1730 reset_temp(ctx, op->args[i]);
1731 }
1732
1733 /* Stop optimizing MB across calls. */
1734 ctx->prev_mb = NULL;
1735 return true;
1736 }
1737
1738 static bool fold_cmp_vec(OptContext *ctx, TCGOp *op)
1739 {
1740 /* Canonicalize the comparison to put immediate second. */
1741 if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
1742 op->args[3] = tcg_swap_cond(op->args[3]);
1743 }
1744 return finish_folding(ctx, op);
1745 }
1746
1747 static bool fold_cmpsel_vec(OptContext *ctx, TCGOp *op)
1748 {
1749 /* If true and false values are the same, eliminate the cmp. */
1750 if (args_are_copies(op->args[3], op->args[4])) {
1751 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]);
1752 }
1753
1754 /* Canonicalize the comparison to put immediate second. */
1755 if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
1756 op->args[5] = tcg_swap_cond(op->args[5]);
1757 }
1758 /*
1759 * Canonicalize the "false" input reg to match the destination,
1760 * so that the tcg backend can implement "move if true".
1761 */
1762 if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
1763 op->args[5] = tcg_invert_cond(op->args[5]);
1764 }
1765 return finish_folding(ctx, op);
1766 }
1767
1768 static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
1769 {
1770 uint64_t z_mask, s_mask;
1771 TempOptInfo *t1 = arg_info(op->args[1]);
1772 TempOptInfo *t2 = arg_info(op->args[2]);
1773
1774 if (ti_is_const(t1)) {
1775 uint64_t t = ti_const_val(t1);
1776
1777 if (t != 0) {
1778 t = do_constant_folding(op->opc, ctx->type, t, 0);
1779 return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1780 }
1781 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
1782 }
1783
1784 switch (ctx->type) {
1785 case TCG_TYPE_I32:
1786 z_mask = 31;
1787 break;
1788 case TCG_TYPE_I64:
1789 z_mask = 63;
1790 break;
1791 default:
1792 g_assert_not_reached();
1793 }
1794 s_mask = ~z_mask;
1795 z_mask |= t2->z_mask;
1796 s_mask &= t2->s_mask;
1797
1798 return fold_masks_zs(ctx, op, z_mask, s_mask);
1799 }
1800
1801 static bool fold_ctpop(OptContext *ctx, TCGOp *op)
1802 {
1803 uint64_t z_mask;
1804
1805 if (fold_const1(ctx, op)) {
1806 return true;
1807 }
1808
1809 switch (ctx->type) {
1810 case TCG_TYPE_I32:
1811 z_mask = 32 | 31;
1812 break;
1813 case TCG_TYPE_I64:
1814 z_mask = 64 | 63;
1815 break;
1816 default:
1817 g_assert_not_reached();
1818 }
1819 return fold_masks_z(ctx, op, z_mask);
1820 }
1821
1822 static bool fold_deposit(OptContext *ctx, TCGOp *op)
1823 {
1824 TempOptInfo *t1 = arg_info(op->args[1]);
1825 TempOptInfo *t2 = arg_info(op->args[2]);
1826 int ofs = op->args[3];
1827 int len = op->args[4];
1828 int width = 8 * tcg_type_size(ctx->type);
1829 uint64_t z_mask, s_mask;
1830
1831 if (ti_is_const(t1) && ti_is_const(t2)) {
1832 return tcg_opt_gen_movi(ctx, op, op->args[0],
1833 deposit64(ti_const_val(t1), ofs, len,
1834 ti_const_val(t2)));
1835 }
1836
1837 /* Inserting a value into zero at offset 0. */
1838 if (ti_is_const_val(t1, 0) && ofs == 0) {
1839 uint64_t mask = MAKE_64BIT_MASK(0, len);
1840
1841 op->opc = INDEX_op_and;
1842 op->args[1] = op->args[2];
1843 op->args[2] = arg_new_constant(ctx, mask);
1844 return fold_and(ctx, op);
1845 }
1846
1847 /* Inserting zero into a value. */
1848 if (ti_is_const_val(t2, 0)) {
1849 uint64_t mask = deposit64(-1, ofs, len, 0);
1850
1851 op->opc = INDEX_op_and;
1852 op->args[2] = arg_new_constant(ctx, mask);
1853 return fold_and(ctx, op);
1854 }
1855
1856 /* The s_mask from the top portion of the deposit is still valid. */
1857 if (ofs + len == width) {
1858 s_mask = t2->s_mask << ofs;
1859 } else {
1860 s_mask = t1->s_mask & ~MAKE_64BIT_MASK(0, ofs + len);
1861 }
1862
1863 z_mask = deposit64(t1->z_mask, ofs, len, t2->z_mask);
1864 return fold_masks_zs(ctx, op, z_mask, s_mask);
1865 }
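
/*
 * For example, depositing 8 bits with z_mask 0xff at offset 8 into a
 * value with z_mask 0xff yields z_mask
 * deposit64(0xff, 8, 8, 0xff) = 0xffff.
 */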
1866
1867 static bool fold_divide(OptContext *ctx, TCGOp *op)
1868 {
1869 if (fold_const2(ctx, op) ||
1870 fold_xi_to_x(ctx, op, 1)) {
1871 return true;
1872 }
1873 return finish_folding(ctx, op);
1874 }
1875
1876 static bool fold_dup(OptContext *ctx, TCGOp *op)
1877 {
1878 if (arg_is_const(op->args[1])) {
1879 uint64_t t = arg_info(op->args[1])->val;
1880 t = dup_const(TCGOP_VECE(op), t);
1881 return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1882 }
1883 return finish_folding(ctx, op);
1884 }
1885
1886 static bool fold_dup2(OptContext *ctx, TCGOp *op)
1887 {
1888 if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1889 uint64_t t = deposit64(arg_info(op->args[1])->val, 32, 32,
1890 arg_info(op->args[2])->val);
1891 return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1892 }
1893
1894 if (args_are_copies(op->args[1], op->args[2])) {
1895 op->opc = INDEX_op_dup_vec;
1896 TCGOP_VECE(op) = MO_32;
1897 }
1898 return finish_folding(ctx, op);
1899 }
1900
1901 static bool fold_eqv(OptContext *ctx, TCGOp *op)
1902 {
1903 uint64_t s_mask;
1904 TempOptInfo *t1, *t2;
1905
1906 if (fold_const2_commutative(ctx, op) ||
1907 fold_xi_to_x(ctx, op, -1) ||
1908 fold_xi_to_not(ctx, op, 0)) {
1909 return true;
1910 }
1911
1912 t2 = arg_info(op->args[2]);
1913 if (ti_is_const(t2)) {
1914 /* Fold eqv r,x,i to xor r,x,~i. */
1915 switch (ctx->type) {
1916 case TCG_TYPE_I32:
1917 case TCG_TYPE_I64:
1918 op->opc = INDEX_op_xor;
1919 break;
1920 case TCG_TYPE_V64:
1921 case TCG_TYPE_V128:
1922 case TCG_TYPE_V256:
1923 op->opc = INDEX_op_xor_vec;
1924 break;
1925 default:
1926 g_assert_not_reached();
1927 }
1928 op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2));
1929 return fold_xor(ctx, op);
1930 }
1931
1932 t1 = arg_info(op->args[1]);
1933 s_mask = t1->s_mask & t2->s_mask;
1934 return fold_masks_s(ctx, op, s_mask);
1935 }
1936
1937 static bool fold_extract(OptContext *ctx, TCGOp *op)
1938 {
1939 uint64_t z_mask_old, z_mask;
1940 TempOptInfo *t1 = arg_info(op->args[1]);
1941 int pos = op->args[2];
1942 int len = op->args[3];
1943
1944 if (ti_is_const(t1)) {
1945 return tcg_opt_gen_movi(ctx, op, op->args[0],
1946 extract64(ti_const_val(t1), pos, len));
1947 }
1948
1949 z_mask_old = t1->z_mask;
1950 z_mask = extract64(z_mask_old, pos, len);
1951 if (pos == 0 && fold_affected_mask(ctx, op, z_mask_old ^ z_mask)) {
1952 return true;
1953 }
1954
1955 return fold_masks_z(ctx, op, z_mask);
1956 }
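
/*
 * For example, extracting the low 8 bits of a value whose z_mask is
 * already 0x34 changes nothing (z_mask_old ^ z_mask == 0), so the op
 * folds to a copy; extracting them from a value with z_mask 0x1234
 * merely narrows z_mask to 0x34.
 */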
1957
1958 static bool fold_extract2(OptContext *ctx, TCGOp *op)
1959 {
1960 if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1961 uint64_t v1 = arg_info(op->args[1])->val;
1962 uint64_t v2 = arg_info(op->args[2])->val;
1963 int shr = op->args[3];
1964
1965 if (ctx->type == TCG_TYPE_I32) {
1966 v1 = (uint32_t)v1 >> shr;
1967 v2 = (uint64_t)((int32_t)v2 << (32 - shr));
1968 } else {
1969 v1 >>= shr;
1970 v2 <<= 64 - shr;
1971 }
1972 return tcg_opt_gen_movi(ctx, op, op->args[0], v1 | v2);
1973 }
1974 return finish_folding(ctx, op);
1975 }
1976
1977 static bool fold_exts(OptContext *ctx, TCGOp *op)
1978 {
1979 uint64_t s_mask, z_mask;
1980 TempOptInfo *t1;
1981
1982 if (fold_const1(ctx, op)) {
1983 return true;
1984 }
1985
1986 t1 = arg_info(op->args[1]);
1987 z_mask = t1->z_mask;
1988 s_mask = t1->s_mask;
1989
1990 switch (op->opc) {
1991 case INDEX_op_ext_i32_i64:
1992 s_mask |= INT32_MIN;
1993 z_mask = (int32_t)z_mask;
1994 break;
1995 default:
1996 g_assert_not_reached();
1997 }
1998 return fold_masks_zs(ctx, op, z_mask, s_mask);
1999 }
2000
2001 static bool fold_extu(OptContext *ctx, TCGOp *op)
2002 {
2003 uint64_t z_mask;
2004
2005 if (fold_const1(ctx, op)) {
2006 return true;
2007 }
2008
2009 z_mask = arg_info(op->args[1])->z_mask;
2010 switch (op->opc) {
2011 case INDEX_op_extrl_i64_i32:
2012 case INDEX_op_extu_i32_i64:
2013 z_mask = (uint32_t)z_mask;
2014 break;
2015 case INDEX_op_extrh_i64_i32:
2016 z_mask >>= 32;
2017 break;
2018 default:
2019 g_assert_not_reached();
2020 }
2021 return fold_masks_z(ctx, op, z_mask);
2022 }
2023
2024 static bool fold_mb(OptContext *ctx, TCGOp *op)
2025 {
2026 /* Eliminate duplicate and redundant fence instructions. */
2027 if (ctx->prev_mb) {
2028 /*
2029 * Merge two barriers of the same type into one,
2030 * or a weaker barrier into a stronger one,
2031 * or two weaker barriers into a stronger one.
2032 * mb X; mb Y => mb X|Y
2033 * mb; strl => mb; st
2034 * ldaq; mb => ld; mb
2035 * ldaq; strl => ld; mb; st
2036 * Other combinations are also merged into a strong
2037 * barrier. This is stricter than specified but for
2038 * the purposes of TCG is better than not optimizing.
2039 */
2040 ctx->prev_mb->args[0] |= op->args[0];
2041 tcg_op_remove(ctx->tcg, op);
2042 } else {
2043 ctx->prev_mb = op;
2044 }
2045 return true;
2046 }
2047
2048 static bool fold_mov(OptContext *ctx, TCGOp *op)
2049 {
2050 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
2051 }
2052
2053 static bool fold_movcond(OptContext *ctx, TCGOp *op)
2054 {
2055 uint64_t z_mask, s_mask;
2056 TempOptInfo *tt, *ft;
2057 int i;
2058
2059 /* If true and false values are the same, eliminate the cmp. */
2060 if (args_are_copies(op->args[3], op->args[4])) {
2061 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]);
2062 }
2063
2064 /*
2065 * Canonicalize the "false" input reg to match the destination reg so
2066 * that the tcg backend can implement a "move if true" operation.
2067 */
2068 if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
2069 op->args[5] = tcg_invert_cond(op->args[5]);
2070 }
2071
2072 i = do_constant_folding_cond1(ctx, op, NO_DEST, &op->args[1],
2073 &op->args[2], &op->args[5]);
2074 if (i >= 0) {
2075 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
2076 }
2077
2078 tt = arg_info(op->args[3]);
2079 ft = arg_info(op->args[4]);
2080 z_mask = tt->z_mask | ft->z_mask;
2081 s_mask = tt->s_mask & ft->s_mask;
2082
2083 if (ti_is_const(tt) && ti_is_const(ft)) {
2084 uint64_t tv = ti_const_val(tt);
2085 uint64_t fv = ti_const_val(ft);
2086 TCGCond cond = op->args[5];
2087
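/*
 * Selecting between the constants {1,0} is just setcond, and
 * selecting between {-1,0} is negsetcond; with the constants
 * swapped the same holds under the inverted condition.
 */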
2088 if (tv == 1 && fv == 0) {
2089 op->opc = INDEX_op_setcond;
2090 op->args[3] = cond;
2091 } else if (fv == 1 && tv == 0) {
2092 op->opc = INDEX_op_setcond;
2093 op->args[3] = tcg_invert_cond(cond);
2094 } else if (tv == -1 && fv == 0) {
2095 op->opc = INDEX_op_negsetcond;
2096 op->args[3] = cond;
2097 } else if (fv == -1 && tv == 0) {
2098 op->opc = INDEX_op_negsetcond;
2099 op->args[3] = tcg_invert_cond(cond);
2100 }
2101 }
2102
2103 return fold_masks_zs(ctx, op, z_mask, s_mask);
2104 }
2105
2106 static bool fold_mul(OptContext *ctx, TCGOp *op)
2107 {
2108 if (fold_const2(ctx, op) ||
2109 fold_xi_to_i(ctx, op, 0) ||
2110 fold_xi_to_x(ctx, op, 1)) {
2111 return true;
2112 }
2113 return finish_folding(ctx, op);
2114 }
2115
2116 static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
2117 {
2118 if (fold_const2_commutative(ctx, op) ||
2119 fold_xi_to_i(ctx, op, 0)) {
2120 return true;
2121 }
2122 return finish_folding(ctx, op);
2123 }
2124
2125 static bool fold_multiply2(OptContext *ctx, TCGOp *op)
2126 {
2127 swap_commutative(op->args[0], &op->args[2], &op->args[3]);
2128
2129 if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
2130 uint64_t a = arg_info(op->args[2])->val;
2131 uint64_t b = arg_info(op->args[3])->val;
2132 uint64_t h, l;
2133 TCGArg rl, rh;
2134 TCGOp *op2;
2135
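/*
 * For 32-bit operands a single 64-bit multiply produces both result
 * words at once; the 64-bit cases use the mulu64/muls64 helpers to
 * compute the 128-bit product as a low/high pair.
 */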
2136 switch (op->opc) {
2137 case INDEX_op_mulu2:
2138 if (ctx->type == TCG_TYPE_I32) {
2139 l = (uint64_t)(uint32_t)a * (uint32_t)b;
2140 h = (int32_t)(l >> 32);
2141 l = (int32_t)l;
2142 } else {
2143 mulu64(&l, &h, a, b);
2144 }
2145 break;
2146 case INDEX_op_muls2:
2147 if (ctx->type == TCG_TYPE_I32) {
2148 l = (int64_t)(int32_t)a * (int32_t)b;
2149 h = l >> 32;
2150 l = (int32_t)l;
2151 } else {
2152 muls64(&l, &h, a, b);
2153 }
2154 break;
2155 default:
2156 g_assert_not_reached();
2157 }
2158
2159 rl = op->args[0];
2160 rh = op->args[1];
2161
2162 /* The proper opcode is supplied by tcg_opt_gen_mov. */
2163 op2 = opt_insert_before(ctx, op, 0, 2);
2164
2165 tcg_opt_gen_movi(ctx, op, rl, l);
2166 tcg_opt_gen_movi(ctx, op2, rh, h);
2167 return true;
2168 }
2169 return finish_folding(ctx, op);
2170 }
2171
2172 static bool fold_nand(OptContext *ctx, TCGOp *op)
2173 {
2174 uint64_t s_mask;
2175
2176 if (fold_const2_commutative(ctx, op) ||
2177 fold_xi_to_not(ctx, op, -1)) {
2178 return true;
2179 }
2180
2181 s_mask = arg_info(op->args[1])->s_mask
2182 & arg_info(op->args[2])->s_mask;
2183 return fold_masks_s(ctx, op, s_mask);
2184 }
2185
2186 static bool fold_neg_no_const(OptContext *ctx, TCGOp *op)
2187 {
2188 /* Set to 1 the lowest bit that may be set, and all bits above it. */
2189 uint64_t z_mask = arg_info(op->args[1])->z_mask;
2190 z_mask = -(z_mask & -z_mask);
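/*
 * For example, with z_mask 0b0110, z_mask & -z_mask isolates the
 * lowest bit that may be set (0b0010), and negating that sets this
 * bit and every bit above it: -x can set any bit at or above the
 * lowest set bit of x, but none below it.
 */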
2191
2192 return fold_masks_z(ctx, op, z_mask);
2193 }
2194
2195 static bool fold_neg(OptContext *ctx, TCGOp *op)
2196 {
2197 return fold_const1(ctx, op) || fold_neg_no_const(ctx, op);
2198 }
2199
2200 static bool fold_nor(OptContext *ctx, TCGOp *op)
2201 {
2202 uint64_t s_mask;
2203
2204 if (fold_const2_commutative(ctx, op) ||
2205 fold_xi_to_not(ctx, op, 0)) {
2206 return true;
2207 }
2208
2209 s_mask = arg_info(op->args[1])->s_mask
2210 & arg_info(op->args[2])->s_mask;
2211 return fold_masks_s(ctx, op, s_mask);
2212 }
2213
2214 static bool fold_not(OptContext *ctx, TCGOp *op)
2215 {
2216 if (fold_const1(ctx, op)) {
2217 return true;
2218 }
2219 return fold_masks_s(ctx, op, arg_info(op->args[1])->s_mask);
2220 }
2221
2222 static bool fold_or(OptContext *ctx, TCGOp *op)
2223 {
2224 uint64_t z_mask, s_mask;
2225 TempOptInfo *t1, *t2;
2226
2227 if (fold_const2_commutative(ctx, op) ||
2228 fold_xi_to_x(ctx, op, 0) ||
2229 fold_xx_to_x(ctx, op)) {
2230 return true;
2231 }
2232
2233 t1 = arg_info(op->args[1]);
2234 t2 = arg_info(op->args[2]);
2235 z_mask = t1->z_mask | t2->z_mask;
2236 s_mask = t1->s_mask & t2->s_mask;
2237 return fold_masks_zs(ctx, op, z_mask, s_mask);
2238 }
2239
2240 static bool fold_orc(OptContext *ctx, TCGOp *op)
2241 {
2242 uint64_t s_mask;
2243 TempOptInfo *t1, *t2;
2244
2245 if (fold_const2(ctx, op) ||
2246 fold_xx_to_i(ctx, op, -1) ||
2247 fold_xi_to_x(ctx, op, -1) ||
2248 fold_ix_to_not(ctx, op, 0)) {
2249 return true;
2250 }
2251
2252 t2 = arg_info(op->args[2]);
2253 if (ti_is_const(t2)) {
2254 /* Fold orc r,x,i to or r,x,~i. */
2255 switch (ctx->type) {
2256 case TCG_TYPE_I32:
2257 case TCG_TYPE_I64:
2258 op->opc = INDEX_op_or;
2259 break;
2260 case TCG_TYPE_V64:
2261 case TCG_TYPE_V128:
2262 case TCG_TYPE_V256:
2263 op->opc = INDEX_op_or_vec;
2264 break;
2265 default:
2266 g_assert_not_reached();
2267 }
2268 op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2));
2269 return fold_or(ctx, op);
2270 }
2271
2272 t1 = arg_info(op->args[1]);
2273 s_mask = t1->s_mask & t2->s_mask;
2274 return fold_masks_s(ctx, op, s_mask);
2275 }
2276
2277 static bool fold_qemu_ld_1reg(OptContext *ctx, TCGOp *op)
2278 {
2279 const TCGOpDef *def = &tcg_op_defs[op->opc];
2280 MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
2281 MemOp mop = get_memop(oi);
2282 int width = 8 * memop_size(mop);
2283 uint64_t z_mask = -1, s_mask = 0;
2284
2285 if (width < 64) {
2286 if (mop & MO_SIGN) {
2287 s_mask = MAKE_64BIT_MASK(width - 1, 64 - (width - 1));
2288 } else {
2289 z_mask = MAKE_64BIT_MASK(0, width);
2290 }
2291 }
2292
2293 /* Opcodes that touch guest memory stop the mb optimization. */
2294 ctx->prev_mb = NULL;
2295
2296 return fold_masks_zs(ctx, op, z_mask, s_mask);
2297 }
2298
2299 static bool fold_qemu_ld_2reg(OptContext *ctx, TCGOp *op)
2300 {
2301 /* Opcodes that touch guest memory stop the mb optimization. */
2302 ctx->prev_mb = NULL;
2303 return finish_folding(ctx, op);
2304 }
2305
2306 static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
2307 {
2308 /* Opcodes that touch guest memory stop the mb optimization. */
2309 ctx->prev_mb = NULL;
2310 return true;
2311 }
2312
2313 static bool fold_remainder(OptContext *ctx, TCGOp *op)
2314 {
2315 if (fold_const2(ctx, op) ||
2316 fold_xx_to_i(ctx, op, 0)) {
2317 return true;
2318 }
2319 return finish_folding(ctx, op);
2320 }
2321
2322 /* Return 1 if finished, -1 if simplified, 0 if unchanged. */
2323 static int fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg)
2324 {
2325 uint64_t a_zmask, b_val;
2326 TCGCond cond;
2327
2328 if (!arg_is_const(op->args[2])) {
2329 return 0;
2330 }
2331
2332 a_zmask = arg_info(op->args[1])->z_mask;
2333 b_val = arg_info(op->args[2])->val;
2334 cond = op->args[3];
2335
2336 if (ctx->type == TCG_TYPE_I32) {
2337 a_zmask = (uint32_t)a_zmask;
2338 b_val = (uint32_t)b_val;
2339 }
2340
2341 /*
2342 * If A's largest possible value (its z_mask) is below constant B, then A < B.
2343 */
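/*
 * For example, a_zmask == 0xff and b_val == 0x100: every possible A
 * is below B, so EQ/GEU/GTU fold to 0 while NE/LEU/LTU fold to 1
 * (or -1 for the negsetcond variant).
 */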
2344 if (a_zmask < b_val) {
2345 bool inv = false;
2346
2347 switch (cond) {
2348 case TCG_COND_NE:
2349 case TCG_COND_LEU:
2350 case TCG_COND_LTU:
2351 inv = true;
2352 /* fall through */
2353 case TCG_COND_GTU:
2354 case TCG_COND_GEU:
2355 case TCG_COND_EQ:
2356 return tcg_opt_gen_movi(ctx, op, op->args[0], neg ? -inv : inv);
2357 default:
2358 break;
2359 }
2360 }
2361
2362 /*
2363 * A with only lsb set is already boolean.
2364 */
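/*
 * When A is known to be 0 or 1, "A != 0" is A itself (a plain copy),
 * "A == 0" is A ^ 1, and the negsetcond forms become either neg or
 * an add of -1, as selected below.
 */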
2365 if (a_zmask <= 1) {
2366 bool convert = false;
2367 bool inv = false;
2368
2369 switch (cond) {
2370 case TCG_COND_EQ:
2371 inv = true;
2372 /* fall through */
2373 case TCG_COND_NE:
2374 convert = (b_val == 0);
2375 break;
2376 case TCG_COND_LTU:
2377 case TCG_COND_TSTEQ:
2378 inv = true;
2379 /* fall through */
2380 case TCG_COND_GEU:
2381 case TCG_COND_TSTNE:
2382 convert = (b_val == 1);
2383 break;
2384 default:
2385 break;
2386 }
2387 if (convert) {
2388 if (!inv && !neg) {
2389 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
2390 }
2391
2392 if (!inv) {
2393 op->opc = INDEX_op_neg;
2394 } else if (neg) {
2395 op->opc = INDEX_op_add;
2396 op->args[2] = arg_new_constant(ctx, -1);
2397 } else {
2398 op->opc = INDEX_op_xor;
2399 op->args[2] = arg_new_constant(ctx, 1);
2400 }
2401 return -1;
2402 }
2403 }
2404 return 0;
2405 }
2406
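/*
 * A TSTNE/TSTEQ comparison against a power-of-two constant (1 << sh)
 * tests a single bit: prefer a one-bit (s)extract when the backend
 * supports it, otherwise shift the bit down and mask with 1, then
 * apply xor/neg/add fixups for the inverted and negated variants.
 */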
2407 static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg)
2408 {
2409 TCGCond cond = op->args[3];
2410 TCGArg ret, src1, src2;
2411 TCGOp *op2;
2412 uint64_t val;
2413 int sh;
2414 bool inv;
2415
2416 if (!is_tst_cond(cond) || !arg_is_const(op->args[2])) {
2417 return;
2418 }
2419
2420 src2 = op->args[2];
2421 val = arg_info(src2)->val;
2422 if (!is_power_of_2(val)) {
2423 return;
2424 }
2425 sh = ctz64(val);
2426
2427 ret = op->args[0];
2428 src1 = op->args[1];
2429 inv = cond == TCG_COND_TSTEQ;
2430
2431 if (sh && neg && !inv && TCG_TARGET_sextract_valid(ctx->type, sh, 1)) {
2432 op->opc = INDEX_op_sextract;
2433 op->args[1] = src1;
2434 op->args[2] = sh;
2435 op->args[3] = 1;
2436 return;
2437 } else if (sh && TCG_TARGET_extract_valid(ctx->type, sh, 1)) {
2438 op->opc = INDEX_op_extract;
2439 op->args[1] = src1;
2440 op->args[2] = sh;
2441 op->args[3] = 1;
2442 } else {
2443 if (sh) {
2444 op2 = opt_insert_before(ctx, op, INDEX_op_shr, 3);
2445 op2->args[0] = ret;
2446 op2->args[1] = src1;
2447 op2->args[2] = arg_new_constant(ctx, sh);
2448 src1 = ret;
2449 }
2450 op->opc = INDEX_op_and;
2451 op->args[1] = src1;
2452 op->args[2] = arg_new_constant(ctx, 1);
2453 }
2454
2455 if (neg && inv) {
2456 op2 = opt_insert_after(ctx, op, INDEX_op_add, 3);
2457 op2->args[0] = ret;
2458 op2->args[1] = ret;
2459 op2->args[2] = arg_new_constant(ctx, -1);
2460 } else if (inv) {
2461 op2 = opt_insert_after(ctx, op, INDEX_op_xor, 3);
2462 op2->args[0] = ret;
2463 op2->args[1] = ret;
2464 op2->args[2] = arg_new_constant(ctx, 1);
2465 } else if (neg) {
2466 op2 = opt_insert_after(ctx, op, INDEX_op_neg, 2);
2467 op2->args[0] = ret;
2468 op2->args[1] = ret;
2469 }
2470 }
2471
2472 static bool fold_setcond(OptContext *ctx, TCGOp *op)
2473 {
2474 int i = do_constant_folding_cond1(ctx, op, op->args[0], &op->args[1],
2475 &op->args[2], &op->args[3]);
2476 if (i >= 0) {
2477 return tcg_opt_gen_movi(ctx, op, op->args[0], i);
2478 }
2479
2480 i = fold_setcond_zmask(ctx, op, false);
2481 if (i > 0) {
2482 return true;
2483 }
2484 if (i == 0) {
2485 fold_setcond_tst_pow2(ctx, op, false);
2486 }
2487
2488 return fold_masks_z(ctx, op, 1);
2489 }
2490
2491 static bool fold_negsetcond(OptContext *ctx, TCGOp *op)
2492 {
2493 int i = do_constant_folding_cond1(ctx, op, op->args[0], &op->args[1],
2494 &op->args[2], &op->args[3]);
2495 if (i >= 0) {
2496 return tcg_opt_gen_movi(ctx, op, op->args[0], -i);
2497 }
2498
2499 i = fold_setcond_zmask(ctx, op, true);
2500 if (i > 0) {
2501 return true;
2502 }
2503 if (i == 0) {
2504 fold_setcond_tst_pow2(ctx, op, true);
2505 }
2506
2507 /* Value is {0,-1} so all bits are repetitions of the sign. */
2508 return fold_masks_s(ctx, op, -1);
2509 }
2510
2511 static bool fold_setcond2(OptContext *ctx, TCGOp *op)
2512 {
2513 TCGCond cond;
2514 int i, inv = 0;
2515
2516 i = do_constant_folding_cond2(ctx, op, &op->args[1]);
2517 cond = op->args[5];
2518 if (i >= 0) {
2519 goto do_setcond_const;
2520 }
2521
2522 switch (cond) {
2523 case TCG_COND_LT:
2524 case TCG_COND_GE:
2525 /*
2526 * Simplify LT/GE comparisons vs zero to a single compare
2527 * vs the high word of the input.
2528 */
2529 if (arg_is_const_val(op->args[3], 0) &&
2530 arg_is_const_val(op->args[4], 0)) {
2531 goto do_setcond_high;
2532 }
2533 break;
2534
2535 case TCG_COND_NE:
2536 inv = 1;
2537 QEMU_FALLTHROUGH;
2538 case TCG_COND_EQ:
2539 /*
2540 * Simplify EQ/NE comparisons where one of the pairs
2541 * can be simplified.
2542 */
2543 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
2544 op->args[3], cond);
2545 switch (i ^ inv) {
2546 case 0:
2547 goto do_setcond_const;
2548 case 1:
2549 goto do_setcond_high;
2550 }
2551
2552 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[2],
2553 op->args[4], cond);
2554 switch (i ^ inv) {
2555 case 0:
2556 goto do_setcond_const;
2557 case 1:
2558 goto do_setcond_low;
2559 }
2560 break;
2561
2562 case TCG_COND_TSTEQ:
2563 case TCG_COND_TSTNE:
2564 if (arg_is_const_val(op->args[3], 0)) {
2565 goto do_setcond_high;
2566 }
2567 if (arg_is_const_val(op->args[4], 0)) {
2568 goto do_setcond_low;
2569 }
2570 break;
2571
2572 default:
2573 break;
2574
2575 do_setcond_low:
2576 op->args[2] = op->args[3];
2577 op->args[3] = cond;
2578 op->opc = INDEX_op_setcond;
2579 return fold_setcond(ctx, op);
2580
2581 do_setcond_high:
2582 op->args[1] = op->args[2];
2583 op->args[2] = op->args[4];
2584 op->args[3] = cond;
2585 op->opc = INDEX_op_setcond;
2586 return fold_setcond(ctx, op);
2587 }
2588
2589 return fold_masks_z(ctx, op, 1);
2590
2591 do_setcond_const:
2592 return tcg_opt_gen_movi(ctx, op, op->args[0], i);
2593 }
2594
2595 static bool fold_sextract(OptContext *ctx, TCGOp *op)
2596 {
2597 uint64_t z_mask, s_mask, s_mask_old;
2598 TempOptInfo *t1 = arg_info(op->args[1]);
2599 int pos = op->args[2];
2600 int len = op->args[3];
2601
2602 if (ti_is_const(t1)) {
2603 return tcg_opt_gen_movi(ctx, op, op->args[0],
2604 sextract64(ti_const_val(t1), pos, len));
2605 }
2606
2607 s_mask_old = t1->s_mask;
2608 s_mask = s_mask_old >> pos;
2609 s_mask |= -1ull << (len - 1);
2610
2611 if (pos == 0 && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
2612 return true;
2613 }
2614
2615 z_mask = sextract64(t1->z_mask, pos, len);
2616 return fold_masks_zs(ctx, op, z_mask, s_mask);
2617 }
2618
2619 static bool fold_shift(OptContext *ctx, TCGOp *op)
2620 {
2621 uint64_t s_mask, z_mask;
2622 TempOptInfo *t1, *t2;
2623
2624 if (fold_const2(ctx, op) ||
2625 fold_ix_to_i(ctx, op, 0) ||
2626 fold_xi_to_x(ctx, op, 0)) {
2627 return true;
2628 }
2629
2630 t1 = arg_info(op->args[1]);
2631 t2 = arg_info(op->args[2]);
2632 s_mask = t1->s_mask;
2633 z_mask = t1->z_mask;
2634
2635 if (ti_is_const(t2)) {
2636 int sh = ti_const_val(t2);
2637
2638 z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
2639 s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);
2640
2641 return fold_masks_zs(ctx, op, z_mask, s_mask);
2642 }
2643
2644 switch (op->opc) {
2645 case INDEX_op_sar:
2646 /*
2647 * Arithmetic right shift will not reduce the number of
2648 * input sign repetitions.
2649 */
2650 return fold_masks_s(ctx, op, s_mask);
2651 case INDEX_op_shr:
2652 /*
2653 * If the sign bit is known zero, then logical right shift
2654 * will not reduce the number of input sign repetitions.
2655 */
2656 if (~z_mask & -s_mask) {
2657 return fold_masks_s(ctx, op, s_mask);
2658 }
2659 break;
2660 default:
2661 break;
2662 }
2663
2664 return finish_folding(ctx, op);
2665 }
2666
2667 static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
2668 {
2669 TCGOpcode neg_op;
2670 bool have_neg;
2671
2672 if (!arg_is_const(op->args[1]) || arg_info(op->args[1])->val != 0) {
2673 return false;
2674 }
2675
2676 switch (ctx->type) {
2677 case TCG_TYPE_I32:
2678 case TCG_TYPE_I64:
2679 neg_op = INDEX_op_neg;
2680 have_neg = true;
2681 break;
2682 case TCG_TYPE_V64:
2683 case TCG_TYPE_V128:
2684 case TCG_TYPE_V256:
2685 neg_op = INDEX_op_neg_vec;
2686 have_neg = (TCG_TARGET_HAS_neg_vec &&
2687 tcg_can_emit_vec_op(neg_op, ctx->type, TCGOP_VECE(op)) > 0);
2688 break;
2689 default:
2690 g_assert_not_reached();
2691 }
2692 if (have_neg) {
2693 op->opc = neg_op;
2694 op->args[1] = op->args[2];
2695 return fold_neg_no_const(ctx, op);
2696 }
2697 return false;
2698 }
2699
2700 /* We cannot yet use do_constant_folding with vectors. */
2701 static bool fold_sub_vec(OptContext *ctx, TCGOp *op)
2702 {
2703 if (fold_xx_to_i(ctx, op, 0) ||
2704 fold_xi_to_x(ctx, op, 0) ||
2705 fold_sub_to_neg(ctx, op)) {
2706 return true;
2707 }
2708 return finish_folding(ctx, op);
2709 }
2710
2711 static bool fold_sub(OptContext *ctx, TCGOp *op)
2712 {
2713 if (fold_const2(ctx, op) ||
2714 fold_xx_to_i(ctx, op, 0) ||
2715 fold_xi_to_x(ctx, op, 0) ||
2716 fold_sub_to_neg(ctx, op)) {
2717 return true;
2718 }
2719
2720 /* Fold sub r,x,i to add r,x,-i */
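/*
 * For example, sub t0,t1,5 becomes add t0,t1,-5, so that later
 * folding only has to recognize the add form of an operation with
 * a constant operand.
 */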
2721 if (arg_is_const(op->args[2])) {
2722 uint64_t val = arg_info(op->args[2])->val;
2723
2724 op->opc = INDEX_op_add;
2725 op->args[2] = arg_new_constant(ctx, -val);
2726 }
2727 return finish_folding(ctx, op);
2728 }
2729
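/*
 * The previous opcode produced the borrow that the current op now
 * consumes as a known constant, so it no longer needs to produce a
 * borrow-out at all: rewrite subbo to plain sub, subbio to subbi,
 * and subb1o to an equivalent subtract-one-more sequence.
 */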
2730 static void squash_prev_borrowout(OptContext *ctx, TCGOp *op)
2731 {
2732 TempOptInfo *t2;
2733
2734 op = QTAILQ_PREV(op, link);
2735 switch (op->opc) {
2736 case INDEX_op_subbo:
2737 op->opc = INDEX_op_sub;
2738 fold_sub(ctx, op);
2739 break;
2740 case INDEX_op_subbio:
2741 op->opc = INDEX_op_subbi;
2742 break;
2743 case INDEX_op_subb1o:
2744 t2 = arg_info(op->args[2]);
2745 if (ti_is_const(t2)) {
2746 op->opc = INDEX_op_add;
2747 op->args[2] = arg_new_constant(ctx, -(ti_const_val(t2) + 1));
2748 /* Perform other constant folding, if needed. */
2749 fold_add(ctx, op);
2750 } else {
2751 TCGArg ret = op->args[0];
2752 op->opc = INDEX_op_sub;
2753 op = opt_insert_after(ctx, op, INDEX_op_add, 3);
2754 op->args[0] = ret;
2755 op->args[1] = ret;
2756 op->args[2] = arg_new_constant(ctx, -1);
2757 }
2758 break;
2759 default:
2760 g_assert_not_reached();
2761 }
2762 }
2763
2764 static bool fold_subbi(OptContext *ctx, TCGOp *op)
2765 {
2766 TempOptInfo *t2;
2767 int borrow_in = ctx->carry_state;
2768
2769 if (borrow_in < 0) {
2770 return finish_folding(ctx, op);
2771 }
2772 ctx->carry_state = -1;
2773
2774 squash_prev_borrowout(ctx, op);
2775 if (borrow_in == 0) {
2776 op->opc = INDEX_op_sub;
2777 return fold_sub(ctx, op);
2778 }
2779
2780 /*
2781 * Propagate the known carry-in into any constant, then negate to
2782 * transform from sub to add. If there is no constant, emit a
2783 * separate add -1.
2784 */
2785 t2 = arg_info(op->args[2]);
2786 if (ti_is_const(t2)) {
2787 op->args[2] = arg_new_constant(ctx, -(ti_const_val(t2) + 1));
2788 } else {
2789 TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_sub, 3);
2790
2791 op2->args[0] = op->args[0];
2792 op2->args[1] = op->args[1];
2793 op2->args[2] = op->args[2];
2794 fold_sub(ctx, op2);
2795
2796 op->args[1] = op->args[0];
2797 op->args[2] = arg_new_constant(ctx, -1);
2798 }
2799 op->opc = INDEX_op_add;
2800 return fold_add(ctx, op);
2801 }
2802
2803 static bool fold_subbio(OptContext *ctx, TCGOp *op)
2804 {
2805 TempOptInfo *t1, *t2;
2806 int borrow_out = -1;
2807
2808 if (ctx->carry_state < 0) {
2809 return finish_folding(ctx, op);
2810 }
2811
2812 squash_prev_borrowout(ctx, op);
2813 if (ctx->carry_state == 0) {
2814 goto do_subbo;
2815 }
2816
2817 t1 = arg_info(op->args[1]);
2818 t2 = arg_info(op->args[2]);
2819
2820 /* Propagate the known borrow-in into a constant, if possible. */
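/*
 * With a borrow-in of 1, x - y - 1 equals x - (y + 1), so a constant
 * y can absorb the borrow unless it is already the maximum value (in
 * which case the borrow-out is known to be 1).  Alternatively, a
 * non-zero constant x can absorb it as (x - 1) - y.
 */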
2821 if (ti_is_const(t2)) {
2822 uint64_t max = ctx->type == TCG_TYPE_I32 ? UINT32_MAX : UINT64_MAX;
2823 uint64_t v = ti_const_val(t2) & max;
2824
2825 if (v < max) {
2826 op->args[2] = arg_new_constant(ctx, v + 1);
2827 goto do_subbo;
2828 }
2829 /* subtracting max + 1 produces known borrow out. */
2830 borrow_out = 1;
2831 }
2832 if (ti_is_const(t1)) {
2833 uint64_t v = ti_const_val(t1);
2834 if (v != 0) {
2835 op->args[1] = arg_new_constant(ctx, v - 1);
2836 goto do_subbo;
2837 }
2838 }
2839
2840 /* Adjust the opcode to remember the known carry-in. */
2841 op->opc = INDEX_op_subb1o;
2842 ctx->carry_state = borrow_out;
2843 return finish_folding(ctx, op);
2844
2845 do_subbo:
2846 op->opc = INDEX_op_subbo;
2847 return fold_subbo(ctx, op);
2848 }
2849
2850 static bool fold_subbo(OptContext *ctx, TCGOp *op)
2851 {
2852 TempOptInfo *t1 = arg_info(op->args[1]);
2853 TempOptInfo *t2 = arg_info(op->args[2]);
2854 int borrow_out = -1;
2855
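/*
 * Subtracting a constant 0 can never borrow; when both operands are
 * constant the borrow-out is simply the unsigned comparison x < y.
 * Otherwise the borrow-out remains unknown (-1).
 */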
2856 if (ti_is_const(t2)) {
2857 uint64_t v2 = ti_const_val(t2);
2858 if (v2 == 0) {
2859 borrow_out = 0;
2860 } else if (ti_is_const(t1)) {
2861 uint64_t v1 = ti_const_val(t1);
2862 borrow_out = v1 < v2;
2863 }
2864 }
2865 ctx->carry_state = borrow_out;
2866 return finish_folding(ctx, op);
2867 }
2868
2869 static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
2870 {
2871 uint64_t z_mask = -1, s_mask = 0;
2872
2873 /* We can't do any folding with a load, but we can record bits. */
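/*
 * For example, ld8u can only produce bits 0..7 (z_mask 0xff), while
 * ld8s sign-extends, so bits 7..63 all match the msb (s_mask of
 * INT8_MIN, i.e. bits 7..63 set).
 */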
2874 switch (op->opc) {
2875 case INDEX_op_ld8s:
2876 s_mask = INT8_MIN;
2877 break;
2878 case INDEX_op_ld8u:
2879 z_mask = MAKE_64BIT_MASK(0, 8);
2880 break;
2881 case INDEX_op_ld16s:
2882 s_mask = INT16_MIN;
2883 break;
2884 case INDEX_op_ld16u:
2885 z_mask = MAKE_64BIT_MASK(0, 16);
2886 break;
2887 case INDEX_op_ld32s:
2888 s_mask = INT32_MIN;
2889 break;
2890 case INDEX_op_ld32u:
2891 z_mask = MAKE_64BIT_MASK(0, 32);
2892 break;
2893 default:
2894 g_assert_not_reached();
2895 }
2896 return fold_masks_zs(ctx, op, z_mask, s_mask);
2897 }
2898
2899 static bool fold_tcg_ld_memcopy(OptContext *ctx, TCGOp *op)
2900 {
2901 TCGTemp *dst, *src;
2902 intptr_t ofs;
2903 TCGType type;
2904
2905 if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
2906 return finish_folding(ctx, op);
2907 }
2908
2909 type = ctx->type;
2910 ofs = op->args[2];
2911 dst = arg_temp(op->args[0]);
2912 src = find_mem_copy_for(ctx, type, ofs);
2913 if (src && src->base_type == type) {
2914 return tcg_opt_gen_mov(ctx, op, temp_arg(dst), temp_arg(src));
2915 }
2916
2917 reset_ts(ctx, dst);
2918 record_mem_copy(ctx, type, dst, ofs, ofs + tcg_type_size(type) - 1);
2919 return true;
2920 }
2921
2922 static bool fold_tcg_st(OptContext *ctx, TCGOp *op)
2923 {
2924 intptr_t ofs = op->args[2];
2925 intptr_t lm1;
2926
2927 if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
2928 remove_mem_copy_all(ctx);
2929 return true;
2930 }
2931
2932 switch (op->opc) {
2933 case INDEX_op_st8:
2934 lm1 = 0;
2935 break;
2936 case INDEX_op_st16:
2937 lm1 = 1;
2938 break;
2939 case INDEX_op_st32:
2940 lm1 = 3;
2941 break;
2942 case INDEX_op_st:
2943 case INDEX_op_st_vec:
2944 lm1 = tcg_type_size(ctx->type) - 1;
2945 break;
2946 default:
2947 g_assert_not_reached();
2948 }
2949 remove_mem_copy_in(ctx, ofs, ofs + lm1);
2950 return true;
2951 }
2952
2953 static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op)
2954 {
2955 TCGTemp *src;
2956 intptr_t ofs, last;
2957 TCGType type;
2958
2959 if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
2960 return fold_tcg_st(ctx, op);
2961 }
2962
2963 src = arg_temp(op->args[0]);
2964 ofs = op->args[2];
2965 type = ctx->type;
2966
2967 /*
2968 * Eliminate duplicate stores of a constant.
2969 * This happens frequently when the target ISA zero-extends.
2970 */
2971 if (ts_is_const(src)) {
2972 TCGTemp *prev = find_mem_copy_for(ctx, type, ofs);
2973 if (src == prev) {
2974 tcg_op_remove(ctx->tcg, op);
2975 return true;
2976 }
2977 }
2978
2979 last = ofs + tcg_type_size(type) - 1;
2980 remove_mem_copy_in(ctx, ofs, last);
2981 record_mem_copy(ctx, type, src, ofs, last);
2982 return true;
2983 }
2984
2985 static bool fold_xor(OptContext *ctx, TCGOp *op)
2986 {
2987 uint64_t z_mask, s_mask;
2988 TempOptInfo *t1, *t2;
2989
2990 if (fold_const2_commutative(ctx, op) ||
2991 fold_xx_to_i(ctx, op, 0) ||
2992 fold_xi_to_x(ctx, op, 0) ||
2993 fold_xi_to_not(ctx, op, -1)) {
2994 return true;
2995 }
2996
2997 t1 = arg_info(op->args[1]);
2998 t2 = arg_info(op->args[2]);
2999 z_mask = t1->z_mask | t2->z_mask;
3000 s_mask = t1->s_mask & t2->s_mask;
3001 return fold_masks_zs(ctx, op, z_mask, s_mask);
3002 }
3003
3004 /* Propagate constants and copies, fold constant expressions. */
3005 void tcg_optimize(TCGContext *s)
3006 {
3007 int nb_temps, i;
3008 TCGOp *op, *op_next;
3009 OptContext ctx = { .tcg = s };
3010
3011 QSIMPLEQ_INIT(&ctx.mem_free);
3012
3013 /* Each temp has an associated TempOptInfo, reached via state_ptr.
3014 If the temp holds a constant then its value is kept there.
3015 If the temp is a copy of other ones then the other copies are
3016 available through the doubly linked circular list. */
3017
3018 nb_temps = s->nb_temps;
3019 for (i = 0; i < nb_temps; ++i) {
3020 s->temps[i].state_ptr = NULL;
3021 }
3022
3023 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3024 TCGOpcode opc = op->opc;
3025 const TCGOpDef *def;
3026 bool done = false;
3027
3028 /* Calls are special. */
3029 if (opc == INDEX_op_call) {
3030 fold_call(&ctx, op);
3031 continue;
3032 }
3033
3034 def = &tcg_op_defs[opc];
3035 init_arguments(&ctx, op, def->nb_oargs + def->nb_iargs);
3036 copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs);
3037
3038 /* Pre-compute the type of the operation. */
3039 ctx.type = TCGOP_TYPE(op);
3040
3041 /*
3042 * Process each opcode.
3043 * Sorted alphabetically by opcode as much as possible.
3044 */
3045 switch (opc) {
3046 case INDEX_op_add:
3047 done = fold_add(&ctx, op);
3048 break;
3049 case INDEX_op_add_vec:
3050 done = fold_add_vec(&ctx, op);
3051 break;
3052 case INDEX_op_addci:
3053 done = fold_addci(&ctx, op);
3054 break;
3055 case INDEX_op_addcio:
3056 done = fold_addcio(&ctx, op);
3057 break;
3058 case INDEX_op_addco:
3059 done = fold_addco(&ctx, op);
3060 break;
3061 case INDEX_op_and:
3062 case INDEX_op_and_vec:
3063 done = fold_and(&ctx, op);
3064 break;
3065 case INDEX_op_andc:
3066 case INDEX_op_andc_vec:
3067 done = fold_andc(&ctx, op);
3068 break;
3069 case INDEX_op_brcond:
3070 done = fold_brcond(&ctx, op);
3071 break;
3072 case INDEX_op_brcond2_i32:
3073 done = fold_brcond2(&ctx, op);
3074 break;
3075 case INDEX_op_bswap16:
3076 case INDEX_op_bswap32:
3077 case INDEX_op_bswap64:
3078 done = fold_bswap(&ctx, op);
3079 break;
3080 case INDEX_op_clz:
3081 case INDEX_op_ctz:
3082 done = fold_count_zeros(&ctx, op);
3083 break;
3084 case INDEX_op_ctpop:
3085 done = fold_ctpop(&ctx, op);
3086 break;
3087 case INDEX_op_deposit:
3088 done = fold_deposit(&ctx, op);
3089 break;
3090 case INDEX_op_divs:
3091 case INDEX_op_divu:
3092 done = fold_divide(&ctx, op);
3093 break;
3094 case INDEX_op_dup_vec:
3095 done = fold_dup(&ctx, op);
3096 break;
3097 case INDEX_op_dup2_vec:
3098 done = fold_dup2(&ctx, op);
3099 break;
3100 case INDEX_op_eqv:
3101 case INDEX_op_eqv_vec:
3102 done = fold_eqv(&ctx, op);
3103 break;
3104 case INDEX_op_extract:
3105 done = fold_extract(&ctx, op);
3106 break;
3107 case INDEX_op_extract2:
3108 done = fold_extract2(&ctx, op);
3109 break;
3110 case INDEX_op_ext_i32_i64:
3111 done = fold_exts(&ctx, op);
3112 break;
3113 case INDEX_op_extu_i32_i64:
3114 case INDEX_op_extrl_i64_i32:
3115 case INDEX_op_extrh_i64_i32:
3116 done = fold_extu(&ctx, op);
3117 break;
3118 case INDEX_op_ld8s:
3119 case INDEX_op_ld8u:
3120 case INDEX_op_ld16s:
3121 case INDEX_op_ld16u:
3122 case INDEX_op_ld32s:
3123 case INDEX_op_ld32u:
3124 done = fold_tcg_ld(&ctx, op);
3125 break;
3126 case INDEX_op_ld:
3127 case INDEX_op_ld_vec:
3128 done = fold_tcg_ld_memcopy(&ctx, op);
3129 break;
3130 case INDEX_op_st8:
3131 case INDEX_op_st16:
3132 case INDEX_op_st32:
3133 done = fold_tcg_st(&ctx, op);
3134 break;
3135 case INDEX_op_st:
3136 case INDEX_op_st_vec:
3137 done = fold_tcg_st_memcopy(&ctx, op);
3138 break;
3139 case INDEX_op_mb:
3140 done = fold_mb(&ctx, op);
3141 break;
3142 case INDEX_op_mov:
3143 case INDEX_op_mov_vec:
3144 done = fold_mov(&ctx, op);
3145 break;
3146 case INDEX_op_movcond:
3147 done = fold_movcond(&ctx, op);
3148 break;
3149 case INDEX_op_mul:
3150 done = fold_mul(&ctx, op);
3151 break;
3152 case INDEX_op_mulsh:
3153 case INDEX_op_muluh:
3154 done = fold_mul_highpart(&ctx, op);
3155 break;
3156 case INDEX_op_muls2:
3157 case INDEX_op_mulu2:
3158 done = fold_multiply2(&ctx, op);
3159 break;
3160 case INDEX_op_nand:
3161 case INDEX_op_nand_vec:
3162 done = fold_nand(&ctx, op);
3163 break;
3164 case INDEX_op_neg:
3165 done = fold_neg(&ctx, op);
3166 break;
3167 case INDEX_op_nor:
3168 case INDEX_op_nor_vec:
3169 done = fold_nor(&ctx, op);
3170 break;
3171 case INDEX_op_not:
3172 case INDEX_op_not_vec:
3173 done = fold_not(&ctx, op);
3174 break;
3175 case INDEX_op_or:
3176 case INDEX_op_or_vec:
3177 done = fold_or(&ctx, op);
3178 break;
3179 case INDEX_op_orc:
3180 case INDEX_op_orc_vec:
3181 done = fold_orc(&ctx, op);
3182 break;
3183 case INDEX_op_qemu_ld:
3184 done = fold_qemu_ld_1reg(&ctx, op);
3185 break;
3186 case INDEX_op_qemu_ld2:
3187 done = fold_qemu_ld_2reg(&ctx, op);
3188 break;
3189 case INDEX_op_qemu_st:
3190 case INDEX_op_qemu_st2:
3191 done = fold_qemu_st(&ctx, op);
3192 break;
3193 case INDEX_op_rems:
3194 case INDEX_op_remu:
3195 done = fold_remainder(&ctx, op);
3196 break;
3197 case INDEX_op_rotl:
3198 case INDEX_op_rotr:
3199 case INDEX_op_sar:
3200 case INDEX_op_shl:
3201 case INDEX_op_shr:
3202 done = fold_shift(&ctx, op);
3203 break;
3204 case INDEX_op_setcond:
3205 done = fold_setcond(&ctx, op);
3206 break;
3207 case INDEX_op_negsetcond:
3208 done = fold_negsetcond(&ctx, op);
3209 break;
3210 case INDEX_op_setcond2_i32:
3211 done = fold_setcond2(&ctx, op);
3212 break;
3213 case INDEX_op_cmp_vec:
3214 done = fold_cmp_vec(&ctx, op);
3215 break;
3216 case INDEX_op_cmpsel_vec:
3217 done = fold_cmpsel_vec(&ctx, op);
3218 break;
3219 case INDEX_op_bitsel_vec:
3220 done = fold_bitsel_vec(&ctx, op);
3221 break;
3222 case INDEX_op_sextract:
3223 done = fold_sextract(&ctx, op);
3224 break;
3225 case INDEX_op_sub:
3226 done = fold_sub(&ctx, op);
3227 break;
3228 case INDEX_op_subbi:
3229 done = fold_subbi(&ctx, op);
3230 break;
3231 case INDEX_op_subbio:
3232 done = fold_subbio(&ctx, op);
3233 break;
3234 case INDEX_op_subbo:
3235 done = fold_subbo(&ctx, op);
3236 break;
3237 case INDEX_op_sub_vec:
3238 done = fold_sub_vec(&ctx, op);
3239 break;
3240 case INDEX_op_xor:
3241 case INDEX_op_xor_vec:
3242 done = fold_xor(&ctx, op);
3243 break;
3244 case INDEX_op_set_label:
3245 case INDEX_op_br:
3246 case INDEX_op_exit_tb:
3247 case INDEX_op_goto_tb:
3248 case INDEX_op_goto_ptr:
3249 finish_ebb(&ctx);
3250 done = true;
3251 break;
3252 default:
3253 done = finish_folding(&ctx, op);
3254 break;
3255 }
3256 tcg_debug_assert(done);
3257 }
3258 }
3259