1 /* 2 * Optimizations for Tiny Code Generator for QEMU 3 * 4 * Copyright (c) 2010 Samsung Electronics. 5 * Contributed by Kirill Batuzov <batuzovk@ispras.ru> 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a copy 8 * of this software and associated documentation files (the "Software"), to deal 9 * in the Software without restriction, including without limitation the rights 10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 * copies of the Software, and to permit persons to whom the Software is 12 * furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice shall be included in 15 * all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 * THE SOFTWARE. 24 */ 25 26 #include "qemu/osdep.h" 27 #include "qemu/int128.h" 28 #include "qemu/interval-tree.h" 29 #include "tcg/tcg-op-common.h" 30 #include "tcg-internal.h" 31 #include "tcg-has.h" 32 33 34 typedef struct MemCopyInfo { 35 IntervalTreeNode itree; 36 QSIMPLEQ_ENTRY (MemCopyInfo) next; 37 TCGTemp *ts; 38 TCGType type; 39 } MemCopyInfo; 40 41 typedef struct TempOptInfo { 42 bool is_const; 43 TCGTemp *prev_copy; 44 TCGTemp *next_copy; 45 QSIMPLEQ_HEAD(, MemCopyInfo) mem_copy; 46 uint64_t val; 47 uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */ 48 uint64_t s_mask; /* mask bit is 1 if value bit matches msb */ 49 } TempOptInfo; 50 51 typedef struct OptContext { 52 TCGContext *tcg; 53 TCGOp *prev_mb; 54 TCGTempSet temps_used; 55 56 IntervalTreeRoot mem_copy; 57 QSIMPLEQ_HEAD(, MemCopyInfo) mem_free; 58 59 /* In flight values from optimization. */ 60 TCGType type; 61 int carry_state; /* -1 = non-constant, {0,1} = constant carry-in */ 62 } OptContext; 63 64 static inline TempOptInfo *ts_info(TCGTemp *ts) 65 { 66 return ts->state_ptr; 67 } 68 69 static inline TempOptInfo *arg_info(TCGArg arg) 70 { 71 return ts_info(arg_temp(arg)); 72 } 73 74 static inline bool ti_is_const(TempOptInfo *ti) 75 { 76 return ti->is_const; 77 } 78 79 static inline uint64_t ti_const_val(TempOptInfo *ti) 80 { 81 return ti->val; 82 } 83 84 static inline bool ti_is_const_val(TempOptInfo *ti, uint64_t val) 85 { 86 return ti_is_const(ti) && ti_const_val(ti) == val; 87 } 88 89 static inline bool ts_is_const(TCGTemp *ts) 90 { 91 return ti_is_const(ts_info(ts)); 92 } 93 94 static inline bool ts_is_const_val(TCGTemp *ts, uint64_t val) 95 { 96 return ti_is_const_val(ts_info(ts), val); 97 } 98 99 static inline bool arg_is_const(TCGArg arg) 100 { 101 return ts_is_const(arg_temp(arg)); 102 } 103 104 static inline bool arg_is_const_val(TCGArg arg, uint64_t val) 105 { 106 return ts_is_const_val(arg_temp(arg), val); 107 } 108 109 static inline bool ts_is_copy(TCGTemp *ts) 110 { 111 return ts_info(ts)->next_copy != ts; 112 } 113 114 static TCGTemp *cmp_better_copy(TCGTemp *a, TCGTemp *b) 115 { 116 return a->kind < b->kind ? b : a; 117 } 118 119 /* Initialize and activate a temporary. 
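 *
 * As a worked illustration of the constant case handled below (example
 * value only): for a 64-bit constant 0xff, every bit above bit 7 matches
 * bit 63, so the masks become
 *
 *     ti->z_mask = 0x00000000000000ff
 *     ti->s_mask = INT64_MIN >> clrsb64(0xff) = 0xffffffffffffff00
 *
 * i.e. bits 63..8 are known to replicate the (zero) sign bit, and only
 * bits 7..0 may be nonzero.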
*/ 120 static void init_ts_info(OptContext *ctx, TCGTemp *ts) 121 { 122 size_t idx = temp_idx(ts); 123 TempOptInfo *ti; 124 125 if (test_bit(idx, ctx->temps_used.l)) { 126 return; 127 } 128 set_bit(idx, ctx->temps_used.l); 129 130 ti = ts->state_ptr; 131 if (ti == NULL) { 132 ti = tcg_malloc(sizeof(TempOptInfo)); 133 ts->state_ptr = ti; 134 } 135 136 ti->next_copy = ts; 137 ti->prev_copy = ts; 138 QSIMPLEQ_INIT(&ti->mem_copy); 139 if (ts->kind == TEMP_CONST) { 140 ti->is_const = true; 141 ti->val = ts->val; 142 ti->z_mask = ts->val; 143 ti->s_mask = INT64_MIN >> clrsb64(ts->val); 144 } else { 145 ti->is_const = false; 146 ti->z_mask = -1; 147 ti->s_mask = 0; 148 } 149 } 150 151 static MemCopyInfo *mem_copy_first(OptContext *ctx, intptr_t s, intptr_t l) 152 { 153 IntervalTreeNode *r = interval_tree_iter_first(&ctx->mem_copy, s, l); 154 return r ? container_of(r, MemCopyInfo, itree) : NULL; 155 } 156 157 static MemCopyInfo *mem_copy_next(MemCopyInfo *mem, intptr_t s, intptr_t l) 158 { 159 IntervalTreeNode *r = interval_tree_iter_next(&mem->itree, s, l); 160 return r ? container_of(r, MemCopyInfo, itree) : NULL; 161 } 162 163 static void remove_mem_copy(OptContext *ctx, MemCopyInfo *mc) 164 { 165 TCGTemp *ts = mc->ts; 166 TempOptInfo *ti = ts_info(ts); 167 168 interval_tree_remove(&mc->itree, &ctx->mem_copy); 169 QSIMPLEQ_REMOVE(&ti->mem_copy, mc, MemCopyInfo, next); 170 QSIMPLEQ_INSERT_TAIL(&ctx->mem_free, mc, next); 171 } 172 173 static void remove_mem_copy_in(OptContext *ctx, intptr_t s, intptr_t l) 174 { 175 while (true) { 176 MemCopyInfo *mc = mem_copy_first(ctx, s, l); 177 if (!mc) { 178 break; 179 } 180 remove_mem_copy(ctx, mc); 181 } 182 } 183 184 static void remove_mem_copy_all(OptContext *ctx) 185 { 186 remove_mem_copy_in(ctx, 0, -1); 187 tcg_debug_assert(interval_tree_is_empty(&ctx->mem_copy)); 188 } 189 190 static TCGTemp *find_better_copy(TCGTemp *ts) 191 { 192 TCGTemp *i, *ret; 193 194 /* If this is already readonly, we can't do better. */ 195 if (temp_readonly(ts)) { 196 return ts; 197 } 198 199 ret = ts; 200 for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) { 201 ret = cmp_better_copy(ret, i); 202 } 203 return ret; 204 } 205 206 static void move_mem_copies(TCGTemp *dst_ts, TCGTemp *src_ts) 207 { 208 TempOptInfo *si = ts_info(src_ts); 209 TempOptInfo *di = ts_info(dst_ts); 210 MemCopyInfo *mc; 211 212 QSIMPLEQ_FOREACH(mc, &si->mem_copy, next) { 213 tcg_debug_assert(mc->ts == src_ts); 214 mc->ts = dst_ts; 215 } 216 QSIMPLEQ_CONCAT(&di->mem_copy, &si->mem_copy); 217 } 218 219 /* Reset TEMP's state, possibly removing the temp for the list of copies. */ 220 static void reset_ts(OptContext *ctx, TCGTemp *ts) 221 { 222 TempOptInfo *ti = ts_info(ts); 223 TCGTemp *pts = ti->prev_copy; 224 TCGTemp *nts = ti->next_copy; 225 TempOptInfo *pi = ts_info(pts); 226 TempOptInfo *ni = ts_info(nts); 227 228 ni->prev_copy = ti->prev_copy; 229 pi->next_copy = ti->next_copy; 230 ti->next_copy = ts; 231 ti->prev_copy = ts; 232 ti->is_const = false; 233 ti->z_mask = -1; 234 ti->s_mask = 0; 235 236 if (!QSIMPLEQ_EMPTY(&ti->mem_copy)) { 237 if (ts == nts) { 238 /* Last temp copy being removed, the mem copies die. 
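 *
 * For illustration (sketch only; the notation approximates a TCG opcode
 * dump): a store such as
 *
 *     st_i64 tmp0, env, $0x10      <- records a MemCopyInfo for 0x10
 *
 * lets a later load from env+0x10 be replaced by a copy of tmp0. Once
 * tmp0 and every copy of it has been reset, that replacement is no
 * longer valid, so the interval-tree entries are dropped here.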
*/ 239 MemCopyInfo *mc; 240 QSIMPLEQ_FOREACH(mc, &ti->mem_copy, next) { 241 interval_tree_remove(&mc->itree, &ctx->mem_copy); 242 } 243 QSIMPLEQ_CONCAT(&ctx->mem_free, &ti->mem_copy); 244 } else { 245 move_mem_copies(find_better_copy(nts), ts); 246 } 247 } 248 } 249 250 static void reset_temp(OptContext *ctx, TCGArg arg) 251 { 252 reset_ts(ctx, arg_temp(arg)); 253 } 254 255 static void record_mem_copy(OptContext *ctx, TCGType type, 256 TCGTemp *ts, intptr_t start, intptr_t last) 257 { 258 MemCopyInfo *mc; 259 TempOptInfo *ti; 260 261 mc = QSIMPLEQ_FIRST(&ctx->mem_free); 262 if (mc) { 263 QSIMPLEQ_REMOVE_HEAD(&ctx->mem_free, next); 264 } else { 265 mc = tcg_malloc(sizeof(*mc)); 266 } 267 268 memset(mc, 0, sizeof(*mc)); 269 mc->itree.start = start; 270 mc->itree.last = last; 271 mc->type = type; 272 interval_tree_insert(&mc->itree, &ctx->mem_copy); 273 274 ts = find_better_copy(ts); 275 ti = ts_info(ts); 276 mc->ts = ts; 277 QSIMPLEQ_INSERT_TAIL(&ti->mem_copy, mc, next); 278 } 279 280 static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2) 281 { 282 TCGTemp *i; 283 284 if (ts1 == ts2) { 285 return true; 286 } 287 288 if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) { 289 return false; 290 } 291 292 for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) { 293 if (i == ts2) { 294 return true; 295 } 296 } 297 298 return false; 299 } 300 301 static bool args_are_copies(TCGArg arg1, TCGArg arg2) 302 { 303 return ts_are_copies(arg_temp(arg1), arg_temp(arg2)); 304 } 305 306 static TCGTemp *find_mem_copy_for(OptContext *ctx, TCGType type, intptr_t s) 307 { 308 MemCopyInfo *mc; 309 310 for (mc = mem_copy_first(ctx, s, s); mc; mc = mem_copy_next(mc, s, s)) { 311 if (mc->itree.start == s && mc->type == type) { 312 return find_better_copy(mc->ts); 313 } 314 } 315 return NULL; 316 } 317 318 static TCGArg arg_new_constant(OptContext *ctx, uint64_t val) 319 { 320 TCGType type = ctx->type; 321 TCGTemp *ts; 322 323 if (type == TCG_TYPE_I32) { 324 val = (int32_t)val; 325 } 326 327 ts = tcg_constant_internal(type, val); 328 init_ts_info(ctx, ts); 329 330 return temp_arg(ts); 331 } 332 333 static TCGArg arg_new_temp(OptContext *ctx) 334 { 335 TCGTemp *ts = tcg_temp_new_internal(ctx->type, TEMP_EBB); 336 init_ts_info(ctx, ts); 337 return temp_arg(ts); 338 } 339 340 static TCGOp *opt_insert_after(OptContext *ctx, TCGOp *op, 341 TCGOpcode opc, unsigned narg) 342 { 343 return tcg_op_insert_after(ctx->tcg, op, opc, ctx->type, narg); 344 } 345 346 static TCGOp *opt_insert_before(OptContext *ctx, TCGOp *op, 347 TCGOpcode opc, unsigned narg) 348 { 349 return tcg_op_insert_before(ctx->tcg, op, opc, ctx->type, narg); 350 } 351 352 static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src) 353 { 354 TCGTemp *dst_ts = arg_temp(dst); 355 TCGTemp *src_ts = arg_temp(src); 356 TempOptInfo *di; 357 TempOptInfo *si; 358 TCGOpcode new_op; 359 360 if (ts_are_copies(dst_ts, src_ts)) { 361 tcg_op_remove(ctx->tcg, op); 362 return true; 363 } 364 365 reset_ts(ctx, dst_ts); 366 di = ts_info(dst_ts); 367 si = ts_info(src_ts); 368 369 switch (ctx->type) { 370 case TCG_TYPE_I32: 371 case TCG_TYPE_I64: 372 new_op = INDEX_op_mov; 373 break; 374 case TCG_TYPE_V64: 375 case TCG_TYPE_V128: 376 case TCG_TYPE_V256: 377 /* TCGOP_TYPE and TCGOP_VECE remain unchanged. 
*/ 378 new_op = INDEX_op_mov_vec; 379 break; 380 default: 381 g_assert_not_reached(); 382 } 383 op->opc = new_op; 384 op->args[0] = dst; 385 op->args[1] = src; 386 387 di->z_mask = si->z_mask; 388 di->s_mask = si->s_mask; 389 390 if (src_ts->type == dst_ts->type) { 391 TempOptInfo *ni = ts_info(si->next_copy); 392 393 di->next_copy = si->next_copy; 394 di->prev_copy = src_ts; 395 ni->prev_copy = dst_ts; 396 si->next_copy = dst_ts; 397 di->is_const = si->is_const; 398 di->val = si->val; 399 400 if (!QSIMPLEQ_EMPTY(&si->mem_copy) 401 && cmp_better_copy(src_ts, dst_ts) == dst_ts) { 402 move_mem_copies(dst_ts, src_ts); 403 } 404 } 405 return true; 406 } 407 408 static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op, 409 TCGArg dst, uint64_t val) 410 { 411 /* Convert movi to mov with constant temp. */ 412 return tcg_opt_gen_mov(ctx, op, dst, arg_new_constant(ctx, val)); 413 } 414 415 static uint64_t do_constant_folding_2(TCGOpcode op, TCGType type, 416 uint64_t x, uint64_t y) 417 { 418 uint64_t l64, h64; 419 420 switch (op) { 421 case INDEX_op_add: 422 return x + y; 423 424 case INDEX_op_sub: 425 return x - y; 426 427 case INDEX_op_mul: 428 return x * y; 429 430 case INDEX_op_and: 431 case INDEX_op_and_vec: 432 return x & y; 433 434 case INDEX_op_or: 435 case INDEX_op_or_vec: 436 return x | y; 437 438 case INDEX_op_xor: 439 case INDEX_op_xor_vec: 440 return x ^ y; 441 442 case INDEX_op_shl: 443 if (type == TCG_TYPE_I32) { 444 return (uint32_t)x << (y & 31); 445 } 446 return (uint64_t)x << (y & 63); 447 448 case INDEX_op_shr: 449 if (type == TCG_TYPE_I32) { 450 return (uint32_t)x >> (y & 31); 451 } 452 return (uint64_t)x >> (y & 63); 453 454 case INDEX_op_sar: 455 if (type == TCG_TYPE_I32) { 456 return (int32_t)x >> (y & 31); 457 } 458 return (int64_t)x >> (y & 63); 459 460 case INDEX_op_rotr: 461 if (type == TCG_TYPE_I32) { 462 return ror32(x, y & 31); 463 } 464 return ror64(x, y & 63); 465 466 case INDEX_op_rotl: 467 if (type == TCG_TYPE_I32) { 468 return rol32(x, y & 31); 469 } 470 return rol64(x, y & 63); 471 472 case INDEX_op_not: 473 case INDEX_op_not_vec: 474 return ~x; 475 476 case INDEX_op_neg: 477 return -x; 478 479 case INDEX_op_andc: 480 case INDEX_op_andc_vec: 481 return x & ~y; 482 483 case INDEX_op_orc: 484 case INDEX_op_orc_vec: 485 return x | ~y; 486 487 case INDEX_op_eqv: 488 case INDEX_op_eqv_vec: 489 return ~(x ^ y); 490 491 case INDEX_op_nand: 492 case INDEX_op_nand_vec: 493 return ~(x & y); 494 495 case INDEX_op_nor: 496 case INDEX_op_nor_vec: 497 return ~(x | y); 498 499 case INDEX_op_clz: 500 if (type == TCG_TYPE_I32) { 501 return (uint32_t)x ? clz32(x) : y; 502 } 503 return x ? clz64(x) : y; 504 505 case INDEX_op_ctz: 506 if (type == TCG_TYPE_I32) { 507 return (uint32_t)x ? ctz32(x) : y; 508 } 509 return x ? ctz64(x) : y; 510 511 case INDEX_op_ctpop: 512 return type == TCG_TYPE_I32 ? ctpop32(x) : ctpop64(x); 513 514 case INDEX_op_bswap16: 515 x = bswap16(x); 516 return y & TCG_BSWAP_OS ? (int16_t)x : x; 517 518 case INDEX_op_bswap32: 519 x = bswap32(x); 520 return y & TCG_BSWAP_OS ? 
(int32_t)x : x; 521 522 case INDEX_op_bswap64: 523 return bswap64(x); 524 525 case INDEX_op_ext_i32_i64: 526 return (int32_t)x; 527 528 case INDEX_op_extu_i32_i64: 529 case INDEX_op_extrl_i64_i32: 530 return (uint32_t)x; 531 532 case INDEX_op_extrh_i64_i32: 533 return (uint64_t)x >> 32; 534 535 case INDEX_op_muluh: 536 if (type == TCG_TYPE_I32) { 537 return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32; 538 } 539 mulu64(&l64, &h64, x, y); 540 return h64; 541 542 case INDEX_op_mulsh: 543 if (type == TCG_TYPE_I32) { 544 return ((int64_t)(int32_t)x * (int32_t)y) >> 32; 545 } 546 muls64(&l64, &h64, x, y); 547 return h64; 548 549 case INDEX_op_divs: 550 /* Avoid crashing on divide by zero, otherwise undefined. */ 551 if (type == TCG_TYPE_I32) { 552 return (int32_t)x / ((int32_t)y ? : 1); 553 } 554 return (int64_t)x / ((int64_t)y ? : 1); 555 556 case INDEX_op_divu: 557 if (type == TCG_TYPE_I32) { 558 return (uint32_t)x / ((uint32_t)y ? : 1); 559 } 560 return (uint64_t)x / ((uint64_t)y ? : 1); 561 562 case INDEX_op_rems: 563 if (type == TCG_TYPE_I32) { 564 return (int32_t)x % ((int32_t)y ? : 1); 565 } 566 return (int64_t)x % ((int64_t)y ? : 1); 567 568 case INDEX_op_remu: 569 if (type == TCG_TYPE_I32) { 570 return (uint32_t)x % ((uint32_t)y ? : 1); 571 } 572 return (uint64_t)x % ((uint64_t)y ? : 1); 573 574 default: 575 g_assert_not_reached(); 576 } 577 } 578 579 static uint64_t do_constant_folding(TCGOpcode op, TCGType type, 580 uint64_t x, uint64_t y) 581 { 582 uint64_t res = do_constant_folding_2(op, type, x, y); 583 if (type == TCG_TYPE_I32) { 584 res = (int32_t)res; 585 } 586 return res; 587 } 588 589 static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c) 590 { 591 switch (c) { 592 case TCG_COND_EQ: 593 return x == y; 594 case TCG_COND_NE: 595 return x != y; 596 case TCG_COND_LT: 597 return (int32_t)x < (int32_t)y; 598 case TCG_COND_GE: 599 return (int32_t)x >= (int32_t)y; 600 case TCG_COND_LE: 601 return (int32_t)x <= (int32_t)y; 602 case TCG_COND_GT: 603 return (int32_t)x > (int32_t)y; 604 case TCG_COND_LTU: 605 return x < y; 606 case TCG_COND_GEU: 607 return x >= y; 608 case TCG_COND_LEU: 609 return x <= y; 610 case TCG_COND_GTU: 611 return x > y; 612 case TCG_COND_TSTEQ: 613 return (x & y) == 0; 614 case TCG_COND_TSTNE: 615 return (x & y) != 0; 616 case TCG_COND_ALWAYS: 617 case TCG_COND_NEVER: 618 break; 619 } 620 g_assert_not_reached(); 621 } 622 623 static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c) 624 { 625 switch (c) { 626 case TCG_COND_EQ: 627 return x == y; 628 case TCG_COND_NE: 629 return x != y; 630 case TCG_COND_LT: 631 return (int64_t)x < (int64_t)y; 632 case TCG_COND_GE: 633 return (int64_t)x >= (int64_t)y; 634 case TCG_COND_LE: 635 return (int64_t)x <= (int64_t)y; 636 case TCG_COND_GT: 637 return (int64_t)x > (int64_t)y; 638 case TCG_COND_LTU: 639 return x < y; 640 case TCG_COND_GEU: 641 return x >= y; 642 case TCG_COND_LEU: 643 return x <= y; 644 case TCG_COND_GTU: 645 return x > y; 646 case TCG_COND_TSTEQ: 647 return (x & y) == 0; 648 case TCG_COND_TSTNE: 649 return (x & y) != 0; 650 case TCG_COND_ALWAYS: 651 case TCG_COND_NEVER: 652 break; 653 } 654 g_assert_not_reached(); 655 } 656 657 static int do_constant_folding_cond_eq(TCGCond c) 658 { 659 switch (c) { 660 case TCG_COND_GT: 661 case TCG_COND_LTU: 662 case TCG_COND_LT: 663 case TCG_COND_GTU: 664 case TCG_COND_NE: 665 return 0; 666 case TCG_COND_GE: 667 case TCG_COND_GEU: 668 case TCG_COND_LE: 669 case TCG_COND_LEU: 670 case TCG_COND_EQ: 671 return 1; 672 case TCG_COND_TSTEQ: 
673 case TCG_COND_TSTNE: 674 return -1; 675 case TCG_COND_ALWAYS: 676 case TCG_COND_NEVER: 677 break; 678 } 679 g_assert_not_reached(); 680 } 681 682 /* 683 * Return -1 if the condition can't be simplified, 684 * and the result of the condition (0 or 1) if it can. 685 */ 686 static int do_constant_folding_cond(TCGType type, TCGArg x, 687 TCGArg y, TCGCond c) 688 { 689 if (arg_is_const(x) && arg_is_const(y)) { 690 uint64_t xv = arg_info(x)->val; 691 uint64_t yv = arg_info(y)->val; 692 693 switch (type) { 694 case TCG_TYPE_I32: 695 return do_constant_folding_cond_32(xv, yv, c); 696 case TCG_TYPE_I64: 697 return do_constant_folding_cond_64(xv, yv, c); 698 default: 699 /* Only scalar comparisons are optimizable */ 700 return -1; 701 } 702 } else if (args_are_copies(x, y)) { 703 return do_constant_folding_cond_eq(c); 704 } else if (arg_is_const_val(y, 0)) { 705 switch (c) { 706 case TCG_COND_LTU: 707 case TCG_COND_TSTNE: 708 return 0; 709 case TCG_COND_GEU: 710 case TCG_COND_TSTEQ: 711 return 1; 712 default: 713 return -1; 714 } 715 } 716 return -1; 717 } 718 719 /** 720 * swap_commutative: 721 * @dest: TCGArg of the destination argument, or NO_DEST. 722 * @p1: first paired argument 723 * @p2: second paired argument 724 * 725 * If *@p1 is a constant and *@p2 is not, swap. 726 * If *@p2 matches @dest, swap. 727 * Return true if a swap was performed. 728 */ 729 730 #define NO_DEST temp_arg(NULL) 731 732 static int pref_commutative(TempOptInfo *ti) 733 { 734 /* Slight preference for non-zero constants second. */ 735 return !ti_is_const(ti) ? 0 : ti_const_val(ti) ? 3 : 2; 736 } 737 738 static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2) 739 { 740 TCGArg a1 = *p1, a2 = *p2; 741 int sum = 0; 742 sum += pref_commutative(arg_info(a1)); 743 sum -= pref_commutative(arg_info(a2)); 744 745 /* Prefer the constant in second argument, and then the form 746 op a, a, b, which is better handled on non-RISC hosts. */ 747 if (sum > 0 || (sum == 0 && dest == a2)) { 748 *p1 = a2; 749 *p2 = a1; 750 return true; 751 } 752 return false; 753 } 754 755 static bool swap_commutative2(TCGArg *p1, TCGArg *p2) 756 { 757 int sum = 0; 758 sum += pref_commutative(arg_info(p1[0])); 759 sum += pref_commutative(arg_info(p1[1])); 760 sum -= pref_commutative(arg_info(p2[0])); 761 sum -= pref_commutative(arg_info(p2[1])); 762 if (sum > 0) { 763 TCGArg t; 764 t = p1[0], p1[0] = p2[0], p2[0] = t; 765 t = p1[1], p1[1] = p2[1], p2[1] = t; 766 return true; 767 } 768 return false; 769 } 770 771 /* 772 * Return -1 if the condition can't be simplified, 773 * and the result of the condition (0 or 1) if it can. 
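 *
 * Besides constant folding, TST comparisons are rewritten into plain
 * comparisons when the known bits of the operands allow it, e.g.
 * (illustrative):
 *
 *     x with z_mask == 0xff:      TSTNE x, 0xff  ->  NE x, 0
 *     i covered by x's s_mask:    TSTNE x, i     ->  LT x, 0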
774 */ 775 static int do_constant_folding_cond1(OptContext *ctx, TCGOp *op, TCGArg dest, 776 TCGArg *p1, TCGArg *p2, TCGArg *pcond) 777 { 778 TCGCond cond; 779 TempOptInfo *i1; 780 bool swap; 781 int r; 782 783 swap = swap_commutative(dest, p1, p2); 784 cond = *pcond; 785 if (swap) { 786 *pcond = cond = tcg_swap_cond(cond); 787 } 788 789 r = do_constant_folding_cond(ctx->type, *p1, *p2, cond); 790 if (r >= 0) { 791 return r; 792 } 793 if (!is_tst_cond(cond)) { 794 return -1; 795 } 796 797 i1 = arg_info(*p1); 798 799 /* 800 * TSTNE x,x -> NE x,0 801 * TSTNE x,i -> NE x,0 if i includes all nonzero bits of x 802 */ 803 if (args_are_copies(*p1, *p2) || 804 (arg_is_const(*p2) && (i1->z_mask & ~arg_info(*p2)->val) == 0)) { 805 *p2 = arg_new_constant(ctx, 0); 806 *pcond = tcg_tst_eqne_cond(cond); 807 return -1; 808 } 809 810 /* TSTNE x,i -> LT x,0 if i only includes sign bit copies */ 811 if (arg_is_const(*p2) && (arg_info(*p2)->val & ~i1->s_mask) == 0) { 812 *p2 = arg_new_constant(ctx, 0); 813 *pcond = tcg_tst_ltge_cond(cond); 814 return -1; 815 } 816 817 /* Expand to AND with a temporary if no backend support. */ 818 if (!TCG_TARGET_HAS_tst) { 819 TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_and, 3); 820 TCGArg tmp = arg_new_temp(ctx); 821 822 op2->args[0] = tmp; 823 op2->args[1] = *p1; 824 op2->args[2] = *p2; 825 826 *p1 = tmp; 827 *p2 = arg_new_constant(ctx, 0); 828 *pcond = tcg_tst_eqne_cond(cond); 829 } 830 return -1; 831 } 832 833 static int do_constant_folding_cond2(OptContext *ctx, TCGOp *op, TCGArg *args) 834 { 835 TCGArg al, ah, bl, bh; 836 TCGCond c; 837 bool swap; 838 int r; 839 840 swap = swap_commutative2(args, args + 2); 841 c = args[4]; 842 if (swap) { 843 args[4] = c = tcg_swap_cond(c); 844 } 845 846 al = args[0]; 847 ah = args[1]; 848 bl = args[2]; 849 bh = args[3]; 850 851 if (arg_is_const(bl) && arg_is_const(bh)) { 852 tcg_target_ulong blv = arg_info(bl)->val; 853 tcg_target_ulong bhv = arg_info(bh)->val; 854 uint64_t b = deposit64(blv, 32, 32, bhv); 855 856 if (arg_is_const(al) && arg_is_const(ah)) { 857 tcg_target_ulong alv = arg_info(al)->val; 858 tcg_target_ulong ahv = arg_info(ah)->val; 859 uint64_t a = deposit64(alv, 32, 32, ahv); 860 861 r = do_constant_folding_cond_64(a, b, c); 862 if (r >= 0) { 863 return r; 864 } 865 } 866 867 if (b == 0) { 868 switch (c) { 869 case TCG_COND_LTU: 870 case TCG_COND_TSTNE: 871 return 0; 872 case TCG_COND_GEU: 873 case TCG_COND_TSTEQ: 874 return 1; 875 default: 876 break; 877 } 878 } 879 880 /* TSTNE x,-1 -> NE x,0 */ 881 if (b == -1 && is_tst_cond(c)) { 882 args[3] = args[2] = arg_new_constant(ctx, 0); 883 args[4] = tcg_tst_eqne_cond(c); 884 return -1; 885 } 886 887 /* TSTNE x,sign -> LT x,0 */ 888 if (b == INT64_MIN && is_tst_cond(c)) { 889 /* bl must be 0, so copy that to bh */ 890 args[3] = bl; 891 args[4] = tcg_tst_ltge_cond(c); 892 return -1; 893 } 894 } 895 896 if (args_are_copies(al, bl) && args_are_copies(ah, bh)) { 897 r = do_constant_folding_cond_eq(c); 898 if (r >= 0) { 899 return r; 900 } 901 902 /* TSTNE x,x -> NE x,0 */ 903 if (is_tst_cond(c)) { 904 args[3] = args[2] = arg_new_constant(ctx, 0); 905 args[4] = tcg_tst_eqne_cond(c); 906 return -1; 907 } 908 } 909 910 /* Expand to AND with a temporary if no backend support. 
*/ 911 if (!TCG_TARGET_HAS_tst && is_tst_cond(c)) { 912 TCGOp *op1 = opt_insert_before(ctx, op, INDEX_op_and, 3); 913 TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_and, 3); 914 TCGArg t1 = arg_new_temp(ctx); 915 TCGArg t2 = arg_new_temp(ctx); 916 917 op1->args[0] = t1; 918 op1->args[1] = al; 919 op1->args[2] = bl; 920 op2->args[0] = t2; 921 op2->args[1] = ah; 922 op2->args[2] = bh; 923 924 args[0] = t1; 925 args[1] = t2; 926 args[3] = args[2] = arg_new_constant(ctx, 0); 927 args[4] = tcg_tst_eqne_cond(c); 928 } 929 return -1; 930 } 931 932 static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args) 933 { 934 for (int i = 0; i < nb_args; i++) { 935 TCGTemp *ts = arg_temp(op->args[i]); 936 init_ts_info(ctx, ts); 937 } 938 } 939 940 static void copy_propagate(OptContext *ctx, TCGOp *op, 941 int nb_oargs, int nb_iargs) 942 { 943 for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 944 TCGTemp *ts = arg_temp(op->args[i]); 945 if (ts_is_copy(ts)) { 946 op->args[i] = temp_arg(find_better_copy(ts)); 947 } 948 } 949 } 950 951 static void finish_bb(OptContext *ctx) 952 { 953 /* We only optimize memory barriers across basic blocks. */ 954 ctx->prev_mb = NULL; 955 } 956 957 static void finish_ebb(OptContext *ctx) 958 { 959 finish_bb(ctx); 960 /* We only optimize across extended basic blocks. */ 961 memset(&ctx->temps_used, 0, sizeof(ctx->temps_used)); 962 remove_mem_copy_all(ctx); 963 } 964 965 static bool finish_folding(OptContext *ctx, TCGOp *op) 966 { 967 const TCGOpDef *def = &tcg_op_defs[op->opc]; 968 int i, nb_oargs; 969 970 nb_oargs = def->nb_oargs; 971 for (i = 0; i < nb_oargs; i++) { 972 TCGTemp *ts = arg_temp(op->args[i]); 973 reset_ts(ctx, ts); 974 } 975 return true; 976 } 977 978 /* 979 * The fold_* functions return true when processing is complete, 980 * usually by folding the operation to a constant or to a copy, 981 * and calling tcg_opt_gen_{mov,movi}. They may do other things, 982 * like collect information about the value produced, for use in 983 * optimizing a subsequent operation. 984 * 985 * These first fold_* functions are all helpers, used by other 986 * folders for more specific operations. 987 */ 988 989 static bool fold_const1(OptContext *ctx, TCGOp *op) 990 { 991 if (arg_is_const(op->args[1])) { 992 uint64_t t; 993 994 t = arg_info(op->args[1])->val; 995 t = do_constant_folding(op->opc, ctx->type, t, 0); 996 return tcg_opt_gen_movi(ctx, op, op->args[0], t); 997 } 998 return false; 999 } 1000 1001 static bool fold_const2(OptContext *ctx, TCGOp *op) 1002 { 1003 if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) { 1004 uint64_t t1 = arg_info(op->args[1])->val; 1005 uint64_t t2 = arg_info(op->args[2])->val; 1006 1007 t1 = do_constant_folding(op->opc, ctx->type, t1, t2); 1008 return tcg_opt_gen_movi(ctx, op, op->args[0], t1); 1009 } 1010 return false; 1011 } 1012 1013 static bool fold_commutative(OptContext *ctx, TCGOp *op) 1014 { 1015 swap_commutative(op->args[0], &op->args[1], &op->args[2]); 1016 return false; 1017 } 1018 1019 static bool fold_const2_commutative(OptContext *ctx, TCGOp *op) 1020 { 1021 swap_commutative(op->args[0], &op->args[1], &op->args[2]); 1022 return fold_const2(ctx, op); 1023 } 1024 1025 /* 1026 * Record "zero" and "sign" masks for the single output of @op. 1027 * See TempOptInfo definition of z_mask and s_mask. 1028 * If z_mask allows, fold the output to constant zero. 1029 * The passed s_mask may be augmented by z_mask. 
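 *
 * For example (illustrative values): a result known to fit in 8 unsigned
 * bits has z_mask = 0xff; the canonicalization below then yields
 * s_mask = 0xffffffffffffff00, i.e. bits 63..8 are known to replicate
 * bit 63. A z_mask of 0 means every bit is known zero, which is why the
 * output is folded to constant 0.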
 */
static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
                          uint64_t z_mask, int64_t s_mask)
{
    const TCGOpDef *def = &tcg_op_defs[op->opc];
    TCGTemp *ts;
    TempOptInfo *ti;
    int rep;

    /* Only single-output opcodes are supported here. */
    tcg_debug_assert(def->nb_oargs == 1);

    /*
     * 32-bit ops generate 32-bit results, which for the purpose of
     * simplifying tcg are sign-extended. Certainly that's how we
     * represent our constants elsewhere. Note that the bits will
     * be reset properly for a 64-bit value when encountering the
     * type changing opcodes.
     */
    if (ctx->type == TCG_TYPE_I32) {
        z_mask = (int32_t)z_mask;
        s_mask |= INT32_MIN;
    }

    if (z_mask == 0) {
        return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
    }

    ts = arg_temp(op->args[0]);
    reset_ts(ctx, ts);

    ti = ts_info(ts);
    ti->z_mask = z_mask;

    /* Canonicalize s_mask and incorporate data from z_mask. */
    rep = clz64(~s_mask);
    rep = MAX(rep, clz64(z_mask));
    rep = MAX(rep - 1, 0);
    ti->s_mask = INT64_MIN >> rep;

    return true;
}

static bool fold_masks_z(OptContext *ctx, TCGOp *op, uint64_t z_mask)
{
    return fold_masks_zs(ctx, op, z_mask, 0);
}

static bool fold_masks_s(OptContext *ctx, TCGOp *op, uint64_t s_mask)
{
    return fold_masks_zs(ctx, op, -1, s_mask);
}

/*
 * An "affected" mask bit is 0 if and only if the result is identical
 * to the first input. Thus if the entire mask is 0, the operation
 * is equivalent to a copy.
 */
static bool fold_affected_mask(OptContext *ctx, TCGOp *op, uint64_t a_mask)
{
    if (ctx->type == TCG_TYPE_I32) {
        a_mask = (uint32_t)a_mask;
    }
    if (a_mask == 0) {
        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
    }
    return false;
}

/*
 * Convert @op to NOT, if NOT is supported by the host.
 * Return true if the conversion is successful, which will still
 * indicate that the processing is complete.
 */
static bool fold_not(OptContext *ctx, TCGOp *op);
static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx)
{
    TCGOpcode not_op;
    bool have_not;

    switch (ctx->type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        not_op = INDEX_op_not;
        have_not = tcg_op_supported(INDEX_op_not, ctx->type, 0);
        break;
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        not_op = INDEX_op_not_vec;
        have_not = TCG_TARGET_HAS_not_vec;
        break;
    default:
        g_assert_not_reached();
    }
    if (have_not) {
        op->opc = not_op;
        op->args[1] = op->args[idx];
        return fold_not(ctx, op);
    }
    return false;
}

/* If the binary operation has first argument @i, fold to @i. */
static bool fold_ix_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
{
    if (arg_is_const_val(op->args[1], i)) {
        return tcg_opt_gen_movi(ctx, op, op->args[0], i);
    }
    return false;
}

/* If the binary operation has first argument @i, fold to NOT. */
static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
{
    if (arg_is_const_val(op->args[1], i)) {
        return fold_to_not(ctx, op, 2);
    }
    return false;
}

/* If the binary operation has second argument @i, fold to @i.
*/ 1152 static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i) 1153 { 1154 if (arg_is_const_val(op->args[2], i)) { 1155 return tcg_opt_gen_movi(ctx, op, op->args[0], i); 1156 } 1157 return false; 1158 } 1159 1160 /* If the binary operation has second argument @i, fold to identity. */ 1161 static bool fold_xi_to_x(OptContext *ctx, TCGOp *op, uint64_t i) 1162 { 1163 if (arg_is_const_val(op->args[2], i)) { 1164 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]); 1165 } 1166 return false; 1167 } 1168 1169 /* If the binary operation has second argument @i, fold to NOT. */ 1170 static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i) 1171 { 1172 if (arg_is_const_val(op->args[2], i)) { 1173 return fold_to_not(ctx, op, 1); 1174 } 1175 return false; 1176 } 1177 1178 /* If the binary operation has both arguments equal, fold to @i. */ 1179 static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i) 1180 { 1181 if (args_are_copies(op->args[1], op->args[2])) { 1182 return tcg_opt_gen_movi(ctx, op, op->args[0], i); 1183 } 1184 return false; 1185 } 1186 1187 /* If the binary operation has both arguments equal, fold to identity. */ 1188 static bool fold_xx_to_x(OptContext *ctx, TCGOp *op) 1189 { 1190 if (args_are_copies(op->args[1], op->args[2])) { 1191 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]); 1192 } 1193 return false; 1194 } 1195 1196 /* 1197 * These outermost fold_<op> functions are sorted alphabetically. 1198 * 1199 * The ordering of the transformations should be: 1200 * 1) those that produce a constant 1201 * 2) those that produce a copy 1202 * 3) those that produce information about the result value. 1203 */ 1204 1205 static bool fold_addco(OptContext *ctx, TCGOp *op); 1206 static bool fold_or(OptContext *ctx, TCGOp *op); 1207 static bool fold_orc(OptContext *ctx, TCGOp *op); 1208 static bool fold_subbo(OptContext *ctx, TCGOp *op); 1209 static bool fold_xor(OptContext *ctx, TCGOp *op); 1210 1211 static bool fold_add(OptContext *ctx, TCGOp *op) 1212 { 1213 if (fold_const2_commutative(ctx, op) || 1214 fold_xi_to_x(ctx, op, 0)) { 1215 return true; 1216 } 1217 return finish_folding(ctx, op); 1218 } 1219 1220 /* We cannot as yet do_constant_folding with vectors. */ 1221 static bool fold_add_vec(OptContext *ctx, TCGOp *op) 1222 { 1223 if (fold_commutative(ctx, op) || 1224 fold_xi_to_x(ctx, op, 0)) { 1225 return true; 1226 } 1227 return finish_folding(ctx, op); 1228 } 1229 1230 static void squash_prev_carryout(OptContext *ctx, TCGOp *op) 1231 { 1232 TempOptInfo *t2; 1233 1234 op = QTAILQ_PREV(op, link); 1235 switch (op->opc) { 1236 case INDEX_op_addco: 1237 op->opc = INDEX_op_add; 1238 fold_add(ctx, op); 1239 break; 1240 case INDEX_op_addcio: 1241 op->opc = INDEX_op_addci; 1242 break; 1243 case INDEX_op_addc1o: 1244 op->opc = INDEX_op_add; 1245 t2 = arg_info(op->args[2]); 1246 if (ti_is_const(t2)) { 1247 op->args[2] = arg_new_constant(ctx, ti_const_val(t2) + 1); 1248 /* Perform other constant folding, if needed. 
*/ 1249 fold_add(ctx, op); 1250 } else { 1251 TCGArg ret = op->args[0]; 1252 op = opt_insert_after(ctx, op, INDEX_op_add, 3); 1253 op->args[0] = ret; 1254 op->args[1] = ret; 1255 op->args[2] = arg_new_constant(ctx, 1); 1256 } 1257 break; 1258 default: 1259 g_assert_not_reached(); 1260 } 1261 } 1262 1263 static bool fold_addci(OptContext *ctx, TCGOp *op) 1264 { 1265 fold_commutative(ctx, op); 1266 1267 if (ctx->carry_state < 0) { 1268 return finish_folding(ctx, op); 1269 } 1270 1271 squash_prev_carryout(ctx, op); 1272 op->opc = INDEX_op_add; 1273 1274 if (ctx->carry_state > 0) { 1275 TempOptInfo *t2 = arg_info(op->args[2]); 1276 1277 /* 1278 * Propagate the known carry-in into a constant, if possible. 1279 * Otherwise emit a second add +1. 1280 */ 1281 if (ti_is_const(t2)) { 1282 op->args[2] = arg_new_constant(ctx, ti_const_val(t2) + 1); 1283 } else { 1284 TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_add, 3); 1285 1286 op2->args[0] = op->args[0]; 1287 op2->args[1] = op->args[1]; 1288 op2->args[2] = op->args[2]; 1289 fold_add(ctx, op2); 1290 1291 op->args[1] = op->args[0]; 1292 op->args[2] = arg_new_constant(ctx, 1); 1293 } 1294 } 1295 1296 ctx->carry_state = -1; 1297 return fold_add(ctx, op); 1298 } 1299 1300 static bool fold_addcio(OptContext *ctx, TCGOp *op) 1301 { 1302 TempOptInfo *t1, *t2; 1303 int carry_out = -1; 1304 uint64_t sum, max; 1305 1306 fold_commutative(ctx, op); 1307 t1 = arg_info(op->args[1]); 1308 t2 = arg_info(op->args[2]); 1309 1310 /* 1311 * The z_mask value is >= the maximum value that can be represented 1312 * with the known zero bits. So adding the z_mask values will not 1313 * overflow if and only if the true values cannot overflow. 1314 */ 1315 if (!uadd64_overflow(t1->z_mask, t2->z_mask, &sum) && 1316 !uadd64_overflow(sum, ctx->carry_state != 0, &sum)) { 1317 carry_out = 0; 1318 } 1319 1320 if (ctx->carry_state < 0) { 1321 ctx->carry_state = carry_out; 1322 return finish_folding(ctx, op); 1323 } 1324 1325 squash_prev_carryout(ctx, op); 1326 if (ctx->carry_state == 0) { 1327 goto do_addco; 1328 } 1329 1330 /* Propagate the known carry-in into a constant, if possible. */ 1331 max = ctx->type == TCG_TYPE_I32 ? UINT32_MAX : UINT64_MAX; 1332 if (ti_is_const(t2)) { 1333 uint64_t v = ti_const_val(t2) & max; 1334 if (v < max) { 1335 op->args[2] = arg_new_constant(ctx, v + 1); 1336 goto do_addco; 1337 } 1338 /* max + known carry in produces known carry out. */ 1339 carry_out = 1; 1340 } 1341 if (ti_is_const(t1)) { 1342 uint64_t v = ti_const_val(t1) & max; 1343 if (v < max) { 1344 op->args[1] = arg_new_constant(ctx, v + 1); 1345 goto do_addco; 1346 } 1347 carry_out = 1; 1348 } 1349 1350 /* Adjust the opcode to remember the known carry-in. */ 1351 op->opc = INDEX_op_addc1o; 1352 ctx->carry_state = carry_out; 1353 return finish_folding(ctx, op); 1354 1355 do_addco: 1356 op->opc = INDEX_op_addco; 1357 return fold_addco(ctx, op); 1358 } 1359 1360 static bool fold_addco(OptContext *ctx, TCGOp *op) 1361 { 1362 TempOptInfo *t1, *t2; 1363 int carry_out = -1; 1364 uint64_t ign; 1365 1366 fold_commutative(ctx, op); 1367 t1 = arg_info(op->args[1]); 1368 t2 = arg_info(op->args[2]); 1369 1370 if (ti_is_const(t2)) { 1371 uint64_t v2 = ti_const_val(t2); 1372 1373 if (ti_is_const(t1)) { 1374 uint64_t v1 = ti_const_val(t1); 1375 /* Given sign-extension of z_mask for I32, we need not truncate. 
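 *
 * e.g. (illustrative): a 32-bit constant 0xffffffff is stored
 * sign-extended as 0xffffffffffffffff, so adding 1 overflows the
 * 64-bit addition exactly when the 32-bit addition would carry out.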
*/ 1376 carry_out = uadd64_overflow(v1, v2, &ign); 1377 } else if (v2 == 0) { 1378 carry_out = 0; 1379 } 1380 } else { 1381 /* 1382 * The z_mask value is >= the maximum value that can be represented 1383 * with the known zero bits. So adding the z_mask values will not 1384 * overflow if and only if the true values cannot overflow. 1385 */ 1386 if (!uadd64_overflow(t1->z_mask, t2->z_mask, &ign)) { 1387 carry_out = 0; 1388 } 1389 } 1390 ctx->carry_state = carry_out; 1391 return finish_folding(ctx, op); 1392 } 1393 1394 static bool fold_and(OptContext *ctx, TCGOp *op) 1395 { 1396 uint64_t z1, z2, z_mask, s_mask; 1397 TempOptInfo *t1, *t2; 1398 1399 if (fold_const2_commutative(ctx, op) || 1400 fold_xi_to_i(ctx, op, 0) || 1401 fold_xi_to_x(ctx, op, -1) || 1402 fold_xx_to_x(ctx, op)) { 1403 return true; 1404 } 1405 1406 t1 = arg_info(op->args[1]); 1407 t2 = arg_info(op->args[2]); 1408 z1 = t1->z_mask; 1409 z2 = t2->z_mask; 1410 1411 /* 1412 * Known-zeros does not imply known-ones. Therefore unless 1413 * arg2 is constant, we can't infer affected bits from it. 1414 */ 1415 if (ti_is_const(t2) && fold_affected_mask(ctx, op, z1 & ~z2)) { 1416 return true; 1417 } 1418 1419 z_mask = z1 & z2; 1420 1421 /* 1422 * Sign repetitions are perforce all identical, whether they are 1 or 0. 1423 * Bitwise operations preserve the relative quantity of the repetitions. 1424 */ 1425 s_mask = t1->s_mask & t2->s_mask; 1426 1427 return fold_masks_zs(ctx, op, z_mask, s_mask); 1428 } 1429 1430 static bool fold_andc(OptContext *ctx, TCGOp *op) 1431 { 1432 uint64_t z_mask, s_mask; 1433 TempOptInfo *t1, *t2; 1434 1435 if (fold_const2(ctx, op) || 1436 fold_xx_to_i(ctx, op, 0) || 1437 fold_xi_to_x(ctx, op, 0) || 1438 fold_ix_to_not(ctx, op, -1)) { 1439 return true; 1440 } 1441 1442 t1 = arg_info(op->args[1]); 1443 t2 = arg_info(op->args[2]); 1444 z_mask = t1->z_mask; 1445 1446 if (ti_is_const(t2)) { 1447 /* Fold andc r,x,i to and r,x,~i. */ 1448 switch (ctx->type) { 1449 case TCG_TYPE_I32: 1450 case TCG_TYPE_I64: 1451 op->opc = INDEX_op_and; 1452 break; 1453 case TCG_TYPE_V64: 1454 case TCG_TYPE_V128: 1455 case TCG_TYPE_V256: 1456 op->opc = INDEX_op_and_vec; 1457 break; 1458 default: 1459 g_assert_not_reached(); 1460 } 1461 op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2)); 1462 return fold_and(ctx, op); 1463 } 1464 1465 /* 1466 * Known-zeros does not imply known-ones. Therefore unless 1467 * arg2 is constant, we can't infer anything from it. 1468 */ 1469 if (ti_is_const(t2)) { 1470 uint64_t v2 = ti_const_val(t2); 1471 if (fold_affected_mask(ctx, op, z_mask & v2)) { 1472 return true; 1473 } 1474 z_mask &= ~v2; 1475 } 1476 1477 s_mask = t1->s_mask & t2->s_mask; 1478 return fold_masks_zs(ctx, op, z_mask, s_mask); 1479 } 1480 1481 static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op) 1482 { 1483 /* If true and false values are the same, eliminate the cmp. 
*/ 1484 if (args_are_copies(op->args[2], op->args[3])) { 1485 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]); 1486 } 1487 1488 if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) { 1489 uint64_t tv = arg_info(op->args[2])->val; 1490 uint64_t fv = arg_info(op->args[3])->val; 1491 1492 if (tv == -1 && fv == 0) { 1493 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]); 1494 } 1495 if (tv == 0 && fv == -1) { 1496 if (TCG_TARGET_HAS_not_vec) { 1497 op->opc = INDEX_op_not_vec; 1498 return fold_not(ctx, op); 1499 } else { 1500 op->opc = INDEX_op_xor_vec; 1501 op->args[2] = arg_new_constant(ctx, -1); 1502 return fold_xor(ctx, op); 1503 } 1504 } 1505 } 1506 if (arg_is_const(op->args[2])) { 1507 uint64_t tv = arg_info(op->args[2])->val; 1508 if (tv == -1) { 1509 op->opc = INDEX_op_or_vec; 1510 op->args[2] = op->args[3]; 1511 return fold_or(ctx, op); 1512 } 1513 if (tv == 0 && TCG_TARGET_HAS_andc_vec) { 1514 op->opc = INDEX_op_andc_vec; 1515 op->args[2] = op->args[1]; 1516 op->args[1] = op->args[3]; 1517 return fold_andc(ctx, op); 1518 } 1519 } 1520 if (arg_is_const(op->args[3])) { 1521 uint64_t fv = arg_info(op->args[3])->val; 1522 if (fv == 0) { 1523 op->opc = INDEX_op_and_vec; 1524 return fold_and(ctx, op); 1525 } 1526 if (fv == -1 && TCG_TARGET_HAS_orc_vec) { 1527 op->opc = INDEX_op_orc_vec; 1528 op->args[2] = op->args[1]; 1529 op->args[1] = op->args[3]; 1530 return fold_orc(ctx, op); 1531 } 1532 } 1533 return finish_folding(ctx, op); 1534 } 1535 1536 static bool fold_brcond(OptContext *ctx, TCGOp *op) 1537 { 1538 int i = do_constant_folding_cond1(ctx, op, NO_DEST, &op->args[0], 1539 &op->args[1], &op->args[2]); 1540 if (i == 0) { 1541 tcg_op_remove(ctx->tcg, op); 1542 return true; 1543 } 1544 if (i > 0) { 1545 op->opc = INDEX_op_br; 1546 op->args[0] = op->args[3]; 1547 finish_ebb(ctx); 1548 } else { 1549 finish_bb(ctx); 1550 } 1551 return true; 1552 } 1553 1554 static bool fold_brcond2(OptContext *ctx, TCGOp *op) 1555 { 1556 TCGCond cond; 1557 TCGArg label; 1558 int i, inv = 0; 1559 1560 i = do_constant_folding_cond2(ctx, op, &op->args[0]); 1561 cond = op->args[4]; 1562 label = op->args[5]; 1563 if (i >= 0) { 1564 goto do_brcond_const; 1565 } 1566 1567 switch (cond) { 1568 case TCG_COND_LT: 1569 case TCG_COND_GE: 1570 /* 1571 * Simplify LT/GE comparisons vs zero to a single compare 1572 * vs the high word of the input. 1573 */ 1574 if (arg_is_const_val(op->args[2], 0) && 1575 arg_is_const_val(op->args[3], 0)) { 1576 goto do_brcond_high; 1577 } 1578 break; 1579 1580 case TCG_COND_NE: 1581 inv = 1; 1582 QEMU_FALLTHROUGH; 1583 case TCG_COND_EQ: 1584 /* 1585 * Simplify EQ/NE comparisons where one of the pairs 1586 * can be simplified. 
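 *
 * For example (sketch): for brcond2 al,ah,bl,bh,EQ with al and bl both
 * constant, the low-half comparison is folded first; if it is false the
 * branch can never be taken, and if it is true only the high halves
 * still need to be compared (do_brcond_high).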
1587 */ 1588 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[0], 1589 op->args[2], cond); 1590 switch (i ^ inv) { 1591 case 0: 1592 goto do_brcond_const; 1593 case 1: 1594 goto do_brcond_high; 1595 } 1596 1597 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1], 1598 op->args[3], cond); 1599 switch (i ^ inv) { 1600 case 0: 1601 goto do_brcond_const; 1602 case 1: 1603 goto do_brcond_low; 1604 } 1605 break; 1606 1607 case TCG_COND_TSTEQ: 1608 case TCG_COND_TSTNE: 1609 if (arg_is_const_val(op->args[2], 0)) { 1610 goto do_brcond_high; 1611 } 1612 if (arg_is_const_val(op->args[3], 0)) { 1613 goto do_brcond_low; 1614 } 1615 break; 1616 1617 default: 1618 break; 1619 1620 do_brcond_low: 1621 op->opc = INDEX_op_brcond; 1622 op->args[1] = op->args[2]; 1623 op->args[2] = cond; 1624 op->args[3] = label; 1625 return fold_brcond(ctx, op); 1626 1627 do_brcond_high: 1628 op->opc = INDEX_op_brcond; 1629 op->args[0] = op->args[1]; 1630 op->args[1] = op->args[3]; 1631 op->args[2] = cond; 1632 op->args[3] = label; 1633 return fold_brcond(ctx, op); 1634 1635 do_brcond_const: 1636 if (i == 0) { 1637 tcg_op_remove(ctx->tcg, op); 1638 return true; 1639 } 1640 op->opc = INDEX_op_br; 1641 op->args[0] = label; 1642 finish_ebb(ctx); 1643 return true; 1644 } 1645 1646 finish_bb(ctx); 1647 return true; 1648 } 1649 1650 static bool fold_bswap(OptContext *ctx, TCGOp *op) 1651 { 1652 uint64_t z_mask, s_mask, sign; 1653 TempOptInfo *t1 = arg_info(op->args[1]); 1654 1655 if (ti_is_const(t1)) { 1656 return tcg_opt_gen_movi(ctx, op, op->args[0], 1657 do_constant_folding(op->opc, ctx->type, 1658 ti_const_val(t1), 1659 op->args[2])); 1660 } 1661 1662 z_mask = t1->z_mask; 1663 switch (op->opc) { 1664 case INDEX_op_bswap16: 1665 z_mask = bswap16(z_mask); 1666 sign = INT16_MIN; 1667 break; 1668 case INDEX_op_bswap32: 1669 z_mask = bswap32(z_mask); 1670 sign = INT32_MIN; 1671 break; 1672 case INDEX_op_bswap64: 1673 z_mask = bswap64(z_mask); 1674 sign = INT64_MIN; 1675 break; 1676 default: 1677 g_assert_not_reached(); 1678 } 1679 1680 s_mask = 0; 1681 switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) { 1682 case TCG_BSWAP_OZ: 1683 break; 1684 case TCG_BSWAP_OS: 1685 /* If the sign bit may be 1, force all the bits above to 1. */ 1686 if (z_mask & sign) { 1687 z_mask |= sign; 1688 } 1689 /* The value and therefore s_mask is explicitly sign-extended. */ 1690 s_mask = sign; 1691 break; 1692 default: 1693 /* The high bits are undefined: force all bits above the sign to 1. */ 1694 z_mask |= sign << 1; 1695 break; 1696 } 1697 1698 return fold_masks_zs(ctx, op, z_mask, s_mask); 1699 } 1700 1701 static bool fold_call(OptContext *ctx, TCGOp *op) 1702 { 1703 TCGContext *s = ctx->tcg; 1704 int nb_oargs = TCGOP_CALLO(op); 1705 int nb_iargs = TCGOP_CALLI(op); 1706 int flags, i; 1707 1708 init_arguments(ctx, op, nb_oargs + nb_iargs); 1709 copy_propagate(ctx, op, nb_oargs, nb_iargs); 1710 1711 /* If the function reads or writes globals, reset temp data. */ 1712 flags = tcg_call_flags(op); 1713 if (!(flags & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) { 1714 int nb_globals = s->nb_globals; 1715 1716 for (i = 0; i < nb_globals; i++) { 1717 if (test_bit(i, ctx->temps_used.l)) { 1718 reset_ts(ctx, &ctx->tcg->temps[i]); 1719 } 1720 } 1721 } 1722 1723 /* If the function has side effects, reset mem data. */ 1724 if (!(flags & TCG_CALL_NO_SIDE_EFFECTS)) { 1725 remove_mem_copy_all(ctx); 1726 } 1727 1728 /* Reset temp data for outputs. 
*/ 1729 for (i = 0; i < nb_oargs; i++) { 1730 reset_temp(ctx, op->args[i]); 1731 } 1732 1733 /* Stop optimizing MB across calls. */ 1734 ctx->prev_mb = NULL; 1735 return true; 1736 } 1737 1738 static bool fold_cmp_vec(OptContext *ctx, TCGOp *op) 1739 { 1740 /* Canonicalize the comparison to put immediate second. */ 1741 if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) { 1742 op->args[3] = tcg_swap_cond(op->args[3]); 1743 } 1744 return finish_folding(ctx, op); 1745 } 1746 1747 static bool fold_cmpsel_vec(OptContext *ctx, TCGOp *op) 1748 { 1749 /* If true and false values are the same, eliminate the cmp. */ 1750 if (args_are_copies(op->args[3], op->args[4])) { 1751 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]); 1752 } 1753 1754 /* Canonicalize the comparison to put immediate second. */ 1755 if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) { 1756 op->args[5] = tcg_swap_cond(op->args[5]); 1757 } 1758 /* 1759 * Canonicalize the "false" input reg to match the destination, 1760 * so that the tcg backend can implement "move if true". 1761 */ 1762 if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) { 1763 op->args[5] = tcg_invert_cond(op->args[5]); 1764 } 1765 return finish_folding(ctx, op); 1766 } 1767 1768 static bool fold_count_zeros(OptContext *ctx, TCGOp *op) 1769 { 1770 uint64_t z_mask, s_mask; 1771 TempOptInfo *t1 = arg_info(op->args[1]); 1772 TempOptInfo *t2 = arg_info(op->args[2]); 1773 1774 if (ti_is_const(t1)) { 1775 uint64_t t = ti_const_val(t1); 1776 1777 if (t != 0) { 1778 t = do_constant_folding(op->opc, ctx->type, t, 0); 1779 return tcg_opt_gen_movi(ctx, op, op->args[0], t); 1780 } 1781 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]); 1782 } 1783 1784 switch (ctx->type) { 1785 case TCG_TYPE_I32: 1786 z_mask = 31; 1787 break; 1788 case TCG_TYPE_I64: 1789 z_mask = 63; 1790 break; 1791 default: 1792 g_assert_not_reached(); 1793 } 1794 s_mask = ~z_mask; 1795 z_mask |= t2->z_mask; 1796 s_mask &= t2->s_mask; 1797 1798 return fold_masks_zs(ctx, op, z_mask, s_mask); 1799 } 1800 1801 static bool fold_ctpop(OptContext *ctx, TCGOp *op) 1802 { 1803 uint64_t z_mask; 1804 1805 if (fold_const1(ctx, op)) { 1806 return true; 1807 } 1808 1809 switch (ctx->type) { 1810 case TCG_TYPE_I32: 1811 z_mask = 32 | 31; 1812 break; 1813 case TCG_TYPE_I64: 1814 z_mask = 64 | 63; 1815 break; 1816 default: 1817 g_assert_not_reached(); 1818 } 1819 return fold_masks_z(ctx, op, z_mask); 1820 } 1821 1822 static bool fold_deposit(OptContext *ctx, TCGOp *op) 1823 { 1824 TempOptInfo *t1 = arg_info(op->args[1]); 1825 TempOptInfo *t2 = arg_info(op->args[2]); 1826 int ofs = op->args[3]; 1827 int len = op->args[4]; 1828 int width = 8 * tcg_type_size(ctx->type); 1829 uint64_t z_mask, s_mask; 1830 1831 if (ti_is_const(t1) && ti_is_const(t2)) { 1832 return tcg_opt_gen_movi(ctx, op, op->args[0], 1833 deposit64(ti_const_val(t1), ofs, len, 1834 ti_const_val(t2))); 1835 } 1836 1837 /* Inserting a value into zero at offset 0. */ 1838 if (ti_is_const_val(t1, 0) && ofs == 0) { 1839 uint64_t mask = MAKE_64BIT_MASK(0, len); 1840 1841 op->opc = INDEX_op_and; 1842 op->args[1] = op->args[2]; 1843 op->args[2] = arg_new_constant(ctx, mask); 1844 return fold_and(ctx, op); 1845 } 1846 1847 /* Inserting zero into a value. 
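 *
 * e.g. (illustrative): depositing zero at ofs=8, len=8 clears bits 15..8
 * of the input, which is the same as
 *
 *     and dst, src, deposit64(-1, 8, 8, 0)    ==  and dst, src, ~0xff00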
*/ 1848 if (ti_is_const_val(t2, 0)) { 1849 uint64_t mask = deposit64(-1, ofs, len, 0); 1850 1851 op->opc = INDEX_op_and; 1852 op->args[2] = arg_new_constant(ctx, mask); 1853 return fold_and(ctx, op); 1854 } 1855 1856 /* The s_mask from the top portion of the deposit is still valid. */ 1857 if (ofs + len == width) { 1858 s_mask = t2->s_mask << ofs; 1859 } else { 1860 s_mask = t1->s_mask & ~MAKE_64BIT_MASK(0, ofs + len); 1861 } 1862 1863 z_mask = deposit64(t1->z_mask, ofs, len, t2->z_mask); 1864 return fold_masks_zs(ctx, op, z_mask, s_mask); 1865 } 1866 1867 static bool fold_divide(OptContext *ctx, TCGOp *op) 1868 { 1869 if (fold_const2(ctx, op) || 1870 fold_xi_to_x(ctx, op, 1)) { 1871 return true; 1872 } 1873 return finish_folding(ctx, op); 1874 } 1875 1876 static bool fold_dup(OptContext *ctx, TCGOp *op) 1877 { 1878 if (arg_is_const(op->args[1])) { 1879 uint64_t t = arg_info(op->args[1])->val; 1880 t = dup_const(TCGOP_VECE(op), t); 1881 return tcg_opt_gen_movi(ctx, op, op->args[0], t); 1882 } 1883 return finish_folding(ctx, op); 1884 } 1885 1886 static bool fold_dup2(OptContext *ctx, TCGOp *op) 1887 { 1888 if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) { 1889 uint64_t t = deposit64(arg_info(op->args[1])->val, 32, 32, 1890 arg_info(op->args[2])->val); 1891 return tcg_opt_gen_movi(ctx, op, op->args[0], t); 1892 } 1893 1894 if (args_are_copies(op->args[1], op->args[2])) { 1895 op->opc = INDEX_op_dup_vec; 1896 TCGOP_VECE(op) = MO_32; 1897 } 1898 return finish_folding(ctx, op); 1899 } 1900 1901 static bool fold_eqv(OptContext *ctx, TCGOp *op) 1902 { 1903 uint64_t s_mask; 1904 TempOptInfo *t1, *t2; 1905 1906 if (fold_const2_commutative(ctx, op) || 1907 fold_xi_to_x(ctx, op, -1) || 1908 fold_xi_to_not(ctx, op, 0)) { 1909 return true; 1910 } 1911 1912 t2 = arg_info(op->args[2]); 1913 if (ti_is_const(t2)) { 1914 /* Fold eqv r,x,i to xor r,x,~i. 
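 *
 * eqv is the complement of xor, so with a constant operand the
 * complement can be folded into the constant instead:
 *
 *     eqv(x, i) = ~(x ^ i) = x ^ ~i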
*/ 1915 switch (ctx->type) { 1916 case TCG_TYPE_I32: 1917 case TCG_TYPE_I64: 1918 op->opc = INDEX_op_xor; 1919 break; 1920 case TCG_TYPE_V64: 1921 case TCG_TYPE_V128: 1922 case TCG_TYPE_V256: 1923 op->opc = INDEX_op_xor_vec; 1924 break; 1925 default: 1926 g_assert_not_reached(); 1927 } 1928 op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2)); 1929 return fold_xor(ctx, op); 1930 } 1931 1932 t1 = arg_info(op->args[1]); 1933 s_mask = t1->s_mask & t2->s_mask; 1934 return fold_masks_s(ctx, op, s_mask); 1935 } 1936 1937 static bool fold_extract(OptContext *ctx, TCGOp *op) 1938 { 1939 uint64_t z_mask_old, z_mask; 1940 TempOptInfo *t1 = arg_info(op->args[1]); 1941 int pos = op->args[2]; 1942 int len = op->args[3]; 1943 1944 if (ti_is_const(t1)) { 1945 return tcg_opt_gen_movi(ctx, op, op->args[0], 1946 extract64(ti_const_val(t1), pos, len)); 1947 } 1948 1949 z_mask_old = t1->z_mask; 1950 z_mask = extract64(z_mask_old, pos, len); 1951 if (pos == 0 && fold_affected_mask(ctx, op, z_mask_old ^ z_mask)) { 1952 return true; 1953 } 1954 1955 return fold_masks_z(ctx, op, z_mask); 1956 } 1957 1958 static bool fold_extract2(OptContext *ctx, TCGOp *op) 1959 { 1960 if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) { 1961 uint64_t v1 = arg_info(op->args[1])->val; 1962 uint64_t v2 = arg_info(op->args[2])->val; 1963 int shr = op->args[3]; 1964 1965 if (ctx->type == TCG_TYPE_I32) { 1966 v1 = (uint32_t)v1 >> shr; 1967 v2 = (uint64_t)((int32_t)v2 << (32 - shr)); 1968 } else { 1969 v1 >>= shr; 1970 v2 <<= 64 - shr; 1971 } 1972 return tcg_opt_gen_movi(ctx, op, op->args[0], v1 | v2); 1973 } 1974 return finish_folding(ctx, op); 1975 } 1976 1977 static bool fold_exts(OptContext *ctx, TCGOp *op) 1978 { 1979 uint64_t s_mask, z_mask; 1980 TempOptInfo *t1; 1981 1982 if (fold_const1(ctx, op)) { 1983 return true; 1984 } 1985 1986 t1 = arg_info(op->args[1]); 1987 z_mask = t1->z_mask; 1988 s_mask = t1->s_mask; 1989 1990 switch (op->opc) { 1991 case INDEX_op_ext_i32_i64: 1992 s_mask |= INT32_MIN; 1993 z_mask = (int32_t)z_mask; 1994 break; 1995 default: 1996 g_assert_not_reached(); 1997 } 1998 return fold_masks_zs(ctx, op, z_mask, s_mask); 1999 } 2000 2001 static bool fold_extu(OptContext *ctx, TCGOp *op) 2002 { 2003 uint64_t z_mask; 2004 2005 if (fold_const1(ctx, op)) { 2006 return true; 2007 } 2008 2009 z_mask = arg_info(op->args[1])->z_mask; 2010 switch (op->opc) { 2011 case INDEX_op_extrl_i64_i32: 2012 case INDEX_op_extu_i32_i64: 2013 z_mask = (uint32_t)z_mask; 2014 break; 2015 case INDEX_op_extrh_i64_i32: 2016 z_mask >>= 32; 2017 break; 2018 default: 2019 g_assert_not_reached(); 2020 } 2021 return fold_masks_z(ctx, op, z_mask); 2022 } 2023 2024 static bool fold_mb(OptContext *ctx, TCGOp *op) 2025 { 2026 /* Eliminate duplicate and redundant fence instructions. */ 2027 if (ctx->prev_mb) { 2028 /* 2029 * Merge two barriers of the same type into one, 2030 * or a weaker barrier into a stronger one, 2031 * or two weaker barriers into a stronger one. 2032 * mb X; mb Y => mb X|Y 2033 * mb; strl => mb; st 2034 * ldaq; mb => ld; mb 2035 * ldaq; strl => ld; mb; st 2036 * Other combinations are also merged into a strong 2037 * barrier. This is stricter than specified but for 2038 * the purposes of TCG is better than not optimizing. 
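 *
 * Concretely (illustrative): op->args[0] is a TCG_MO_* / TCG_BAR_*
 * bitmask, so merging is just OR-ing the later mask into the earlier
 * barrier, e.g.
 *
 *     mb  TCG_MO_LD_LD | TCG_BAR_LDAQ
 *     mb  TCG_MO_ST_ST | TCG_BAR_STRL   ->   mb  (union of both masks)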
2039 */ 2040 ctx->prev_mb->args[0] |= op->args[0]; 2041 tcg_op_remove(ctx->tcg, op); 2042 } else { 2043 ctx->prev_mb = op; 2044 } 2045 return true; 2046 } 2047 2048 static bool fold_mov(OptContext *ctx, TCGOp *op) 2049 { 2050 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]); 2051 } 2052 2053 static bool fold_movcond(OptContext *ctx, TCGOp *op) 2054 { 2055 uint64_t z_mask, s_mask; 2056 TempOptInfo *tt, *ft; 2057 int i; 2058 2059 /* If true and false values are the same, eliminate the cmp. */ 2060 if (args_are_copies(op->args[3], op->args[4])) { 2061 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]); 2062 } 2063 2064 /* 2065 * Canonicalize the "false" input reg to match the destination reg so 2066 * that the tcg backend can implement a "move if true" operation. 2067 */ 2068 if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) { 2069 op->args[5] = tcg_invert_cond(op->args[5]); 2070 } 2071 2072 i = do_constant_folding_cond1(ctx, op, NO_DEST, &op->args[1], 2073 &op->args[2], &op->args[5]); 2074 if (i >= 0) { 2075 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]); 2076 } 2077 2078 tt = arg_info(op->args[3]); 2079 ft = arg_info(op->args[4]); 2080 z_mask = tt->z_mask | ft->z_mask; 2081 s_mask = tt->s_mask & ft->s_mask; 2082 2083 if (ti_is_const(tt) && ti_is_const(ft)) { 2084 uint64_t tv = ti_const_val(tt); 2085 uint64_t fv = ti_const_val(ft); 2086 TCGCond cond = op->args[5]; 2087 2088 if (tv == 1 && fv == 0) { 2089 op->opc = INDEX_op_setcond; 2090 op->args[3] = cond; 2091 } else if (fv == 1 && tv == 0) { 2092 op->opc = INDEX_op_setcond; 2093 op->args[3] = tcg_invert_cond(cond); 2094 } else if (tv == -1 && fv == 0) { 2095 op->opc = INDEX_op_negsetcond; 2096 op->args[3] = cond; 2097 } else if (fv == -1 && tv == 0) { 2098 op->opc = INDEX_op_negsetcond; 2099 op->args[3] = tcg_invert_cond(cond); 2100 } 2101 } 2102 2103 return fold_masks_zs(ctx, op, z_mask, s_mask); 2104 } 2105 2106 static bool fold_mul(OptContext *ctx, TCGOp *op) 2107 { 2108 if (fold_const2(ctx, op) || 2109 fold_xi_to_i(ctx, op, 0) || 2110 fold_xi_to_x(ctx, op, 1)) { 2111 return true; 2112 } 2113 return finish_folding(ctx, op); 2114 } 2115 2116 static bool fold_mul_highpart(OptContext *ctx, TCGOp *op) 2117 { 2118 if (fold_const2_commutative(ctx, op) || 2119 fold_xi_to_i(ctx, op, 0)) { 2120 return true; 2121 } 2122 return finish_folding(ctx, op); 2123 } 2124 2125 static bool fold_multiply2(OptContext *ctx, TCGOp *op) 2126 { 2127 swap_commutative(op->args[0], &op->args[2], &op->args[3]); 2128 2129 if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) { 2130 uint64_t a = arg_info(op->args[2])->val; 2131 uint64_t b = arg_info(op->args[3])->val; 2132 uint64_t h, l; 2133 TCGArg rl, rh; 2134 TCGOp *op2; 2135 2136 switch (op->opc) { 2137 case INDEX_op_mulu2: 2138 if (ctx->type == TCG_TYPE_I32) { 2139 l = (uint64_t)(uint32_t)a * (uint32_t)b; 2140 h = (int32_t)(l >> 32); 2141 l = (int32_t)l; 2142 } else { 2143 mulu64(&l, &h, a, b); 2144 } 2145 break; 2146 case INDEX_op_muls2: 2147 if (ctx->type == TCG_TYPE_I32) { 2148 l = (int64_t)(int32_t)a * (int32_t)b; 2149 h = l >> 32; 2150 l = (int32_t)l; 2151 } else { 2152 muls64(&l, &h, a, b); 2153 } 2154 break; 2155 default: 2156 g_assert_not_reached(); 2157 } 2158 2159 rl = op->args[0]; 2160 rh = op->args[1]; 2161 2162 /* The proper opcode is supplied by tcg_opt_gen_mov. 
*/ 2163 op2 = opt_insert_before(ctx, op, 0, 2); 2164 2165 tcg_opt_gen_movi(ctx, op, rl, l); 2166 tcg_opt_gen_movi(ctx, op2, rh, h); 2167 return true; 2168 } 2169 return finish_folding(ctx, op); 2170 } 2171 2172 static bool fold_nand(OptContext *ctx, TCGOp *op) 2173 { 2174 uint64_t s_mask; 2175 2176 if (fold_const2_commutative(ctx, op) || 2177 fold_xi_to_not(ctx, op, -1)) { 2178 return true; 2179 } 2180 2181 s_mask = arg_info(op->args[1])->s_mask 2182 & arg_info(op->args[2])->s_mask; 2183 return fold_masks_s(ctx, op, s_mask); 2184 } 2185 2186 static bool fold_neg_no_const(OptContext *ctx, TCGOp *op) 2187 { 2188 /* Set to 1 all bits to the left of the rightmost. */ 2189 uint64_t z_mask = arg_info(op->args[1])->z_mask; 2190 z_mask = -(z_mask & -z_mask); 2191 2192 return fold_masks_z(ctx, op, z_mask); 2193 } 2194 2195 static bool fold_neg(OptContext *ctx, TCGOp *op) 2196 { 2197 return fold_const1(ctx, op) || fold_neg_no_const(ctx, op); 2198 } 2199 2200 static bool fold_nor(OptContext *ctx, TCGOp *op) 2201 { 2202 uint64_t s_mask; 2203 2204 if (fold_const2_commutative(ctx, op) || 2205 fold_xi_to_not(ctx, op, 0)) { 2206 return true; 2207 } 2208 2209 s_mask = arg_info(op->args[1])->s_mask 2210 & arg_info(op->args[2])->s_mask; 2211 return fold_masks_s(ctx, op, s_mask); 2212 } 2213 2214 static bool fold_not(OptContext *ctx, TCGOp *op) 2215 { 2216 if (fold_const1(ctx, op)) { 2217 return true; 2218 } 2219 return fold_masks_s(ctx, op, arg_info(op->args[1])->s_mask); 2220 } 2221 2222 static bool fold_or(OptContext *ctx, TCGOp *op) 2223 { 2224 uint64_t z_mask, s_mask; 2225 TempOptInfo *t1, *t2; 2226 2227 if (fold_const2_commutative(ctx, op) || 2228 fold_xi_to_x(ctx, op, 0) || 2229 fold_xx_to_x(ctx, op)) { 2230 return true; 2231 } 2232 2233 t1 = arg_info(op->args[1]); 2234 t2 = arg_info(op->args[2]); 2235 z_mask = t1->z_mask | t2->z_mask; 2236 s_mask = t1->s_mask & t2->s_mask; 2237 return fold_masks_zs(ctx, op, z_mask, s_mask); 2238 } 2239 2240 static bool fold_orc(OptContext *ctx, TCGOp *op) 2241 { 2242 uint64_t s_mask; 2243 TempOptInfo *t1, *t2; 2244 2245 if (fold_const2(ctx, op) || 2246 fold_xx_to_i(ctx, op, -1) || 2247 fold_xi_to_x(ctx, op, -1) || 2248 fold_ix_to_not(ctx, op, 0)) { 2249 return true; 2250 } 2251 2252 t2 = arg_info(op->args[2]); 2253 if (ti_is_const(t2)) { 2254 /* Fold orc r,x,i to or r,x,~i. */ 2255 switch (ctx->type) { 2256 case TCG_TYPE_I32: 2257 case TCG_TYPE_I64: 2258 op->opc = INDEX_op_or; 2259 break; 2260 case TCG_TYPE_V64: 2261 case TCG_TYPE_V128: 2262 case TCG_TYPE_V256: 2263 op->opc = INDEX_op_or_vec; 2264 break; 2265 default: 2266 g_assert_not_reached(); 2267 } 2268 op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2)); 2269 return fold_or(ctx, op); 2270 } 2271 2272 t1 = arg_info(op->args[1]); 2273 s_mask = t1->s_mask & t2->s_mask; 2274 return fold_masks_s(ctx, op, s_mask); 2275 } 2276 2277 static bool fold_qemu_ld_1reg(OptContext *ctx, TCGOp *op) 2278 { 2279 const TCGOpDef *def = &tcg_op_defs[op->opc]; 2280 MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs]; 2281 MemOp mop = get_memop(oi); 2282 int width = 8 * memop_size(mop); 2283 uint64_t z_mask = -1, s_mask = 0; 2284 2285 if (width < 64) { 2286 if (mop & MO_SIGN) { 2287 s_mask = MAKE_64BIT_MASK(width - 1, 64 - (width - 1)); 2288 } else { 2289 z_mask = MAKE_64BIT_MASK(0, width); 2290 } 2291 } 2292 2293 /* Opcodes that touch guest memory stop the mb optimization. 
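 *
 * The result masks established above feed fold_masks_zs below; for
 * illustration, a MO_UB load has z_mask = 0xff, while a MO_SB load has
 * s_mask = MAKE_64BIT_MASK(7, 57), i.e. bits 63..7 replicate the sign
 * bit of the loaded byte.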
static bool fold_qemu_ld_1reg(OptContext *ctx, TCGOp *op)
{
    const TCGOpDef *def = &tcg_op_defs[op->opc];
    MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
    MemOp mop = get_memop(oi);
    int width = 8 * memop_size(mop);
    uint64_t z_mask = -1, s_mask = 0;

    if (width < 64) {
        if (mop & MO_SIGN) {
            s_mask = MAKE_64BIT_MASK(width - 1, 64 - (width - 1));
        } else {
            z_mask = MAKE_64BIT_MASK(0, width);
        }
    }

    /* Opcodes that touch guest memory stop the mb optimization. */
    ctx->prev_mb = NULL;

    return fold_masks_zs(ctx, op, z_mask, s_mask);
}

static bool fold_qemu_ld_2reg(OptContext *ctx, TCGOp *op)
{
    /* Opcodes that touch guest memory stop the mb optimization. */
    ctx->prev_mb = NULL;
    return finish_folding(ctx, op);
}

static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
{
    /* Opcodes that touch guest memory stop the mb optimization. */
    ctx->prev_mb = NULL;
    return true;
}

static bool fold_remainder(OptContext *ctx, TCGOp *op)
{
    if (fold_const2(ctx, op) ||
        fold_xx_to_i(ctx, op, 0)) {
        return true;
    }
    return finish_folding(ctx, op);
}

/* Return 1 if finished, -1 if simplified, 0 if unchanged. */
static int fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg)
{
    uint64_t a_zmask, b_val;
    TCGCond cond;

    if (!arg_is_const(op->args[2])) {
        return false;
    }

    a_zmask = arg_info(op->args[1])->z_mask;
    b_val = arg_info(op->args[2])->val;
    cond = op->args[3];

    if (ctx->type == TCG_TYPE_I32) {
        a_zmask = (uint32_t)a_zmask;
        b_val = (uint32_t)b_val;
    }

    /*
     * A with only low bits set vs B with high bits set means that A < B.
     */
    if (a_zmask < b_val) {
        bool inv = false;

        switch (cond) {
        case TCG_COND_NE:
        case TCG_COND_LEU:
        case TCG_COND_LTU:
            inv = true;
            /* fall through */
        case TCG_COND_GTU:
        case TCG_COND_GEU:
        case TCG_COND_EQ:
            return tcg_opt_gen_movi(ctx, op, op->args[0], neg ? -inv : inv);
        default:
            break;
        }
    }

    /*
     * A with only lsb set is already boolean.
     */
    if (a_zmask <= 1) {
        bool convert = false;
        bool inv = false;

        switch (cond) {
        case TCG_COND_EQ:
            inv = true;
            /* fall through */
        case TCG_COND_NE:
            convert = (b_val == 0);
            break;
        case TCG_COND_LTU:
        case TCG_COND_TSTEQ:
            inv = true;
            /* fall through */
        case TCG_COND_GEU:
        case TCG_COND_TSTNE:
            convert = (b_val == 1);
            break;
        default:
            break;
        }
        if (convert) {
            if (!inv && !neg) {
                return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
            }

            if (!inv) {
                op->opc = INDEX_op_neg;
            } else if (neg) {
                op->opc = INDEX_op_add;
                op->args[2] = arg_new_constant(ctx, -1);
            } else {
                op->opc = INDEX_op_xor;
                op->args[2] = arg_new_constant(ctx, 1);
            }
            return -1;
        }
    }
    return 0;
}
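/*
 * Illustrative sketch (not generated output) of what fold_setcond_zmask
 * above can prove.  If the known-zeros mask says the first operand already
 * fits in one bit (z_mask <= 1), then for example:
 *
 *     setcond d, a, $0, ne      =>  mov d, a        (a is already 0 or 1)
 *     setcond d, a, $0, eq      =>  xor d, a, $1
 *     negsetcond d, a, $0, ne   =>  neg d, a
 *
 * And if every possibly-set bit of a lies below a constant b, unsigned
 * comparisons such as ltu/geu against b are answered with a constant.
 */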
static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg)
{
    TCGCond cond = op->args[3];
    TCGArg ret, src1, src2;
    TCGOp *op2;
    uint64_t val;
    int sh;
    bool inv;

    if (!is_tst_cond(cond) || !arg_is_const(op->args[2])) {
        return;
    }

    src2 = op->args[2];
    val = arg_info(src2)->val;
    if (!is_power_of_2(val)) {
        return;
    }
    sh = ctz64(val);

    ret = op->args[0];
    src1 = op->args[1];
    inv = cond == TCG_COND_TSTEQ;

    if (sh && neg && !inv && TCG_TARGET_sextract_valid(ctx->type, sh, 1)) {
        op->opc = INDEX_op_sextract;
        op->args[1] = src1;
        op->args[2] = sh;
        op->args[3] = 1;
        return;
    } else if (sh && TCG_TARGET_extract_valid(ctx->type, sh, 1)) {
        op->opc = INDEX_op_extract;
        op->args[1] = src1;
        op->args[2] = sh;
        op->args[3] = 1;
    } else {
        if (sh) {
            op2 = opt_insert_before(ctx, op, INDEX_op_shr, 3);
            op2->args[0] = ret;
            op2->args[1] = src1;
            op2->args[2] = arg_new_constant(ctx, sh);
            src1 = ret;
        }
        op->opc = INDEX_op_and;
        op->args[1] = src1;
        op->args[2] = arg_new_constant(ctx, 1);
    }

    if (neg && inv) {
        op2 = opt_insert_after(ctx, op, INDEX_op_add, 3);
        op2->args[0] = ret;
        op2->args[1] = ret;
        op2->args[2] = arg_new_constant(ctx, -1);
    } else if (inv) {
        op2 = opt_insert_after(ctx, op, INDEX_op_xor, 3);
        op2->args[0] = ret;
        op2->args[1] = ret;
        op2->args[2] = arg_new_constant(ctx, 1);
    } else if (neg) {
        op2 = opt_insert_after(ctx, op, INDEX_op_neg, 2);
        op2->args[0] = ret;
        op2->args[1] = ret;
    }
}

static bool fold_setcond(OptContext *ctx, TCGOp *op)
{
    int i = do_constant_folding_cond1(ctx, op, op->args[0], &op->args[1],
                                      &op->args[2], &op->args[3]);
    if (i >= 0) {
        return tcg_opt_gen_movi(ctx, op, op->args[0], i);
    }

    i = fold_setcond_zmask(ctx, op, false);
    if (i > 0) {
        return true;
    }
    if (i == 0) {
        fold_setcond_tst_pow2(ctx, op, false);
    }

    return fold_masks_z(ctx, op, 1);
}

static bool fold_negsetcond(OptContext *ctx, TCGOp *op)
{
    int i = do_constant_folding_cond1(ctx, op, op->args[0], &op->args[1],
                                      &op->args[2], &op->args[3]);
    if (i >= 0) {
        return tcg_opt_gen_movi(ctx, op, op->args[0], -i);
    }

    i = fold_setcond_zmask(ctx, op, true);
    if (i > 0) {
        return true;
    }
    if (i == 0) {
        fold_setcond_tst_pow2(ctx, op, true);
    }

    /* Value is {0,-1} so all bits are repetitions of the sign. */
    return fold_masks_s(ctx, op, -1);
}
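/*
 * Illustrative sketch (not generated output) of fold_setcond_tst_pow2
 * above.  A test of a single power-of-2 bit reduces to a 1-bit extract
 * when the backend supports it, otherwise to shift+and.  Assuming a host
 * where extract/sextract of (pos=4, len=1) is valid:
 *
 *     setcond d, a, $0x10, tstne     =>  extract d, a, $4, $1
 *     setcond d, a, $0x10, tsteq     =>  extract d, a, $4, $1
 *                                        xor d, d, $1
 *     negsetcond d, a, $0x10, tstne  =>  sextract d, a, $4, $1
 */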
static bool fold_setcond2(OptContext *ctx, TCGOp *op)
{
    TCGCond cond;
    int i, inv = 0;

    i = do_constant_folding_cond2(ctx, op, &op->args[1]);
    cond = op->args[5];
    if (i >= 0) {
        goto do_setcond_const;
    }

    switch (cond) {
    case TCG_COND_LT:
    case TCG_COND_GE:
        /*
         * Simplify LT/GE comparisons vs zero to a single compare
         * vs the high word of the input.
         */
        if (arg_is_const_val(op->args[3], 0) &&
            arg_is_const_val(op->args[4], 0)) {
            goto do_setcond_high;
        }
        break;

    case TCG_COND_NE:
        inv = 1;
        QEMU_FALLTHROUGH;
    case TCG_COND_EQ:
        /*
         * Simplify EQ/NE comparisons where one of the pairs
         * can be simplified.
         */
        i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
                                     op->args[3], cond);
        switch (i ^ inv) {
        case 0:
            goto do_setcond_const;
        case 1:
            goto do_setcond_high;
        }

        i = do_constant_folding_cond(TCG_TYPE_I32, op->args[2],
                                     op->args[4], cond);
        switch (i ^ inv) {
        case 0:
            goto do_setcond_const;
        case 1:
            goto do_setcond_low;
        }
        break;

    case TCG_COND_TSTEQ:
    case TCG_COND_TSTNE:
        if (arg_is_const_val(op->args[3], 0)) {
            goto do_setcond_high;
        }
        if (arg_is_const_val(op->args[4], 0)) {
            goto do_setcond_low;
        }
        break;

    default:
        break;

    do_setcond_low:
        op->args[2] = op->args[3];
        op->args[3] = cond;
        op->opc = INDEX_op_setcond;
        return fold_setcond(ctx, op);

    do_setcond_high:
        op->args[1] = op->args[2];
        op->args[2] = op->args[4];
        op->args[3] = cond;
        op->opc = INDEX_op_setcond;
        return fold_setcond(ctx, op);
    }

    return fold_masks_z(ctx, op, 1);

 do_setcond_const:
    return tcg_opt_gen_movi(ctx, op, op->args[0], i);
}

static bool fold_sextract(OptContext *ctx, TCGOp *op)
{
    uint64_t z_mask, s_mask, s_mask_old;
    TempOptInfo *t1 = arg_info(op->args[1]);
    int pos = op->args[2];
    int len = op->args[3];

    if (ti_is_const(t1)) {
        return tcg_opt_gen_movi(ctx, op, op->args[0],
                                sextract64(ti_const_val(t1), pos, len));
    }

    s_mask_old = t1->s_mask;
    s_mask = s_mask_old >> pos;
    s_mask |= -1ull << (len - 1);

    if (pos == 0 && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
        return true;
    }

    z_mask = sextract64(t1->z_mask, pos, len);
    return fold_masks_zs(ctx, op, z_mask, s_mask);
}
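/*
 * Illustrative sketch (not generated output) of the word-splitting done by
 * fold_setcond2 above.  With a 64-bit value held in the 32-bit pair
 * (lo, hi):
 *
 *     setcond2_i32 d, lo, hi, $0, $0, lt  =>  setcond d, hi, $0, lt
 *     setcond2_i32 d, lo, hi, $0, $c, eq  =>  setcond d, hi, $c, eq
 *                                             (when lo == 0 is known)
 *
 * The rewritten op is handed back to fold_setcond so the single-word
 * simplifications above also get a chance to run.
 */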
static bool fold_shift(OptContext *ctx, TCGOp *op)
{
    uint64_t s_mask, z_mask;
    TempOptInfo *t1, *t2;

    if (fold_const2(ctx, op) ||
        fold_ix_to_i(ctx, op, 0) ||
        fold_xi_to_x(ctx, op, 0)) {
        return true;
    }

    t1 = arg_info(op->args[1]);
    t2 = arg_info(op->args[2]);
    s_mask = t1->s_mask;
    z_mask = t1->z_mask;

    if (ti_is_const(t2)) {
        int sh = ti_const_val(t2);

        z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
        s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);

        return fold_masks_zs(ctx, op, z_mask, s_mask);
    }

    switch (op->opc) {
    case INDEX_op_sar:
        /*
         * Arithmetic right shift will not reduce the number of
         * input sign repetitions.
         */
        return fold_masks_s(ctx, op, s_mask);
    case INDEX_op_shr:
        /*
         * If the sign bit is known zero, then logical right shift
         * will not reduce the number of input sign repetitions.
         */
        if (~z_mask & -s_mask) {
            return fold_masks_s(ctx, op, s_mask);
        }
        break;
    default:
        break;
    }

    return finish_folding(ctx, op);
}

static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
{
    TCGOpcode neg_op;
    bool have_neg;

    if (!arg_is_const(op->args[1]) || arg_info(op->args[1])->val != 0) {
        return false;
    }

    switch (ctx->type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        neg_op = INDEX_op_neg;
        have_neg = true;
        break;
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        neg_op = INDEX_op_neg_vec;
        have_neg = (TCG_TARGET_HAS_neg_vec &&
                    tcg_can_emit_vec_op(neg_op, ctx->type, TCGOP_VECE(op)) > 0);
        break;
    default:
        g_assert_not_reached();
    }
    if (have_neg) {
        op->opc = neg_op;
        op->args[1] = op->args[2];
        return fold_neg_no_const(ctx, op);
    }
    return false;
}

/* We cannot as yet do_constant_folding with vectors. */
static bool fold_sub_vec(OptContext *ctx, TCGOp *op)
{
    if (fold_xx_to_i(ctx, op, 0) ||
        fold_xi_to_x(ctx, op, 0) ||
        fold_sub_to_neg(ctx, op)) {
        return true;
    }
    return finish_folding(ctx, op);
}

static bool fold_sub(OptContext *ctx, TCGOp *op)
{
    if (fold_const2(ctx, op) ||
        fold_xx_to_i(ctx, op, 0) ||
        fold_xi_to_x(ctx, op, 0) ||
        fold_sub_to_neg(ctx, op)) {
        return true;
    }

    /* Fold sub r,x,i to add r,x,-i */
    if (arg_is_const(op->args[2])) {
        uint64_t val = arg_info(op->args[2])->val;

        op->opc = INDEX_op_add;
        op->args[2] = arg_new_constant(ctx, -val);
    }
    return finish_folding(ctx, op);
}

static void squash_prev_borrowout(OptContext *ctx, TCGOp *op)
{
    TempOptInfo *t2;

    op = QTAILQ_PREV(op, link);
    switch (op->opc) {
    case INDEX_op_subbo:
        op->opc = INDEX_op_sub;
        fold_sub(ctx, op);
        break;
    case INDEX_op_subbio:
        op->opc = INDEX_op_subbi;
        break;
    case INDEX_op_subb1o:
        t2 = arg_info(op->args[2]);
        if (ti_is_const(t2)) {
            op->opc = INDEX_op_add;
            op->args[2] = arg_new_constant(ctx, -(ti_const_val(t2) + 1));
            /* Perform other constant folding, if needed. */
            fold_add(ctx, op);
        } else {
            TCGArg ret = op->args[0];
            op->opc = INDEX_op_sub;
            op = opt_insert_after(ctx, op, INDEX_op_add, 3);
            op->args[0] = ret;
            op->args[1] = ret;
            op->args[2] = arg_new_constant(ctx, -1);
        }
        break;
    default:
        g_assert_not_reached();
    }
}
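/*
 * Illustrative sketch (not generated output) of the canonicalization in
 * fold_sub above.  Subtraction of a constant becomes addition of its
 * negation, so later passes only have to match one immediate form:
 *
 *     sub d, x, $8    =>  add d, x, $-8
 *     sub d, $0, x    =>  neg d, x       (via fold_sub_to_neg)
 *     sub d, x, x     =>  mov d, $0      (via fold_xx_to_i)
 */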
2784 */ 2785 t2 = arg_info(op->args[2]); 2786 if (ti_is_const(t2)) { 2787 op->args[2] = arg_new_constant(ctx, -(ti_const_val(t2) + 1)); 2788 } else { 2789 TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_sub, 3); 2790 2791 op2->args[0] = op->args[0]; 2792 op2->args[1] = op->args[1]; 2793 op2->args[2] = op->args[2]; 2794 fold_sub(ctx, op2); 2795 2796 op->args[1] = op->args[0]; 2797 op->args[2] = arg_new_constant(ctx, -1); 2798 } 2799 op->opc = INDEX_op_add; 2800 return fold_add(ctx, op); 2801 } 2802 2803 static bool fold_subbio(OptContext *ctx, TCGOp *op) 2804 { 2805 TempOptInfo *t1, *t2; 2806 int borrow_out = -1; 2807 2808 if (ctx->carry_state < 0) { 2809 return finish_folding(ctx, op); 2810 } 2811 2812 squash_prev_borrowout(ctx, op); 2813 if (ctx->carry_state == 0) { 2814 goto do_subbo; 2815 } 2816 2817 t1 = arg_info(op->args[1]); 2818 t2 = arg_info(op->args[2]); 2819 2820 /* Propagate the known borrow-in into a constant, if possible. */ 2821 if (ti_is_const(t2)) { 2822 uint64_t max = ctx->type == TCG_TYPE_I32 ? UINT32_MAX : UINT64_MAX; 2823 uint64_t v = ti_const_val(t2) & max; 2824 2825 if (v < max) { 2826 op->args[2] = arg_new_constant(ctx, v + 1); 2827 goto do_subbo; 2828 } 2829 /* subtracting max + 1 produces known borrow out. */ 2830 borrow_out = 1; 2831 } 2832 if (ti_is_const(t1)) { 2833 uint64_t v = ti_const_val(t1); 2834 if (v != 0) { 2835 op->args[2] = arg_new_constant(ctx, v - 1); 2836 goto do_subbo; 2837 } 2838 } 2839 2840 /* Adjust the opcode to remember the known carry-in. */ 2841 op->opc = INDEX_op_subb1o; 2842 ctx->carry_state = borrow_out; 2843 return finish_folding(ctx, op); 2844 2845 do_subbo: 2846 op->opc = INDEX_op_subbo; 2847 return fold_subbo(ctx, op); 2848 } 2849 2850 static bool fold_subbo(OptContext *ctx, TCGOp *op) 2851 { 2852 TempOptInfo *t1 = arg_info(op->args[1]); 2853 TempOptInfo *t2 = arg_info(op->args[2]); 2854 int borrow_out = -1; 2855 2856 if (ti_is_const(t2)) { 2857 uint64_t v2 = ti_const_val(t2); 2858 if (v2 == 0) { 2859 borrow_out = 0; 2860 } else if (ti_is_const(t1)) { 2861 uint64_t v1 = ti_const_val(t1); 2862 borrow_out = v1 < v2; 2863 } 2864 } 2865 ctx->carry_state = borrow_out; 2866 return finish_folding(ctx, op); 2867 } 2868 2869 static bool fold_tcg_ld(OptContext *ctx, TCGOp *op) 2870 { 2871 uint64_t z_mask = -1, s_mask = 0; 2872 2873 /* We can't do any folding with a load, but we can record bits. 
static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
{
    uint64_t z_mask = -1, s_mask = 0;

    /* We can't do any folding with a load, but we can record bits. */
    switch (op->opc) {
    case INDEX_op_ld8s:
        s_mask = INT8_MIN;
        break;
    case INDEX_op_ld8u:
        z_mask = MAKE_64BIT_MASK(0, 8);
        break;
    case INDEX_op_ld16s:
        s_mask = INT16_MIN;
        break;
    case INDEX_op_ld16u:
        z_mask = MAKE_64BIT_MASK(0, 16);
        break;
    case INDEX_op_ld32s:
        s_mask = INT32_MIN;
        break;
    case INDEX_op_ld32u:
        z_mask = MAKE_64BIT_MASK(0, 32);
        break;
    default:
        g_assert_not_reached();
    }
    return fold_masks_zs(ctx, op, z_mask, s_mask);
}

static bool fold_tcg_ld_memcopy(OptContext *ctx, TCGOp *op)
{
    TCGTemp *dst, *src;
    intptr_t ofs;
    TCGType type;

    if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
        return finish_folding(ctx, op);
    }

    type = ctx->type;
    ofs = op->args[2];
    dst = arg_temp(op->args[0]);
    src = find_mem_copy_for(ctx, type, ofs);
    if (src && src->base_type == type) {
        return tcg_opt_gen_mov(ctx, op, temp_arg(dst), temp_arg(src));
    }

    reset_ts(ctx, dst);
    record_mem_copy(ctx, type, dst, ofs, ofs + tcg_type_size(type) - 1);
    return true;
}

static bool fold_tcg_st(OptContext *ctx, TCGOp *op)
{
    intptr_t ofs = op->args[2];
    intptr_t lm1;

    if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
        remove_mem_copy_all(ctx);
        return true;
    }

    switch (op->opc) {
    case INDEX_op_st8:
        lm1 = 0;
        break;
    case INDEX_op_st16:
        lm1 = 1;
        break;
    case INDEX_op_st32:
        lm1 = 3;
        break;
    case INDEX_op_st:
    case INDEX_op_st_vec:
        lm1 = tcg_type_size(ctx->type) - 1;
        break;
    default:
        g_assert_not_reached();
    }
    remove_mem_copy_in(ctx, ofs, ofs + lm1);
    return true;
}

static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op)
{
    TCGTemp *src;
    intptr_t ofs, last;
    TCGType type;

    if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
        return fold_tcg_st(ctx, op);
    }

    src = arg_temp(op->args[0]);
    ofs = op->args[2];
    type = ctx->type;

    /*
     * Eliminate duplicate stores of a constant.
     * This happens frequently when the target ISA zero-extends.
     */
    if (ts_is_const(src)) {
        TCGTemp *prev = find_mem_copy_for(ctx, type, ofs);
        if (src == prev) {
            tcg_op_remove(ctx->tcg, op);
            return true;
        }
    }

    last = ofs + tcg_type_size(type) - 1;
    remove_mem_copy_in(ctx, ofs, last);
    record_mem_copy(ctx, type, src, ofs, last);
    return true;
}

static bool fold_xor(OptContext *ctx, TCGOp *op)
{
    uint64_t z_mask, s_mask;
    TempOptInfo *t1, *t2;

    if (fold_const2_commutative(ctx, op) ||
        fold_xx_to_i(ctx, op, 0) ||
        fold_xi_to_x(ctx, op, 0) ||
        fold_xi_to_not(ctx, op, -1)) {
        return true;
    }

    t1 = arg_info(op->args[1]);
    t2 = arg_info(op->args[2]);
    z_mask = t1->z_mask | t2->z_mask;
    s_mask = t1->s_mask & t2->s_mask;
    return fold_masks_zs(ctx, op, z_mask, s_mask);
}
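/*
 * Illustrative sketch (not generated output) of the memcopy tracking used
 * by fold_tcg_ld_memcopy/fold_tcg_st_memcopy above.  For loads and stores
 * relative to tcg_env, the interval tree remembers which temp was last
 * stored at each offset, so a reload of the same slot becomes a copy and a
 * repeated store of the same constant disappears:
 *
 *     st  t0, env, $0x10
 *     ld  t1, env, $0x10     =>  mov t1, t0
 *     st  $0, env, $0x20
 *     st  $0, env, $0x20     =>  (second store removed)
 *
 * A store through any base pointer other than env invalidates all of the
 * tracked state.
 */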
/* Propagate constants and copies, fold constant expressions. */
void tcg_optimize(TCGContext *s)
{
    int nb_temps, i;
    TCGOp *op, *op_next;
    OptContext ctx = { .tcg = s };

    QSIMPLEQ_INIT(&ctx.mem_free);

    /* Array VALS has an element for each temp.
       If this temp holds a constant then its value is kept in VALS' element.
       If this temp is a copy of other ones then the other copies are
       available through the doubly linked circular list. */

    nb_temps = s->nb_temps;
    for (i = 0; i < nb_temps; ++i) {
        s->temps[i].state_ptr = NULL;
    }

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        TCGOpcode opc = op->opc;
        const TCGOpDef *def;
        bool done = false;

        /* Calls are special. */
        if (opc == INDEX_op_call) {
            fold_call(&ctx, op);
            continue;
        }

        def = &tcg_op_defs[opc];
        init_arguments(&ctx, op, def->nb_oargs + def->nb_iargs);
        copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs);

        /* Pre-compute the type of the operation. */
        ctx.type = TCGOP_TYPE(op);

        /*
         * Process each opcode.
         * Sorted alphabetically by opcode as much as possible.
         */
        switch (opc) {
        case INDEX_op_add:
            done = fold_add(&ctx, op);
            break;
        case INDEX_op_add_vec:
            done = fold_add_vec(&ctx, op);
            break;
        case INDEX_op_addci:
            done = fold_addci(&ctx, op);
            break;
        case INDEX_op_addcio:
            done = fold_addcio(&ctx, op);
            break;
        case INDEX_op_addco:
            done = fold_addco(&ctx, op);
            break;
        case INDEX_op_and:
        case INDEX_op_and_vec:
            done = fold_and(&ctx, op);
            break;
        case INDEX_op_andc:
        case INDEX_op_andc_vec:
            done = fold_andc(&ctx, op);
            break;
        case INDEX_op_brcond:
            done = fold_brcond(&ctx, op);
            break;
        case INDEX_op_brcond2_i32:
            done = fold_brcond2(&ctx, op);
            break;
        case INDEX_op_bswap16:
        case INDEX_op_bswap32:
        case INDEX_op_bswap64:
            done = fold_bswap(&ctx, op);
            break;
        case INDEX_op_clz:
        case INDEX_op_ctz:
            done = fold_count_zeros(&ctx, op);
            break;
        case INDEX_op_ctpop:
            done = fold_ctpop(&ctx, op);
            break;
        case INDEX_op_deposit:
            done = fold_deposit(&ctx, op);
            break;
        case INDEX_op_divs:
        case INDEX_op_divu:
            done = fold_divide(&ctx, op);
            break;
        case INDEX_op_dup_vec:
            done = fold_dup(&ctx, op);
            break;
        case INDEX_op_dup2_vec:
            done = fold_dup2(&ctx, op);
            break;
        case INDEX_op_eqv:
        case INDEX_op_eqv_vec:
            done = fold_eqv(&ctx, op);
            break;
        case INDEX_op_extract:
            done = fold_extract(&ctx, op);
            break;
        case INDEX_op_extract2:
            done = fold_extract2(&ctx, op);
            break;
        case INDEX_op_ext_i32_i64:
            done = fold_exts(&ctx, op);
            break;
        case INDEX_op_extu_i32_i64:
        case INDEX_op_extrl_i64_i32:
        case INDEX_op_extrh_i64_i32:
            done = fold_extu(&ctx, op);
            break;
        case INDEX_op_ld8s:
        case INDEX_op_ld8u:
        case INDEX_op_ld16s:
        case INDEX_op_ld16u:
        case INDEX_op_ld32s:
        case INDEX_op_ld32u:
            done = fold_tcg_ld(&ctx, op);
            break;
        case INDEX_op_ld:
        case INDEX_op_ld_vec:
            done = fold_tcg_ld_memcopy(&ctx, op);
            break;
        case INDEX_op_st8:
        case INDEX_op_st16:
        case INDEX_op_st32:
            done = fold_tcg_st(&ctx, op);
            break;
        case INDEX_op_st:
        case INDEX_op_st_vec:
            done = fold_tcg_st_memcopy(&ctx, op);
            break;
        case INDEX_op_mb:
            done = fold_mb(&ctx, op);
            break;
        case INDEX_op_mov:
        case INDEX_op_mov_vec:
            done = fold_mov(&ctx, op);
            break;
        case INDEX_op_movcond:
            done = fold_movcond(&ctx, op);
            break;
        case INDEX_op_mul:
            done = fold_mul(&ctx, op);
            break;
        case INDEX_op_mulsh:
        case INDEX_op_muluh:
            done = fold_mul_highpart(&ctx, op);
            break;
        case INDEX_op_muls2:
        case INDEX_op_mulu2:
            done = fold_multiply2(&ctx, op);
            break;
        case INDEX_op_nand:
        case INDEX_op_nand_vec:
            done = fold_nand(&ctx, op);
            break;
        case INDEX_op_neg:
            done = fold_neg(&ctx, op);
            break;
        case INDEX_op_nor:
        case INDEX_op_nor_vec:
            done = fold_nor(&ctx, op);
            break;
        case INDEX_op_not:
        case INDEX_op_not_vec:
            done = fold_not(&ctx, op);
            break;
        case INDEX_op_or:
        case INDEX_op_or_vec:
            done = fold_or(&ctx, op);
            break;
        case INDEX_op_orc:
        case INDEX_op_orc_vec:
            done = fold_orc(&ctx, op);
            break;
        case INDEX_op_qemu_ld:
            done = fold_qemu_ld_1reg(&ctx, op);
            break;
        case INDEX_op_qemu_ld2:
            done = fold_qemu_ld_2reg(&ctx, op);
            break;
        case INDEX_op_qemu_st:
        case INDEX_op_qemu_st2:
            done = fold_qemu_st(&ctx, op);
            break;
        case INDEX_op_rems:
        case INDEX_op_remu:
            done = fold_remainder(&ctx, op);
            break;
        case INDEX_op_rotl:
        case INDEX_op_rotr:
        case INDEX_op_sar:
        case INDEX_op_shl:
        case INDEX_op_shr:
            done = fold_shift(&ctx, op);
            break;
        case INDEX_op_setcond:
            done = fold_setcond(&ctx, op);
            break;
        case INDEX_op_negsetcond:
            done = fold_negsetcond(&ctx, op);
            break;
        case INDEX_op_setcond2_i32:
            done = fold_setcond2(&ctx, op);
            break;
        case INDEX_op_cmp_vec:
            done = fold_cmp_vec(&ctx, op);
            break;
        case INDEX_op_cmpsel_vec:
            done = fold_cmpsel_vec(&ctx, op);
            break;
        case INDEX_op_bitsel_vec:
            done = fold_bitsel_vec(&ctx, op);
            break;
        case INDEX_op_sextract:
            done = fold_sextract(&ctx, op);
            break;
        case INDEX_op_sub:
            done = fold_sub(&ctx, op);
            break;
        case INDEX_op_subbi:
            done = fold_subbi(&ctx, op);
            break;
        case INDEX_op_subbio:
            done = fold_subbio(&ctx, op);
            break;
        case INDEX_op_subbo:
            done = fold_subbo(&ctx, op);
            break;
        case INDEX_op_sub_vec:
            done = fold_sub_vec(&ctx, op);
            break;
        case INDEX_op_xor:
        case INDEX_op_xor_vec:
            done = fold_xor(&ctx, op);
            break;
        case INDEX_op_set_label:
        case INDEX_op_br:
        case INDEX_op_exit_tb:
        case INDEX_op_goto_tb:
        case INDEX_op_goto_ptr:
            finish_ebb(&ctx);
            done = true;
            break;
        default:
            done = finish_folding(&ctx, op);
            break;
        }
        tcg_debug_assert(done);
    }
}