/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2018 Linaro, Inc.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "tcg/tcg.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg/tcg-op-common.h"
#include "tcg/tcg-mo.h"
#include "tcg-internal.h"
#include "tcg-has.h"

/*
 * Vector optional opcode tracking.
 * Except for the basic logical operations (and, or, xor) and
 * data movement (mov, ld, st, dupi), many vector opcodes are
 * optional and may not be supported on the host. Thank Intel
 * for the irregularity in their instruction set.
 *
 * The gvec expanders allow custom vector operations to be composed,
 * generally via the .fniv callback in the GVecGen* structures. At
 * the same time, in deciding whether to use this hook we need to
 * know if the host supports the required operations. This is
 * presented as an array of opcodes, terminated by 0. Each opcode
 * is assumed to be expanded with the given VECE.
 *
 * For debugging, we want to validate this array. Therefore, when
 * tcg_ctx->vecop_list is non-NULL, the tcg_gen_*_vec expanders
 * will validate that their opcode is present in the list.
 */
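/*
 * As a (hypothetical) illustration of the contract above, a front end
 * that composes an expansion from tcg_gen_neg_vec() and
 * tcg_gen_shri_vec() would advertise exactly those optional opcodes:
 *
 *     static const TCGOpcode vecop_list[] = {
 *         INDEX_op_neg_vec, INDEX_op_shri_vec, 0
 *     };
 *
 * and pass the list through the .opt_opc member of its GVecGen*
 * structure so that it can be checked against the host.
 */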
static void tcg_assert_listed_vecop(TCGOpcode op)
{
#ifdef CONFIG_DEBUG_TCG
    const TCGOpcode *p = tcg_ctx->vecop_list;
    if (p) {
        for (; *p; ++p) {
            if (*p == op) {
                return;
            }
        }
        g_assert_not_reached();
    }
#endif
}

bool tcg_can_emit_vecop_list(const TCGOpcode *list,
                             TCGType type, unsigned vece)
{
    if (list == NULL) {
        return true;
    }

    for (; *list; ++list) {
        TCGOpcode opc = *list;

#ifdef CONFIG_DEBUG_TCG
        switch (opc) {
        case INDEX_op_and_vec:
        case INDEX_op_or_vec:
        case INDEX_op_xor_vec:
        case INDEX_op_mov_vec:
        case INDEX_op_dup_vec:
        case INDEX_op_dup2_vec:
        case INDEX_op_ld_vec:
        case INDEX_op_st_vec:
        case INDEX_op_bitsel_vec:
            /* These opcodes are mandatory and should not be listed. */
            g_assert_not_reached();
        case INDEX_op_not_vec:
            /* These opcodes have generic expansions using the above. */
            g_assert_not_reached();
        default:
            break;
        }
#endif

        if (tcg_can_emit_vec_op(opc, type, vece)) {
            continue;
        }

        /*
         * The opcode list is created by front ends based on what they
         * actually invoke. We must mirror the logic in the routines
         * below for generic expansions using other opcodes.
         */
        switch (opc) {
        case INDEX_op_neg_vec:
            if (tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece)) {
                continue;
            }
            break;
        case INDEX_op_abs_vec:
            if (tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece)
                && (tcg_can_emit_vec_op(INDEX_op_smax_vec, type, vece) > 0
                    || tcg_can_emit_vec_op(INDEX_op_sari_vec, type, vece) > 0
                    || tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece))) {
                continue;
            }
            break;
        case INDEX_op_usadd_vec:
            if (tcg_can_emit_vec_op(INDEX_op_umin_vec, type, vece) ||
                tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) {
                continue;
            }
            break;
        case INDEX_op_ussub_vec:
            if (tcg_can_emit_vec_op(INDEX_op_umax_vec, type, vece) ||
                tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) {
                continue;
            }
            break;
        case INDEX_op_cmpsel_vec:
        case INDEX_op_smin_vec:
        case INDEX_op_smax_vec:
        case INDEX_op_umin_vec:
        case INDEX_op_umax_vec:
            if (tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) {
                continue;
            }
            break;
        default:
            break;
        }
        return false;
    }
    return true;
}
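/*
 * Sketch of the pattern used throughout this file (and by the gvec
 * expanders) to install and restore an opcode list around a composed
 * expansion; the names here are illustrative only:
 *
 *     const TCGOpcode *hold_list = tcg_swap_vecop_list(list);
 *     ... emit tcg_gen_*_vec operations covered by 'list' ...
 *     tcg_swap_vecop_list(hold_list);
 *
 * Swapping in NULL disables the debug validation while an expansion
 * emits helper opcodes that the front end did not itself list.
 */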
void vec_gen_2(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, TCGArg a)
{
    TCGOp *op = tcg_emit_op(opc, 2);
    TCGOP_TYPE(op) = type;
    TCGOP_VECE(op) = vece;
    op->args[0] = r;
    op->args[1] = a;
}

void vec_gen_3(TCGOpcode opc, TCGType type, unsigned vece,
               TCGArg r, TCGArg a, TCGArg b)
{
    TCGOp *op = tcg_emit_op(opc, 3);
    TCGOP_TYPE(op) = type;
    TCGOP_VECE(op) = vece;
    op->args[0] = r;
    op->args[1] = a;
    op->args[2] = b;
}

void vec_gen_4(TCGOpcode opc, TCGType type, unsigned vece,
               TCGArg r, TCGArg a, TCGArg b, TCGArg c)
{
    TCGOp *op = tcg_emit_op(opc, 4);
    TCGOP_TYPE(op) = type;
    TCGOP_VECE(op) = vece;
    op->args[0] = r;
    op->args[1] = a;
    op->args[2] = b;
    op->args[3] = c;
}

void vec_gen_6(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r,
               TCGArg a, TCGArg b, TCGArg c, TCGArg d, TCGArg e)
{
    TCGOp *op = tcg_emit_op(opc, 6);
    TCGOP_TYPE(op) = type;
    TCGOP_VECE(op) = vece;
    op->args[0] = r;
    op->args[1] = a;
    op->args[2] = b;
    op->args[3] = c;
    op->args[4] = d;
    op->args[5] = e;
}

static void vec_gen_op2(TCGOpcode opc, unsigned vece, TCGv_vec r, TCGv_vec a)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGType type = rt->base_type;

    /* Must have enough inputs for the output. */
    tcg_debug_assert(at->base_type >= type);
    vec_gen_2(opc, type, vece, temp_arg(rt), temp_arg(at));
}

static void vec_gen_op3(TCGOpcode opc, unsigned vece,
                        TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGType type = rt->base_type;

    /* Must have enough inputs for the output. */
    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    vec_gen_3(opc, type, vece, temp_arg(rt), temp_arg(at), temp_arg(bt));
}

void tcg_gen_mov_vec(TCGv_vec r, TCGv_vec a)
{
    if (r != a) {
        vec_gen_op2(INDEX_op_mov_vec, 0, r, a);
    }
}

void tcg_gen_dupi_vec(unsigned vece, TCGv_vec r, uint64_t a)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    tcg_gen_mov_vec(r, tcg_constant_vec(rt->base_type, vece, a));
}

void tcg_gen_dup_i64_vec(unsigned vece, TCGv_vec r, TCGv_i64 a)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    if (TCG_TARGET_REG_BITS == 64) {
        TCGArg ai = tcgv_i64_arg(a);
        vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
    } else if (vece == MO_64) {
        TCGArg al = tcgv_i32_arg(TCGV_LOW(a));
        TCGArg ah = tcgv_i32_arg(TCGV_HIGH(a));
        vec_gen_3(INDEX_op_dup2_vec, type, MO_64, ri, al, ah);
    } else {
        TCGArg ai = tcgv_i32_arg(TCGV_LOW(a));
        vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
    }
}

void tcg_gen_dup_i32_vec(unsigned vece, TCGv_vec r, TCGv_i32 a)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGArg ai = tcgv_i32_arg(a);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
}

void tcg_gen_dup_mem_vec(unsigned vece, TCGv_vec r, TCGv_ptr b,
                         tcg_target_long ofs)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGArg bi = tcgv_ptr_arg(b);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    vec_gen_3(INDEX_op_dupm_vec, type, vece, ri, bi, ofs);
}

static void vec_gen_ldst(TCGOpcode opc, TCGv_vec r, TCGv_ptr b, TCGArg o)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGArg bi = tcgv_ptr_arg(b);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    vec_gen_3(opc, type, 0, ri, bi, o);
}

void tcg_gen_ld_vec(TCGv_vec r, TCGv_ptr b, TCGArg o)
{
    vec_gen_ldst(INDEX_op_ld_vec, r, b, o);
}

void tcg_gen_st_vec(TCGv_vec r, TCGv_ptr b, TCGArg o)
{
    vec_gen_ldst(INDEX_op_st_vec, r, b, o);
}

void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr b, TCGArg o, TCGType low_type)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGArg bi = tcgv_ptr_arg(b);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    tcg_debug_assert(low_type >= TCG_TYPE_V64);
    tcg_debug_assert(low_type <= type);
    vec_gen_3(INDEX_op_st_vec, low_type, 0, ri, bi, o);
}

void tcg_gen_and_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    vec_gen_op3(INDEX_op_and_vec, 0, r, a, b);
}

void tcg_gen_or_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    vec_gen_op3(INDEX_op_or_vec, 0, r, a, b);
}

void tcg_gen_xor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    vec_gen_op3(INDEX_op_xor_vec, 0, r, a, b);
}

void tcg_gen_andc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    if (TCG_TARGET_HAS_andc_vec) {
        vec_gen_op3(INDEX_op_andc_vec, 0, r, a, b);
    } else {
        TCGv_vec t = tcg_temp_new_vec_matching(r);
        tcg_gen_not_vec(0, t, b);
        tcg_gen_and_vec(0, r, a, t);
        tcg_temp_free_vec(t);
    }
}

void tcg_gen_orc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    if (TCG_TARGET_HAS_orc_vec) {
        vec_gen_op3(INDEX_op_orc_vec, 0, r, a, b);
    } else {
        TCGv_vec t = tcg_temp_new_vec_matching(r);
        tcg_gen_not_vec(0, t, b);
        tcg_gen_or_vec(0, r, a, t);
        tcg_temp_free_vec(t);
    }
}

void tcg_gen_nand_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    if (TCG_TARGET_HAS_nand_vec) {
        vec_gen_op3(INDEX_op_nand_vec, 0, r, a, b);
    } else {
        tcg_gen_and_vec(0, r, a, b);
        tcg_gen_not_vec(0, r, r);
    }
}

void tcg_gen_nor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    if (TCG_TARGET_HAS_nor_vec) {
        vec_gen_op3(INDEX_op_nor_vec, 0, r, a, b);
    } else {
        tcg_gen_or_vec(0, r, a, b);
        tcg_gen_not_vec(0, r, r);
    }
}

void tcg_gen_eqv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    if (TCG_TARGET_HAS_eqv_vec) {
        vec_gen_op3(INDEX_op_eqv_vec, 0, r, a, b);
    } else {
        tcg_gen_xor_vec(0, r, a, b);
        tcg_gen_not_vec(0, r, r);
    }
}
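/*
 * Note on the convention used below: tcg_can_emit_vec_op() returns a
 * tri-state result. A positive value means the host supports the
 * opcode directly, a negative value means the backend can expand it
 * via tcg_expand_vec_op(), and zero means it is not supported at all,
 * in which case the caller must fall back to a generic expansion.
 */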
static bool do_op2(unsigned vece, TCGv_vec r, TCGv_vec a, TCGOpcode opc)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_assert_listed_vecop(opc);
    can = tcg_can_emit_vec_op(opc, type, vece);
    if (can > 0) {
        vec_gen_2(opc, type, vece, ri, ai);
    } else if (can < 0) {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_expand_vec_op(opc, type, vece, ri, ai);
        tcg_swap_vecop_list(hold_list);
    } else {
        return false;
    }
    return true;
}

void tcg_gen_not_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
{
    if (TCG_TARGET_HAS_not_vec) {
        vec_gen_op2(INDEX_op_not_vec, 0, r, a);
    } else {
        /* ~a == a ^ -1 */
        tcg_gen_xor_vec(0, r, a, tcg_constant_vec_matching(r, 0, -1));
    }
}

void tcg_gen_neg_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
{
    const TCGOpcode *hold_list;

    tcg_assert_listed_vecop(INDEX_op_neg_vec);
    hold_list = tcg_swap_vecop_list(NULL);

    if (!TCG_TARGET_HAS_neg_vec || !do_op2(vece, r, a, INDEX_op_neg_vec)) {
        /* -a == 0 - a */
        tcg_gen_sub_vec(vece, r, tcg_constant_vec_matching(r, vece, 0), a);
    }
    tcg_swap_vecop_list(hold_list);
}

void tcg_gen_abs_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
{
    const TCGOpcode *hold_list;

    tcg_assert_listed_vecop(INDEX_op_abs_vec);
    hold_list = tcg_swap_vecop_list(NULL);

    if (!do_op2(vece, r, a, INDEX_op_abs_vec)) {
        TCGType type = tcgv_vec_temp(r)->base_type;
        TCGv_vec t = tcg_temp_new_vec(type);

        tcg_debug_assert(tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece));
        if (tcg_can_emit_vec_op(INDEX_op_smax_vec, type, vece) > 0) {
            /* abs(a) == smax(a, -a) */
            tcg_gen_neg_vec(vece, t, a);
            tcg_gen_smax_vec(vece, r, a, t);
        } else {
            /* Create -1 in each negative lane, 0 elsewhere ... */
            if (tcg_can_emit_vec_op(INDEX_op_sari_vec, type, vece) > 0) {
                tcg_gen_sari_vec(vece, t, a, (8 << vece) - 1);
            } else {
                tcg_gen_cmp_vec(TCG_COND_LT, vece, t, a,
                                tcg_constant_vec(type, vece, 0));
            }
            /* ... then abs(a) == (a ^ t) - t. */
            tcg_gen_xor_vec(vece, r, a, t);
            tcg_gen_sub_vec(vece, r, r, t);
        }

        tcg_temp_free_vec(t);
    }
    tcg_swap_vecop_list(hold_list);
}
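/*
 * Immediate shifts. The shift count is a compile-time constant and
 * must lie in [0, element_bits); a count of zero is folded to a move,
 * and rotate-right is canonicalized below onto rotate-left by the
 * complementary count.
 */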
static void do_shifti(TCGOpcode opc, unsigned vece,
                      TCGv_vec r, TCGv_vec a, int64_t i)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type == type);
    tcg_debug_assert(i >= 0 && i < (8 << vece));
    tcg_assert_listed_vecop(opc);

    if (i == 0) {
        tcg_gen_mov_vec(r, a);
        return;
    }

    can = tcg_can_emit_vec_op(opc, type, vece);
    if (can > 0) {
        vec_gen_3(opc, type, vece, ri, ai, i);
    } else {
        /*
         * We leave the choice of expansion via scalar or vector shift
         * to the target. Often, but not always, dupi can feed a vector
         * shift more easily than a scalar.
         */
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_debug_assert(can < 0);
        tcg_expand_vec_op(opc, type, vece, ri, ai, i);
        tcg_swap_vecop_list(hold_list);
    }
}

void tcg_gen_shli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    do_shifti(INDEX_op_shli_vec, vece, r, a, i);
}

void tcg_gen_shri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    do_shifti(INDEX_op_shri_vec, vece, r, a, i);
}

void tcg_gen_sari_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    do_shifti(INDEX_op_sari_vec, vece, r, a, i);
}

void tcg_gen_rotli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    do_shifti(INDEX_op_rotli_vec, vece, r, a, i);
}

void tcg_gen_rotri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    int bits = 8 << vece;
    tcg_debug_assert(i >= 0 && i < bits);
    /* Rotate right by i is rotate left by bits - i. */
    do_shifti(INDEX_op_rotli_vec, vece, r, a, -i & (bits - 1));
}

void tcg_gen_cmp_vec(TCGCond cond, unsigned vece,
                     TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGTemp *tt = NULL;
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGArg bi = temp_arg(bt);
    TCGArg ti;
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    tcg_assert_listed_vecop(INDEX_op_cmp_vec);
    can = tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece);

    if (!TCG_TARGET_HAS_tst_vec && is_tst_cond(cond)) {
        /* Lower "test" conditions to EQ/NE of a & b against zero. */
        tt = tcg_temp_new_internal(type, TEMP_EBB);
        ti = temp_arg(tt);
        vec_gen_3(INDEX_op_and_vec, type, 0, ti, ai, bi);
        at = tt;
        ai = ti;
        bt = tcg_constant_internal(type, 0);
        bi = temp_arg(bt);
        cond = tcg_tst_eqne_cond(cond);
    }

    if (can > 0) {
        vec_gen_4(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond);
    } else {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_debug_assert(can < 0);
        tcg_expand_vec_op(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond);
        tcg_swap_vecop_list(hold_list);
    }

    if (tt) {
        tcg_temp_free_internal(tt);
    }
}
static bool do_op3(unsigned vece, TCGv_vec r, TCGv_vec a,
                   TCGv_vec b, TCGOpcode opc)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGArg bi = temp_arg(bt);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    tcg_assert_listed_vecop(opc);
    can = tcg_can_emit_vec_op(opc, type, vece);
    if (can > 0) {
        vec_gen_3(opc, type, vece, ri, ai, bi);
    } else if (can < 0) {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_expand_vec_op(opc, type, vece, ri, ai, bi);
        tcg_swap_vecop_list(hold_list);
    } else {
        return false;
    }
    return true;
}

static void do_op3_nofail(unsigned vece, TCGv_vec r, TCGv_vec a,
                          TCGv_vec b, TCGOpcode opc)
{
    bool ok = do_op3(vece, r, a, b, opc);
    tcg_debug_assert(ok);
}

void tcg_gen_add_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_add_vec);
}

void tcg_gen_sub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_sub_vec);
}

void tcg_gen_mul_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_mul_vec);
}

void tcg_gen_ssadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_ssadd_vec);
}

void tcg_gen_usadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    if (!do_op3(vece, r, a, b, INDEX_op_usadd_vec)) {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        TCGv_vec t = tcg_temp_new_vec_matching(r);

        /* usadd(a, b) = min(a, ~b) + b */
        tcg_gen_not_vec(vece, t, b);
        tcg_gen_umin_vec(vece, t, t, a);
        tcg_gen_add_vec(vece, r, t, b);

        tcg_temp_free_vec(t);
        tcg_swap_vecop_list(hold_list);
    }
}

void tcg_gen_sssub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_sssub_vec);
}

void tcg_gen_ussub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    if (!do_op3(vece, r, a, b, INDEX_op_ussub_vec)) {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        TCGv_vec t = tcg_temp_new_vec_matching(r);

        /* ussub(a, b) = max(a, b) - b */
        tcg_gen_umax_vec(vece, t, a, b);
        tcg_gen_sub_vec(vece, r, t, b);

        tcg_temp_free_vec(t);
        tcg_swap_vecop_list(hold_list);
    }
}

static void do_minmax(unsigned vece, TCGv_vec r, TCGv_vec a,
                      TCGv_vec b, TCGOpcode opc, TCGCond cond)
{
    if (!do_op3(vece, r, a, b, opc)) {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_gen_cmpsel_vec(cond, vece, r, a, b, a, b);
        tcg_swap_vecop_list(hold_list);
    }
}

void tcg_gen_smin_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_minmax(vece, r, a, b, INDEX_op_smin_vec, TCG_COND_LT);
}

void tcg_gen_umin_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_minmax(vece, r, a, b, INDEX_op_umin_vec, TCG_COND_LTU);
}

void tcg_gen_smax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_minmax(vece, r, a, b, INDEX_op_smax_vec, TCG_COND_GT);
}

void tcg_gen_umax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_minmax(vece, r, a, b, INDEX_op_umax_vec, TCG_COND_GTU);
}

void tcg_gen_shlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_shlv_vec);
}

void tcg_gen_shrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_shrv_vec);
}

void tcg_gen_sarv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_sarv_vec);
}

void tcg_gen_rotlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_rotlv_vec);
}

void tcg_gen_rotrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_rotrv_vec);
}
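/*
 * Shifts by a scalar: every lane is shifted by the same i32 count.
 * Unlike the immediate and per-lane forms above, there is no generic
 * fallback here; front ends are expected to have verified support via
 * tcg_can_emit_vecop_list(), hence the assertion on failure.
 */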
static void do_shifts(unsigned vece, TCGv_vec r, TCGv_vec a,
                      TCGv_i32 s, TCGOpcode opc)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *st = tcgv_i32_temp(s);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGArg si = temp_arg(st);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_assert_listed_vecop(opc);
    can = tcg_can_emit_vec_op(opc, type, vece);
    if (can > 0) {
        vec_gen_3(opc, type, vece, ri, ai, si);
    } else if (can < 0) {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_expand_vec_op(opc, type, vece, ri, ai, si);
        tcg_swap_vecop_list(hold_list);
    } else {
        g_assert_not_reached();
    }
}

void tcg_gen_shls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
{
    do_shifts(vece, r, a, b, INDEX_op_shls_vec);
}

void tcg_gen_shrs_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
{
    do_shifts(vece, r, a, b, INDEX_op_shrs_vec);
}

void tcg_gen_sars_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
{
    do_shifts(vece, r, a, b, INDEX_op_sars_vec);
}

void tcg_gen_rotls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s)
{
    do_shifts(vece, r, a, s, INDEX_op_rotls_vec);
}

void tcg_gen_bitsel_vec(unsigned vece, TCGv_vec r, TCGv_vec a,
                        TCGv_vec b, TCGv_vec c)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGTemp *ct = tcgv_vec_temp(c);
    TCGType type = rt->base_type;

    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    tcg_debug_assert(ct->base_type >= type);

    if (TCG_TARGET_HAS_bitsel_vec) {
        vec_gen_4(INDEX_op_bitsel_vec, type, MO_8,
                  temp_arg(rt), temp_arg(at), temp_arg(bt), temp_arg(ct));
    } else {
        /* bitsel(a, b, c) == (a & b) | (~a & c) */
        TCGv_vec t = tcg_temp_new_vec(type);
        tcg_gen_and_vec(MO_8, t, a, b);
        tcg_gen_andc_vec(MO_8, r, c, a);
        tcg_gen_or_vec(MO_8, r, r, t);
        tcg_temp_free_vec(t);
    }
}

void tcg_gen_cmpsel_vec(TCGCond cond, unsigned vece, TCGv_vec r,
                        TCGv_vec a, TCGv_vec b, TCGv_vec c, TCGv_vec d)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGTemp *ct = tcgv_vec_temp(c);
    TCGTemp *dt = tcgv_vec_temp(d);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGArg bi = temp_arg(bt);
    TCGArg ci = temp_arg(ct);
    TCGArg di = temp_arg(dt);
    TCGType type = rt->base_type;
    const TCGOpcode *hold_list;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    tcg_debug_assert(ct->base_type >= type);
    tcg_debug_assert(dt->base_type >= type);

    tcg_assert_listed_vecop(INDEX_op_cmpsel_vec);
    hold_list = tcg_swap_vecop_list(NULL);
    can = tcg_can_emit_vec_op(INDEX_op_cmpsel_vec, type, vece);

    if (can > 0) {
        vec_gen_6(INDEX_op_cmpsel_vec, type, vece, ri, ai, bi, ci, di, cond);
    } else if (can < 0) {
        tcg_expand_vec_op(INDEX_op_cmpsel_vec, type, vece,
                          ri, ai, bi, ci, di, cond);
    } else {
        /* cmpsel(cond, a, b, c, d) == bitsel(cmp(cond, a, b), c, d) */
        TCGv_vec t = tcg_temp_new_vec(type);
        tcg_gen_cmp_vec(cond, vece, t, a, b);
        tcg_gen_bitsel_vec(vece, r, t, c, d);
        tcg_temp_free_vec(t);
    }
    tcg_swap_vecop_list(hold_list);
}