1 /* 2 * RISC-V Vector Extension Helpers for QEMU. 3 * 4 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved. 5 * 6 * This program is free software; you can redistribute it and/or modify it 7 * under the terms and conditions of the GNU General Public License, 8 * version 2 or later, as published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope it will be useful, but WITHOUT 11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 13 * more details. 14 * 15 * You should have received a copy of the GNU General Public License along with 16 * this program. If not, see <http://www.gnu.org/licenses/>. 17 */ 18 19 #include "qemu/osdep.h" 20 #include "qemu/host-utils.h" 21 #include "qemu/bitops.h" 22 #include "cpu.h" 23 #include "exec/memop.h" 24 #include "accel/tcg/cpu-ldst.h" 25 #include "accel/tcg/probe.h" 26 #include "exec/page-protection.h" 27 #include "exec/helper-proto.h" 28 #include "exec/tlb-flags.h" 29 #include "exec/target_page.h" 30 #include "exec/tswap.h" 31 #include "fpu/softfloat.h" 32 #include "tcg/tcg-gvec-desc.h" 33 #include "internals.h" 34 #include "vector_internals.h" 35 #include <math.h> 36 37 target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1, 38 target_ulong s2) 39 { 40 int vlmax, vl; 41 RISCVCPU *cpu = env_archcpu(env); 42 uint64_t vlmul = FIELD_EX64(s2, VTYPE, VLMUL); 43 uint8_t vsew = FIELD_EX64(s2, VTYPE, VSEW); 44 uint16_t sew = 8 << vsew; 45 uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV); 46 int xlen = riscv_cpu_xlen(env); 47 bool vill = (s2 >> (xlen - 1)) & 0x1; 48 target_ulong reserved = s2 & 49 MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT, 50 xlen - 1 - R_VTYPE_RESERVED_SHIFT); 51 uint16_t vlen = cpu->cfg.vlenb << 3; 52 int8_t lmul; 53 54 if (vlmul & 4) { 55 /* 56 * Fractional LMUL, check: 57 * 58 * VLEN * LMUL >= SEW 59 * VLEN >> (8 - lmul) >= sew 60 * (vlenb << 3) >> (8 - lmul) >= sew 61 */ 62 if (vlmul == 4 || (vlen >> (8 - vlmul)) < sew) { 63 vill = true; 64 } 65 } 66 67 if ((sew > cpu->cfg.elen) || vill || (ediv != 0) || (reserved != 0)) { 68 /* only set vill bit. */ 69 env->vill = 1; 70 env->vtype = 0; 71 env->vl = 0; 72 env->vstart = 0; 73 return 0; 74 } 75 76 /* lmul encoded as in DisasContext::lmul */ 77 lmul = sextract32(FIELD_EX64(s2, VTYPE, VLMUL), 0, 3); 78 vlmax = vext_get_vlmax(cpu->cfg.vlenb, vsew, lmul); 79 if (s1 <= vlmax) { 80 vl = s1; 81 } else if (s1 < 2 * vlmax && cpu->cfg.rvv_vl_half_avl) { 82 vl = (s1 + 1) >> 1; 83 } else { 84 vl = vlmax; 85 } 86 env->vl = vl; 87 env->vtype = s2; 88 env->vstart = 0; 89 env->vill = 0; 90 return vl; 91 } 92 93 /* 94 * Get the maximum number of elements can be operated. 95 * 96 * log2_esz: log2 of element size in bytes. 97 */ 98 static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz) 99 { 100 /* 101 * As simd_desc support at most 2048 bytes, the max vlen is 1024 bits. 102 * so vlen in bytes (vlenb) is encoded as maxsz. 103 */ 104 uint32_t vlenb = simd_maxsz(desc); 105 106 /* Return VLMAX */ 107 int scale = vext_lmul(desc) - log2_esz; 108 return scale < 0 ? vlenb >> -scale : vlenb << scale; 109 } 110 111 /* 112 * This function checks watchpoint before real load operation. 113 * 114 * In system mode, the TLB API probe_access is enough for watchpoint check. 115 * In user mode, there is no watchpoint support now. 116 * 117 * It will trigger an exception if there is no mapping in TLB 118 * and page table walk can't fill the TLB entry. 
Then the guest 119 * software can return here after process the exception or never return. 120 * 121 * This function can also be used when direct access to probe_access_flags is 122 * needed in order to access the flags. If a pointer to a flags operand is 123 * provided the function will call probe_access_flags instead, use nonfault 124 * and update host and flags. 125 */ 126 static void probe_pages(CPURISCVState *env, target_ulong addr, target_ulong len, 127 uintptr_t ra, MMUAccessType access_type, int mmu_index, 128 void **host, int *flags, bool nonfault) 129 { 130 target_ulong pagelen = -(addr | TARGET_PAGE_MASK); 131 target_ulong curlen = MIN(pagelen, len); 132 133 if (flags != NULL) { 134 *flags = probe_access_flags(env, adjust_addr(env, addr), curlen, 135 access_type, mmu_index, nonfault, host, ra); 136 } else { 137 probe_access(env, adjust_addr(env, addr), curlen, access_type, 138 mmu_index, ra); 139 } 140 141 if (len > curlen) { 142 addr += curlen; 143 curlen = len - curlen; 144 if (flags != NULL) { 145 *flags = probe_access_flags(env, adjust_addr(env, addr), curlen, 146 access_type, mmu_index, nonfault, 147 host, ra); 148 } else { 149 probe_access(env, adjust_addr(env, addr), curlen, access_type, 150 mmu_index, ra); 151 } 152 } 153 } 154 155 156 static inline void vext_set_elem_mask(void *v0, int index, 157 uint8_t value) 158 { 159 int idx = index / 64; 160 int pos = index % 64; 161 uint64_t old = ((uint64_t *)v0)[idx]; 162 ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value); 163 } 164 165 /* elements operations for load and store */ 166 typedef void vext_ldst_elem_fn_tlb(CPURISCVState *env, abi_ptr addr, 167 uint32_t idx, void *vd, uintptr_t retaddr); 168 typedef void vext_ldst_elem_fn_host(void *vd, uint32_t idx, void *host); 169 170 #define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF) \ 171 static inline QEMU_ALWAYS_INLINE \ 172 void NAME##_tlb(CPURISCVState *env, abi_ptr addr, \ 173 uint32_t idx, void *vd, uintptr_t retaddr) \ 174 { \ 175 ETYPE *cur = ((ETYPE *)vd + H(idx)); \ 176 *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr); \ 177 } \ 178 \ 179 static inline QEMU_ALWAYS_INLINE \ 180 void NAME##_host(void *vd, uint32_t idx, void *host) \ 181 { \ 182 ETYPE *cur = ((ETYPE *)vd + H(idx)); \ 183 *cur = (ETYPE)LDSUF##_p(host); \ 184 } 185 186 GEN_VEXT_LD_ELEM(lde_b, uint8_t, H1, ldub) 187 GEN_VEXT_LD_ELEM(lde_h, uint16_t, H2, lduw) 188 GEN_VEXT_LD_ELEM(lde_w, uint32_t, H4, ldl) 189 GEN_VEXT_LD_ELEM(lde_d, uint64_t, H8, ldq) 190 191 #define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF) \ 192 static inline QEMU_ALWAYS_INLINE \ 193 void NAME##_tlb(CPURISCVState *env, abi_ptr addr, \ 194 uint32_t idx, void *vd, uintptr_t retaddr) \ 195 { \ 196 ETYPE data = *((ETYPE *)vd + H(idx)); \ 197 cpu_##STSUF##_data_ra(env, addr, data, retaddr); \ 198 } \ 199 \ 200 static inline QEMU_ALWAYS_INLINE \ 201 void NAME##_host(void *vd, uint32_t idx, void *host) \ 202 { \ 203 ETYPE data = *((ETYPE *)vd + H(idx)); \ 204 STSUF##_p(host, data); \ 205 } 206 207 GEN_VEXT_ST_ELEM(ste_b, uint8_t, H1, stb) 208 GEN_VEXT_ST_ELEM(ste_h, uint16_t, H2, stw) 209 GEN_VEXT_ST_ELEM(ste_w, uint32_t, H4, stl) 210 GEN_VEXT_ST_ELEM(ste_d, uint64_t, H8, stq) 211 212 static inline QEMU_ALWAYS_INLINE void 213 vext_continuous_ldst_tlb(CPURISCVState *env, vext_ldst_elem_fn_tlb *ldst_tlb, 214 void *vd, uint32_t evl, target_ulong addr, 215 uint32_t reg_start, uintptr_t ra, uint32_t esz, 216 bool is_load) 217 { 218 uint32_t i; 219 for (i = env->vstart; i < evl; env->vstart = ++i, addr += esz) { 220 ldst_tlb(env, adjust_addr(env, addr), 
i, vd, ra); 221 } 222 } 223 224 static inline QEMU_ALWAYS_INLINE void 225 vext_continuous_ldst_host(CPURISCVState *env, vext_ldst_elem_fn_host *ldst_host, 226 void *vd, uint32_t evl, uint32_t reg_start, void *host, 227 uint32_t esz, bool is_load) 228 { 229 #if HOST_BIG_ENDIAN 230 for (; reg_start < evl; reg_start++, host += esz) { 231 ldst_host(vd, reg_start, host); 232 } 233 #else 234 if (esz == 1) { 235 uint32_t byte_offset = reg_start * esz; 236 uint32_t size = (evl - reg_start) * esz; 237 238 if (is_load) { 239 memcpy(vd + byte_offset, host, size); 240 } else { 241 memcpy(host, vd + byte_offset, size); 242 } 243 } else { 244 for (; reg_start < evl; reg_start++, host += esz) { 245 ldst_host(vd, reg_start, host); 246 } 247 } 248 #endif 249 } 250 251 static void vext_set_tail_elems_1s(target_ulong vl, void *vd, 252 uint32_t desc, uint32_t nf, 253 uint32_t esz, uint32_t max_elems) 254 { 255 uint32_t vta = vext_vta(desc); 256 int k; 257 258 if (vta == 0) { 259 return; 260 } 261 262 for (k = 0; k < nf; ++k) { 263 vext_set_elems_1s(vd, vta, (k * max_elems + vl) * esz, 264 (k * max_elems + max_elems) * esz); 265 } 266 } 267 268 /* 269 * stride: access vector element from strided memory 270 */ 271 static void 272 vext_ldst_stride(void *vd, void *v0, target_ulong base, target_ulong stride, 273 CPURISCVState *env, uint32_t desc, uint32_t vm, 274 vext_ldst_elem_fn_tlb *ldst_elem, uint32_t log2_esz, 275 uintptr_t ra) 276 { 277 uint32_t i, k; 278 uint32_t nf = vext_nf(desc); 279 uint32_t max_elems = vext_max_elems(desc, log2_esz); 280 uint32_t esz = 1 << log2_esz; 281 uint32_t vma = vext_vma(desc); 282 283 VSTART_CHECK_EARLY_EXIT(env, env->vl); 284 285 for (i = env->vstart; i < env->vl; env->vstart = ++i) { 286 k = 0; 287 while (k < nf) { 288 if (!vm && !vext_elem_mask(v0, i)) { 289 /* set masked-off elements to 1s */ 290 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz, 291 (i + k * max_elems + 1) * esz); 292 k++; 293 continue; 294 } 295 target_ulong addr = base + stride * i + (k << log2_esz); 296 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 297 k++; 298 } 299 } 300 env->vstart = 0; 301 302 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems); 303 } 304 305 #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \ 306 void HELPER(NAME)(void *vd, void * v0, target_ulong base, \ 307 target_ulong stride, CPURISCVState *env, \ 308 uint32_t desc) \ 309 { \ 310 uint32_t vm = vext_vm(desc); \ 311 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \ 312 ctzl(sizeof(ETYPE)), GETPC()); \ 313 } 314 315 GEN_VEXT_LD_STRIDE(vlse8_v, int8_t, lde_b_tlb) 316 GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h_tlb) 317 GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w_tlb) 318 GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d_tlb) 319 320 #define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN) \ 321 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 322 target_ulong stride, CPURISCVState *env, \ 323 uint32_t desc) \ 324 { \ 325 uint32_t vm = vext_vm(desc); \ 326 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \ 327 ctzl(sizeof(ETYPE)), GETPC()); \ 328 } 329 330 GEN_VEXT_ST_STRIDE(vsse8_v, int8_t, ste_b_tlb) 331 GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h_tlb) 332 GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w_tlb) 333 GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d_tlb) 334 335 /* 336 * unit-stride: access elements stored contiguously in memory 337 */ 338 339 /* unmasked unit-stride load and store operation */ 340 static inline QEMU_ALWAYS_INLINE void 341 
vext_page_ldst_us(CPURISCVState *env, void *vd, target_ulong addr, 342 uint32_t elems, uint32_t nf, uint32_t max_elems, 343 uint32_t log2_esz, bool is_load, int mmu_index, 344 vext_ldst_elem_fn_tlb *ldst_tlb, 345 vext_ldst_elem_fn_host *ldst_host, uintptr_t ra) 346 { 347 void *host; 348 int i, k, flags; 349 uint32_t esz = 1 << log2_esz; 350 uint32_t size = (elems * nf) << log2_esz; 351 uint32_t evl = env->vstart + elems; 352 MMUAccessType access_type = is_load ? MMU_DATA_LOAD : MMU_DATA_STORE; 353 354 /* Check page permission/pmp/watchpoint/etc. */ 355 probe_pages(env, addr, size, ra, access_type, mmu_index, &host, &flags, 356 true); 357 358 if (flags == 0) { 359 if (nf == 1) { 360 vext_continuous_ldst_host(env, ldst_host, vd, evl, env->vstart, 361 host, esz, is_load); 362 } else { 363 for (i = env->vstart; i < evl; ++i) { 364 k = 0; 365 while (k < nf) { 366 ldst_host(vd, i + k * max_elems, host); 367 host += esz; 368 k++; 369 } 370 } 371 } 372 env->vstart += elems; 373 } else { 374 if (nf == 1) { 375 vext_continuous_ldst_tlb(env, ldst_tlb, vd, evl, addr, env->vstart, 376 ra, esz, is_load); 377 } else { 378 /* load bytes from guest memory */ 379 for (i = env->vstart; i < evl; env->vstart = ++i) { 380 k = 0; 381 while (k < nf) { 382 ldst_tlb(env, adjust_addr(env, addr), i + k * max_elems, 383 vd, ra); 384 addr += esz; 385 k++; 386 } 387 } 388 } 389 } 390 } 391 392 static inline QEMU_ALWAYS_INLINE void 393 vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, 394 vext_ldst_elem_fn_tlb *ldst_tlb, 395 vext_ldst_elem_fn_host *ldst_host, uint32_t log2_esz, 396 uint32_t evl, uintptr_t ra, bool is_load) 397 { 398 uint32_t k; 399 target_ulong page_split, elems, addr; 400 uint32_t nf = vext_nf(desc); 401 uint32_t max_elems = vext_max_elems(desc, log2_esz); 402 uint32_t esz = 1 << log2_esz; 403 uint32_t msize = nf * esz; 404 int mmu_index = riscv_env_mmu_index(env, false); 405 406 VSTART_CHECK_EARLY_EXIT(env, evl); 407 408 #if defined(CONFIG_USER_ONLY) 409 /* 410 * For data sizes <= 6 bytes we get better performance by simply calling 411 * vext_continuous_ldst_tlb 412 */ 413 if (nf == 1 && (evl << log2_esz) <= 6) { 414 addr = base + (env->vstart << log2_esz); 415 vext_continuous_ldst_tlb(env, ldst_tlb, vd, evl, addr, env->vstart, ra, 416 esz, is_load); 417 418 env->vstart = 0; 419 vext_set_tail_elems_1s(evl, vd, desc, nf, esz, max_elems); 420 return; 421 } 422 #endif 423 424 /* Calculate the page range of first page */ 425 addr = base + ((env->vstart * nf) << log2_esz); 426 page_split = -(addr | TARGET_PAGE_MASK); 427 /* Get number of elements */ 428 elems = page_split / msize; 429 if (unlikely(env->vstart + elems >= evl)) { 430 elems = evl - env->vstart; 431 } 432 433 /* Load/store elements in the first page */ 434 if (likely(elems)) { 435 vext_page_ldst_us(env, vd, addr, elems, nf, max_elems, log2_esz, 436 is_load, mmu_index, ldst_tlb, ldst_host, ra); 437 } 438 439 /* Load/store elements in the second page */ 440 if (unlikely(env->vstart < evl)) { 441 /* Cross page element */ 442 if (unlikely(page_split % msize)) { 443 for (k = 0; k < nf; k++) { 444 addr = base + ((env->vstart * nf + k) << log2_esz); 445 ldst_tlb(env, adjust_addr(env, addr), 446 env->vstart + k * max_elems, vd, ra); 447 } 448 env->vstart++; 449 } 450 451 addr = base + ((env->vstart * nf) << log2_esz); 452 /* Get number of elements of second page */ 453 elems = evl - env->vstart; 454 455 /* Load/store elements in the second page */ 456 vext_page_ldst_us(env, vd, addr, elems, nf, max_elems, log2_esz, 457 
is_load, mmu_index, ldst_tlb, ldst_host, ra); 458 } 459 460 env->vstart = 0; 461 vext_set_tail_elems_1s(evl, vd, desc, nf, esz, max_elems); 462 } 463 464 /* 465 * masked unit-stride load and store operation will be a special case of 466 * stride, stride = NF * sizeof (ETYPE) 467 */ 468 469 #define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN_TLB, LOAD_FN_HOST) \ 470 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ 471 CPURISCVState *env, uint32_t desc) \ 472 { \ 473 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \ 474 vext_ldst_stride(vd, v0, base, stride, env, desc, false, \ 475 LOAD_FN_TLB, ctzl(sizeof(ETYPE)), GETPC()); \ 476 } \ 477 \ 478 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 479 CPURISCVState *env, uint32_t desc) \ 480 { \ 481 vext_ldst_us(vd, base, env, desc, LOAD_FN_TLB, LOAD_FN_HOST, \ 482 ctzl(sizeof(ETYPE)), env->vl, GETPC(), true); \ 483 } 484 485 GEN_VEXT_LD_US(vle8_v, int8_t, lde_b_tlb, lde_b_host) 486 GEN_VEXT_LD_US(vle16_v, int16_t, lde_h_tlb, lde_h_host) 487 GEN_VEXT_LD_US(vle32_v, int32_t, lde_w_tlb, lde_w_host) 488 GEN_VEXT_LD_US(vle64_v, int64_t, lde_d_tlb, lde_d_host) 489 490 #define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN_TLB, STORE_FN_HOST) \ 491 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ 492 CPURISCVState *env, uint32_t desc) \ 493 { \ 494 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \ 495 vext_ldst_stride(vd, v0, base, stride, env, desc, false, \ 496 STORE_FN_TLB, ctzl(sizeof(ETYPE)), GETPC()); \ 497 } \ 498 \ 499 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 500 CPURISCVState *env, uint32_t desc) \ 501 { \ 502 vext_ldst_us(vd, base, env, desc, STORE_FN_TLB, STORE_FN_HOST, \ 503 ctzl(sizeof(ETYPE)), env->vl, GETPC(), false); \ 504 } 505 506 GEN_VEXT_ST_US(vse8_v, int8_t, ste_b_tlb, ste_b_host) 507 GEN_VEXT_ST_US(vse16_v, int16_t, ste_h_tlb, ste_h_host) 508 GEN_VEXT_ST_US(vse32_v, int32_t, ste_w_tlb, ste_w_host) 509 GEN_VEXT_ST_US(vse64_v, int64_t, ste_d_tlb, ste_d_host) 510 511 /* 512 * unit stride mask load and store, EEW = 1 513 */ 514 void HELPER(vlm_v)(void *vd, void *v0, target_ulong base, 515 CPURISCVState *env, uint32_t desc) 516 { 517 /* evl = ceil(vl/8) */ 518 uint8_t evl = (env->vl + 7) >> 3; 519 vext_ldst_us(vd, base, env, desc, lde_b_tlb, lde_b_host, 520 0, evl, GETPC(), true); 521 } 522 523 void HELPER(vsm_v)(void *vd, void *v0, target_ulong base, 524 CPURISCVState *env, uint32_t desc) 525 { 526 /* evl = ceil(vl/8) */ 527 uint8_t evl = (env->vl + 7) >> 3; 528 vext_ldst_us(vd, base, env, desc, ste_b_tlb, ste_b_host, 529 0, evl, GETPC(), false); 530 } 531 532 /* 533 * index: access vector element from indexed memory 534 */ 535 typedef target_ulong vext_get_index_addr(target_ulong base, 536 uint32_t idx, void *vs2); 537 538 #define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \ 539 static target_ulong NAME(target_ulong base, \ 540 uint32_t idx, void *vs2) \ 541 { \ 542 return (base + *((ETYPE *)vs2 + H(idx))); \ 543 } 544 545 GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t, H1) 546 GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2) 547 GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4) 548 GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8) 549 550 static inline void 551 vext_ldst_index(void *vd, void *v0, target_ulong base, 552 void *vs2, CPURISCVState *env, uint32_t desc, 553 vext_get_index_addr get_index_addr, 554 vext_ldst_elem_fn_tlb *ldst_elem, 555 uint32_t log2_esz, uintptr_t ra) 556 { 557 uint32_t i, k; 558 uint32_t nf = vext_nf(desc); 559 uint32_t vm = vext_vm(desc); 560 uint32_t max_elems = 
vext_max_elems(desc, log2_esz); 561 uint32_t esz = 1 << log2_esz; 562 uint32_t vma = vext_vma(desc); 563 564 VSTART_CHECK_EARLY_EXIT(env, env->vl); 565 566 /* load bytes from guest memory */ 567 for (i = env->vstart; i < env->vl; env->vstart = ++i) { 568 k = 0; 569 while (k < nf) { 570 if (!vm && !vext_elem_mask(v0, i)) { 571 /* set masked-off elements to 1s */ 572 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz, 573 (i + k * max_elems + 1) * esz); 574 k++; 575 continue; 576 } 577 abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz); 578 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 579 k++; 580 } 581 } 582 env->vstart = 0; 583 584 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems); 585 } 586 587 #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \ 588 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 589 void *vs2, CPURISCVState *env, uint32_t desc) \ 590 { \ 591 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \ 592 LOAD_FN, ctzl(sizeof(ETYPE)), GETPC()); \ 593 } 594 595 GEN_VEXT_LD_INDEX(vlxei8_8_v, int8_t, idx_b, lde_b_tlb) 596 GEN_VEXT_LD_INDEX(vlxei8_16_v, int16_t, idx_b, lde_h_tlb) 597 GEN_VEXT_LD_INDEX(vlxei8_32_v, int32_t, idx_b, lde_w_tlb) 598 GEN_VEXT_LD_INDEX(vlxei8_64_v, int64_t, idx_b, lde_d_tlb) 599 GEN_VEXT_LD_INDEX(vlxei16_8_v, int8_t, idx_h, lde_b_tlb) 600 GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h_tlb) 601 GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w_tlb) 602 GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d_tlb) 603 GEN_VEXT_LD_INDEX(vlxei32_8_v, int8_t, idx_w, lde_b_tlb) 604 GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h_tlb) 605 GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w_tlb) 606 GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d_tlb) 607 GEN_VEXT_LD_INDEX(vlxei64_8_v, int8_t, idx_d, lde_b_tlb) 608 GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h_tlb) 609 GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w_tlb) 610 GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d_tlb) 611 612 #define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN) \ 613 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 614 void *vs2, CPURISCVState *env, uint32_t desc) \ 615 { \ 616 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \ 617 STORE_FN, ctzl(sizeof(ETYPE)), \ 618 GETPC()); \ 619 } 620 621 GEN_VEXT_ST_INDEX(vsxei8_8_v, int8_t, idx_b, ste_b_tlb) 622 GEN_VEXT_ST_INDEX(vsxei8_16_v, int16_t, idx_b, ste_h_tlb) 623 GEN_VEXT_ST_INDEX(vsxei8_32_v, int32_t, idx_b, ste_w_tlb) 624 GEN_VEXT_ST_INDEX(vsxei8_64_v, int64_t, idx_b, ste_d_tlb) 625 GEN_VEXT_ST_INDEX(vsxei16_8_v, int8_t, idx_h, ste_b_tlb) 626 GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h_tlb) 627 GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w_tlb) 628 GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d_tlb) 629 GEN_VEXT_ST_INDEX(vsxei32_8_v, int8_t, idx_w, ste_b_tlb) 630 GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h_tlb) 631 GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w_tlb) 632 GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d_tlb) 633 GEN_VEXT_ST_INDEX(vsxei64_8_v, int8_t, idx_d, ste_b_tlb) 634 GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h_tlb) 635 GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w_tlb) 636 GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d_tlb) 637 638 /* 639 * unit-stride fault-only-fisrt load instructions 640 */ 641 static inline void 642 vext_ldff(void *vd, void *v0, target_ulong base, CPURISCVState *env, 643 uint32_t desc, 
vext_ldst_elem_fn_tlb *ldst_tlb, 644 vext_ldst_elem_fn_host *ldst_host, uint32_t log2_esz, uintptr_t ra) 645 { 646 uint32_t i, k, vl = 0; 647 uint32_t nf = vext_nf(desc); 648 uint32_t vm = vext_vm(desc); 649 uint32_t max_elems = vext_max_elems(desc, log2_esz); 650 uint32_t esz = 1 << log2_esz; 651 uint32_t msize = nf * esz; 652 uint32_t vma = vext_vma(desc); 653 target_ulong addr, addr_probe, addr_i, offset, remain, page_split, elems; 654 int mmu_index = riscv_env_mmu_index(env, false); 655 int flags, probe_flags; 656 void *host; 657 658 VSTART_CHECK_EARLY_EXIT(env, env->vl); 659 660 addr = base + ((env->vstart * nf) << log2_esz); 661 page_split = -(addr | TARGET_PAGE_MASK); 662 /* Get number of elements */ 663 elems = page_split / msize; 664 if (unlikely(env->vstart + elems >= env->vl)) { 665 elems = env->vl - env->vstart; 666 } 667 668 /* Check page permission/pmp/watchpoint/etc. */ 669 probe_pages(env, addr, elems * msize, ra, MMU_DATA_LOAD, mmu_index, &host, 670 &flags, true); 671 672 /* If we are crossing a page check also the second page. */ 673 if (env->vl > elems) { 674 addr_probe = addr + (elems << log2_esz); 675 probe_pages(env, addr_probe, elems * msize, ra, MMU_DATA_LOAD, 676 mmu_index, &host, &probe_flags, true); 677 flags |= probe_flags; 678 } 679 680 if (flags & ~TLB_WATCHPOINT) { 681 /* probe every access */ 682 for (i = env->vstart; i < env->vl; i++) { 683 if (!vm && !vext_elem_mask(v0, i)) { 684 continue; 685 } 686 addr_i = adjust_addr(env, base + i * (nf << log2_esz)); 687 if (i == 0) { 688 /* Allow fault on first element. */ 689 probe_pages(env, addr_i, nf << log2_esz, ra, MMU_DATA_LOAD, 690 mmu_index, &host, NULL, false); 691 } else { 692 remain = nf << log2_esz; 693 while (remain > 0) { 694 offset = -(addr_i | TARGET_PAGE_MASK); 695 696 /* Probe nonfault on subsequent elements. */ 697 probe_pages(env, addr_i, offset, 0, MMU_DATA_LOAD, 698 mmu_index, &host, &flags, true); 699 700 /* 701 * Stop if invalid (unmapped) or mmio (transaction may 702 * fail). Do not stop if watchpoint, as the spec says that 703 * first-fault should continue to access the same 704 * elements regardless of any watchpoint. 
705 */ 706 if (flags & ~TLB_WATCHPOINT) { 707 vl = i; 708 goto ProbeSuccess; 709 } 710 if (remain <= offset) { 711 break; 712 } 713 remain -= offset; 714 addr_i = adjust_addr(env, addr_i + offset); 715 } 716 } 717 } 718 } 719 ProbeSuccess: 720 /* load bytes from guest memory */ 721 if (vl != 0) { 722 env->vl = vl; 723 } 724 725 if (env->vstart < env->vl) { 726 if (vm) { 727 /* Load/store elements in the first page */ 728 if (likely(elems)) { 729 vext_page_ldst_us(env, vd, addr, elems, nf, max_elems, 730 log2_esz, true, mmu_index, ldst_tlb, 731 ldst_host, ra); 732 } 733 734 /* Load/store elements in the second page */ 735 if (unlikely(env->vstart < env->vl)) { 736 /* Cross page element */ 737 if (unlikely(page_split % msize)) { 738 for (k = 0; k < nf; k++) { 739 addr = base + ((env->vstart * nf + k) << log2_esz); 740 ldst_tlb(env, adjust_addr(env, addr), 741 env->vstart + k * max_elems, vd, ra); 742 } 743 env->vstart++; 744 } 745 746 addr = base + ((env->vstart * nf) << log2_esz); 747 /* Get number of elements of second page */ 748 elems = env->vl - env->vstart; 749 750 /* Load/store elements in the second page */ 751 vext_page_ldst_us(env, vd, addr, elems, nf, max_elems, 752 log2_esz, true, mmu_index, ldst_tlb, 753 ldst_host, ra); 754 } 755 } else { 756 for (i = env->vstart; i < env->vl; i++) { 757 k = 0; 758 while (k < nf) { 759 if (!vext_elem_mask(v0, i)) { 760 /* set masked-off elements to 1s */ 761 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz, 762 (i + k * max_elems + 1) * esz); 763 k++; 764 continue; 765 } 766 addr = base + ((i * nf + k) << log2_esz); 767 ldst_tlb(env, adjust_addr(env, addr), i + k * max_elems, 768 vd, ra); 769 k++; 770 } 771 } 772 } 773 } 774 env->vstart = 0; 775 776 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems); 777 } 778 779 #define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN_TLB, LOAD_FN_HOST) \ 780 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 781 CPURISCVState *env, uint32_t desc) \ 782 { \ 783 vext_ldff(vd, v0, base, env, desc, LOAD_FN_TLB, \ 784 LOAD_FN_HOST, ctzl(sizeof(ETYPE)), GETPC()); \ 785 } 786 787 GEN_VEXT_LDFF(vle8ff_v, int8_t, lde_b_tlb, lde_b_host) 788 GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h_tlb, lde_h_host) 789 GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w_tlb, lde_w_host) 790 GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d_tlb, lde_d_host) 791 792 #define DO_SWAP(N, M) (M) 793 #define DO_AND(N, M) (N & M) 794 #define DO_XOR(N, M) (N ^ M) 795 #define DO_OR(N, M) (N | M) 796 #define DO_ADD(N, M) (N + M) 797 798 /* Signed min/max */ 799 #define DO_MAX(N, M) ((N) >= (M) ? (N) : (M)) 800 #define DO_MIN(N, M) ((N) >= (M) ? 
(M) : (N)) 801 802 /* 803 * load and store whole register instructions 804 */ 805 static inline QEMU_ALWAYS_INLINE void 806 vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, 807 vext_ldst_elem_fn_tlb *ldst_tlb, 808 vext_ldst_elem_fn_host *ldst_host, uint32_t log2_esz, 809 uintptr_t ra, bool is_load) 810 { 811 target_ulong page_split, elems, addr; 812 uint32_t nf = vext_nf(desc); 813 uint32_t vlenb = riscv_cpu_cfg(env)->vlenb; 814 uint32_t max_elems = vlenb >> log2_esz; 815 uint32_t evl = nf * max_elems; 816 uint32_t esz = 1 << log2_esz; 817 int mmu_index = riscv_env_mmu_index(env, false); 818 819 /* Calculate the page range of first page */ 820 addr = base + (env->vstart << log2_esz); 821 page_split = -(addr | TARGET_PAGE_MASK); 822 /* Get number of elements */ 823 elems = page_split / esz; 824 if (unlikely(env->vstart + elems >= evl)) { 825 elems = evl - env->vstart; 826 } 827 828 /* Load/store elements in the first page */ 829 if (likely(elems)) { 830 vext_page_ldst_us(env, vd, addr, elems, 1, max_elems, log2_esz, 831 is_load, mmu_index, ldst_tlb, ldst_host, ra); 832 } 833 834 /* Load/store elements in the second page */ 835 if (unlikely(env->vstart < evl)) { 836 /* Cross page element */ 837 if (unlikely(page_split % esz)) { 838 addr = base + (env->vstart << log2_esz); 839 ldst_tlb(env, adjust_addr(env, addr), env->vstart, vd, ra); 840 env->vstart++; 841 } 842 843 addr = base + (env->vstart << log2_esz); 844 /* Get number of elements of second page */ 845 elems = evl - env->vstart; 846 847 /* Load/store elements in the second page */ 848 vext_page_ldst_us(env, vd, addr, elems, 1, max_elems, log2_esz, 849 is_load, mmu_index, ldst_tlb, ldst_host, ra); 850 } 851 852 env->vstart = 0; 853 } 854 855 #define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN_TLB, LOAD_FN_HOST) \ 856 void HELPER(NAME)(void *vd, target_ulong base, CPURISCVState *env, \ 857 uint32_t desc) \ 858 { \ 859 vext_ldst_whole(vd, base, env, desc, LOAD_FN_TLB, LOAD_FN_HOST, \ 860 ctzl(sizeof(ETYPE)), GETPC(), true); \ 861 } 862 863 GEN_VEXT_LD_WHOLE(vl1re8_v, int8_t, lde_b_tlb, lde_b_host) 864 GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h_tlb, lde_h_host) 865 GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w_tlb, lde_w_host) 866 GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d_tlb, lde_d_host) 867 GEN_VEXT_LD_WHOLE(vl2re8_v, int8_t, lde_b_tlb, lde_b_host) 868 GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h_tlb, lde_h_host) 869 GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w_tlb, lde_w_host) 870 GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d_tlb, lde_d_host) 871 GEN_VEXT_LD_WHOLE(vl4re8_v, int8_t, lde_b_tlb, lde_b_host) 872 GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h_tlb, lde_h_host) 873 GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w_tlb, lde_w_host) 874 GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d_tlb, lde_d_host) 875 GEN_VEXT_LD_WHOLE(vl8re8_v, int8_t, lde_b_tlb, lde_b_host) 876 GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h_tlb, lde_h_host) 877 GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w_tlb, lde_w_host) 878 GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d_tlb, lde_d_host) 879 880 #define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN_TLB, STORE_FN_HOST) \ 881 void HELPER(NAME)(void *vd, target_ulong base, CPURISCVState *env, \ 882 uint32_t desc) \ 883 { \ 884 vext_ldst_whole(vd, base, env, desc, STORE_FN_TLB, STORE_FN_HOST, \ 885 ctzl(sizeof(ETYPE)), GETPC(), false); \ 886 } 887 888 GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b_tlb, ste_b_host) 889 GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b_tlb, ste_b_host) 890 GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, 
ste_b_tlb, ste_b_host) 891 GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b_tlb, ste_b_host) 892 893 /* 894 * Vector Integer Arithmetic Instructions 895 */ 896 897 /* (TD, T1, T2, TX1, TX2) */ 898 #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t 899 #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t 900 #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t 901 #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t 902 #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t 903 #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t 904 #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t 905 #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t 906 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 907 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 908 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 909 #define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t 910 #define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t 911 #define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t 912 #define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t 913 #define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t 914 #define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t 915 #define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t 916 #define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t 917 #define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t 918 #define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t 919 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t 920 #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t 921 922 #define DO_SUB(N, M) (N - M) 923 #define DO_RSUB(N, M) (M - N) 924 925 RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD) 926 RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD) 927 RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD) 928 RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD) 929 RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB) 930 RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB) 931 RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB) 932 RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB) 933 934 GEN_VEXT_VV(vadd_vv_b, 1) 935 GEN_VEXT_VV(vadd_vv_h, 2) 936 GEN_VEXT_VV(vadd_vv_w, 4) 937 GEN_VEXT_VV(vadd_vv_d, 8) 938 GEN_VEXT_VV(vsub_vv_b, 1) 939 GEN_VEXT_VV(vsub_vv_h, 2) 940 GEN_VEXT_VV(vsub_vv_w, 4) 941 GEN_VEXT_VV(vsub_vv_d, 8) 942 943 944 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD) 945 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD) 946 RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD) 947 RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD) 948 RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB) 949 RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB) 950 RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB) 951 RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB) 952 RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB) 953 RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB) 954 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB) 955 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB) 956 957 GEN_VEXT_VX(vadd_vx_b, 1) 958 GEN_VEXT_VX(vadd_vx_h, 2) 959 GEN_VEXT_VX(vadd_vx_w, 4) 960 GEN_VEXT_VX(vadd_vx_d, 8) 961 GEN_VEXT_VX(vsub_vx_b, 1) 962 GEN_VEXT_VX(vsub_vx_h, 2) 963 GEN_VEXT_VX(vsub_vx_w, 4) 964 GEN_VEXT_VX(vsub_vx_d, 8) 965 GEN_VEXT_VX(vrsub_vx_b, 1) 966 GEN_VEXT_VX(vrsub_vx_h, 2) 967 GEN_VEXT_VX(vrsub_vx_w, 4) 968 
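/*
 * Illustrative sketch, not part of the original source: vrsub.vx computes
 * x[rs1] - vs2[i] for each active element.  The OPIVX2 pattern (see
 * vector_internals.h) invokes its operation as OP(s2, s1) with s1 being
 * the scalar operand, so DO_RSUB(N, M) = (M - N) is what yields
 * scalar - element.  The helper name below is hypothetical and exists
 * only for illustration.
 */
static inline int32_t example_vrsub_vx_w_elem(int32_t s2, int32_t x_rs1)
{
    /* DO_RSUB(s2, x_rs1) expands to (x_rs1 - s2) */
    return DO_RSUB(s2, x_rs1);
}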
GEN_VEXT_VX(vrsub_vx_d, 8) 969 970 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc) 971 { 972 intptr_t oprsz = simd_oprsz(desc); 973 intptr_t i; 974 975 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 976 *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i); 977 } 978 } 979 980 void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc) 981 { 982 intptr_t oprsz = simd_oprsz(desc); 983 intptr_t i; 984 985 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 986 *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i); 987 } 988 } 989 990 void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc) 991 { 992 intptr_t oprsz = simd_oprsz(desc); 993 intptr_t i; 994 995 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 996 *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i); 997 } 998 } 999 1000 void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc) 1001 { 1002 intptr_t oprsz = simd_oprsz(desc); 1003 intptr_t i; 1004 1005 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1006 *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i); 1007 } 1008 } 1009 1010 /* Vector Widening Integer Add/Subtract */ 1011 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t 1012 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t 1013 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t 1014 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 1015 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 1016 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 1017 #define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t 1018 #define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t 1019 #define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t 1020 #define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t 1021 #define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t 1022 #define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t 1023 RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD) 1024 RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD) 1025 RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD) 1026 RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB) 1027 RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB) 1028 RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB) 1029 RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD) 1030 RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD) 1031 RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD) 1032 RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB) 1033 RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB) 1034 RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB) 1035 RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD) 1036 RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD) 1037 RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD) 1038 RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB) 1039 RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB) 1040 RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB) 1041 RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD) 1042 RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD) 1043 RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD) 1044 RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB) 1045 RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB) 1046 RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB) 1047 
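/*
 * Illustrative sketch, not part of the original source: what one element
 * operation of vwadd_vv_b (RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, ...)
 * above) amounts to.  The type tuple WOP_SSS_B = (TD, T1, T2, TX1, TX2)
 * reads int8_t elements but widens them to int16_t before DO_ADD, so the
 * full-precision sum is stored without truncation.  The function name is
 * hypothetical and exists only for illustration.
 */
static inline int16_t example_vwadd_vv_b_elem(int8_t s2, int8_t s1)
{
    /* DO_ADD on the widened (TX2, TX1) operands, stored as TD */
    return (int16_t)((int16_t)s2 + (int16_t)s1);
}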
GEN_VEXT_VV(vwaddu_vv_b, 2) 1048 GEN_VEXT_VV(vwaddu_vv_h, 4) 1049 GEN_VEXT_VV(vwaddu_vv_w, 8) 1050 GEN_VEXT_VV(vwsubu_vv_b, 2) 1051 GEN_VEXT_VV(vwsubu_vv_h, 4) 1052 GEN_VEXT_VV(vwsubu_vv_w, 8) 1053 GEN_VEXT_VV(vwadd_vv_b, 2) 1054 GEN_VEXT_VV(vwadd_vv_h, 4) 1055 GEN_VEXT_VV(vwadd_vv_w, 8) 1056 GEN_VEXT_VV(vwsub_vv_b, 2) 1057 GEN_VEXT_VV(vwsub_vv_h, 4) 1058 GEN_VEXT_VV(vwsub_vv_w, 8) 1059 GEN_VEXT_VV(vwaddu_wv_b, 2) 1060 GEN_VEXT_VV(vwaddu_wv_h, 4) 1061 GEN_VEXT_VV(vwaddu_wv_w, 8) 1062 GEN_VEXT_VV(vwsubu_wv_b, 2) 1063 GEN_VEXT_VV(vwsubu_wv_h, 4) 1064 GEN_VEXT_VV(vwsubu_wv_w, 8) 1065 GEN_VEXT_VV(vwadd_wv_b, 2) 1066 GEN_VEXT_VV(vwadd_wv_h, 4) 1067 GEN_VEXT_VV(vwadd_wv_w, 8) 1068 GEN_VEXT_VV(vwsub_wv_b, 2) 1069 GEN_VEXT_VV(vwsub_wv_h, 4) 1070 GEN_VEXT_VV(vwsub_wv_w, 8) 1071 1072 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD) 1073 RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD) 1074 RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD) 1075 RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB) 1076 RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB) 1077 RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB) 1078 RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD) 1079 RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD) 1080 RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD) 1081 RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB) 1082 RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB) 1083 RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB) 1084 RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD) 1085 RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD) 1086 RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD) 1087 RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB) 1088 RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB) 1089 RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB) 1090 RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD) 1091 RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD) 1092 RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD) 1093 RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB) 1094 RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB) 1095 RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB) 1096 GEN_VEXT_VX(vwaddu_vx_b, 2) 1097 GEN_VEXT_VX(vwaddu_vx_h, 4) 1098 GEN_VEXT_VX(vwaddu_vx_w, 8) 1099 GEN_VEXT_VX(vwsubu_vx_b, 2) 1100 GEN_VEXT_VX(vwsubu_vx_h, 4) 1101 GEN_VEXT_VX(vwsubu_vx_w, 8) 1102 GEN_VEXT_VX(vwadd_vx_b, 2) 1103 GEN_VEXT_VX(vwadd_vx_h, 4) 1104 GEN_VEXT_VX(vwadd_vx_w, 8) 1105 GEN_VEXT_VX(vwsub_vx_b, 2) 1106 GEN_VEXT_VX(vwsub_vx_h, 4) 1107 GEN_VEXT_VX(vwsub_vx_w, 8) 1108 GEN_VEXT_VX(vwaddu_wx_b, 2) 1109 GEN_VEXT_VX(vwaddu_wx_h, 4) 1110 GEN_VEXT_VX(vwaddu_wx_w, 8) 1111 GEN_VEXT_VX(vwsubu_wx_b, 2) 1112 GEN_VEXT_VX(vwsubu_wx_h, 4) 1113 GEN_VEXT_VX(vwsubu_wx_w, 8) 1114 GEN_VEXT_VX(vwadd_wx_b, 2) 1115 GEN_VEXT_VX(vwadd_wx_h, 4) 1116 GEN_VEXT_VX(vwadd_wx_w, 8) 1117 GEN_VEXT_VX(vwsub_wx_b, 2) 1118 GEN_VEXT_VX(vwsub_wx_h, 4) 1119 GEN_VEXT_VX(vwsub_wx_w, 8) 1120 1121 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ 1122 #define DO_VADC(N, M, C) (N + M + C) 1123 #define DO_VSBC(N, M, C) (N - M - C) 1124 1125 #define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP) \ 1126 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1127 CPURISCVState *env, uint32_t desc) \ 1128 { \ 1129 uint32_t vl = env->vl; \ 1130 uint32_t esz = sizeof(ETYPE); \ 1131 uint32_t total_elems = \ 1132 vext_get_total_elems(env, desc, esz); \ 1133 uint32_t vta = vext_vta(desc); \ 1134 
uint32_t i; \ 1135 \ 1136 VSTART_CHECK_EARLY_EXIT(env, vl); \ 1137 \ 1138 for (i = env->vstart; i < vl; i++) { \ 1139 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1140 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1141 ETYPE carry = vext_elem_mask(v0, i); \ 1142 \ 1143 *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \ 1144 } \ 1145 env->vstart = 0; \ 1146 /* set tail elements to 1s */ \ 1147 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1148 } 1149 1150 GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC) 1151 GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC) 1152 GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC) 1153 GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC) 1154 1155 GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC) 1156 GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC) 1157 GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC) 1158 GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC) 1159 1160 #define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP) \ 1161 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 1162 CPURISCVState *env, uint32_t desc) \ 1163 { \ 1164 uint32_t vl = env->vl; \ 1165 uint32_t esz = sizeof(ETYPE); \ 1166 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 1167 uint32_t vta = vext_vta(desc); \ 1168 uint32_t i; \ 1169 \ 1170 VSTART_CHECK_EARLY_EXIT(env, vl); \ 1171 \ 1172 for (i = env->vstart; i < vl; i++) { \ 1173 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1174 ETYPE carry = vext_elem_mask(v0, i); \ 1175 \ 1176 *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\ 1177 } \ 1178 env->vstart = 0; \ 1179 /* set tail elements to 1s */ \ 1180 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1181 } 1182 1183 GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC) 1184 GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC) 1185 GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC) 1186 GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC) 1187 1188 GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC) 1189 GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC) 1190 GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC) 1191 GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC) 1192 1193 #define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \ 1194 (__typeof(N))(N + M) < N) 1195 #define DO_MSBC(N, M, C) (C ? 
N <= M : N < M) 1196 1197 #define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \ 1198 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1199 CPURISCVState *env, uint32_t desc) \ 1200 { \ 1201 uint32_t vl = env->vl; \ 1202 uint32_t vm = vext_vm(desc); \ 1203 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \ 1204 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 1205 uint32_t i; \ 1206 \ 1207 VSTART_CHECK_EARLY_EXIT(env, vl); \ 1208 \ 1209 for (i = env->vstart; i < vl; i++) { \ 1210 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1211 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1212 ETYPE carry = !vm && vext_elem_mask(v0, i); \ 1213 vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \ 1214 } \ 1215 env->vstart = 0; \ 1216 /* 1217 * mask destination register are always tail-agnostic 1218 * set tail elements to 1s 1219 */ \ 1220 if (vta_all_1s) { \ 1221 for (; i < total_elems; i++) { \ 1222 vext_set_elem_mask(vd, i, 1); \ 1223 } \ 1224 } \ 1225 } 1226 1227 GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC) 1228 GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC) 1229 GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC) 1230 GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC) 1231 1232 GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC) 1233 GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC) 1234 GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC) 1235 GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC) 1236 1237 #define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \ 1238 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1239 void *vs2, CPURISCVState *env, uint32_t desc) \ 1240 { \ 1241 uint32_t vl = env->vl; \ 1242 uint32_t vm = vext_vm(desc); \ 1243 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \ 1244 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 1245 uint32_t i; \ 1246 \ 1247 VSTART_CHECK_EARLY_EXIT(env, vl); \ 1248 \ 1249 for (i = env->vstart; i < vl; i++) { \ 1250 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1251 ETYPE carry = !vm && vext_elem_mask(v0, i); \ 1252 vext_set_elem_mask(vd, i, \ 1253 DO_OP(s2, (ETYPE)(target_long)s1, carry)); \ 1254 } \ 1255 env->vstart = 0; \ 1256 /* 1257 * mask destination register are always tail-agnostic 1258 * set tail elements to 1s 1259 */ \ 1260 if (vta_all_1s) { \ 1261 for (; i < total_elems; i++) { \ 1262 vext_set_elem_mask(vd, i, 1); \ 1263 } \ 1264 } \ 1265 } 1266 1267 GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC) 1268 GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC) 1269 GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC) 1270 GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC) 1271 1272 GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC) 1273 GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC) 1274 GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC) 1275 GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC) 1276 1277 /* Vector Bitwise Logical Instructions */ 1278 RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND) 1279 RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND) 1280 RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND) 1281 RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND) 1282 RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR) 1283 RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR) 1284 RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR) 1285 RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR) 1286 RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR) 1287 RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR) 1288 RVVCALL(OPIVV2, vxor_vv_w, 
OP_SSS_W, H4, H4, H4, DO_XOR) 1289 RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR) 1290 GEN_VEXT_VV(vand_vv_b, 1) 1291 GEN_VEXT_VV(vand_vv_h, 2) 1292 GEN_VEXT_VV(vand_vv_w, 4) 1293 GEN_VEXT_VV(vand_vv_d, 8) 1294 GEN_VEXT_VV(vor_vv_b, 1) 1295 GEN_VEXT_VV(vor_vv_h, 2) 1296 GEN_VEXT_VV(vor_vv_w, 4) 1297 GEN_VEXT_VV(vor_vv_d, 8) 1298 GEN_VEXT_VV(vxor_vv_b, 1) 1299 GEN_VEXT_VV(vxor_vv_h, 2) 1300 GEN_VEXT_VV(vxor_vv_w, 4) 1301 GEN_VEXT_VV(vxor_vv_d, 8) 1302 1303 RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND) 1304 RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND) 1305 RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND) 1306 RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND) 1307 RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR) 1308 RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR) 1309 RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR) 1310 RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR) 1311 RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR) 1312 RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR) 1313 RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR) 1314 RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR) 1315 GEN_VEXT_VX(vand_vx_b, 1) 1316 GEN_VEXT_VX(vand_vx_h, 2) 1317 GEN_VEXT_VX(vand_vx_w, 4) 1318 GEN_VEXT_VX(vand_vx_d, 8) 1319 GEN_VEXT_VX(vor_vx_b, 1) 1320 GEN_VEXT_VX(vor_vx_h, 2) 1321 GEN_VEXT_VX(vor_vx_w, 4) 1322 GEN_VEXT_VX(vor_vx_d, 8) 1323 GEN_VEXT_VX(vxor_vx_b, 1) 1324 GEN_VEXT_VX(vxor_vx_h, 2) 1325 GEN_VEXT_VX(vxor_vx_w, 4) 1326 GEN_VEXT_VX(vxor_vx_d, 8) 1327 1328 /* Vector Single-Width Bit Shift Instructions */ 1329 #define DO_SLL(N, M) (N << (M)) 1330 #define DO_SRL(N, M) (N >> (M)) 1331 1332 /* generate the helpers for shift instructions with two vector operators */ 1333 #define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK) \ 1334 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 1335 void *vs2, CPURISCVState *env, uint32_t desc) \ 1336 { \ 1337 uint32_t vm = vext_vm(desc); \ 1338 uint32_t vl = env->vl; \ 1339 uint32_t esz = sizeof(TS1); \ 1340 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 1341 uint32_t vta = vext_vta(desc); \ 1342 uint32_t vma = vext_vma(desc); \ 1343 uint32_t i; \ 1344 \ 1345 VSTART_CHECK_EARLY_EXIT(env, vl); \ 1346 \ 1347 for (i = env->vstart; i < vl; i++) { \ 1348 if (!vm && !vext_elem_mask(v0, i)) { \ 1349 /* set masked-off elements to 1s */ \ 1350 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 1351 continue; \ 1352 } \ 1353 TS1 s1 = *((TS1 *)vs1 + HS1(i)); \ 1354 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1355 *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \ 1356 } \ 1357 env->vstart = 0; \ 1358 /* set tail elements to 1s */ \ 1359 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1360 } 1361 1362 GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7) 1363 GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf) 1364 GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f) 1365 GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f) 1366 1367 GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1368 GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1369 GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1370 GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1371 1372 GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7) 1373 GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf) 1374 GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, 
DO_SRL, 0x1f) 1375 GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1376 1377 /* 1378 * generate the helpers for shift instructions with one vector and one scalar 1379 */ 1380 #define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \ 1381 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1382 void *vs2, CPURISCVState *env, \ 1383 uint32_t desc) \ 1384 { \ 1385 uint32_t vm = vext_vm(desc); \ 1386 uint32_t vl = env->vl; \ 1387 uint32_t esz = sizeof(TD); \ 1388 uint32_t total_elems = \ 1389 vext_get_total_elems(env, desc, esz); \ 1390 uint32_t vta = vext_vta(desc); \ 1391 uint32_t vma = vext_vma(desc); \ 1392 uint32_t i; \ 1393 \ 1394 VSTART_CHECK_EARLY_EXIT(env, vl); \ 1395 \ 1396 for (i = env->vstart; i < vl; i++) { \ 1397 if (!vm && !vext_elem_mask(v0, i)) { \ 1398 /* set masked-off elements to 1s */ \ 1399 vext_set_elems_1s(vd, vma, i * esz, \ 1400 (i + 1) * esz); \ 1401 continue; \ 1402 } \ 1403 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1404 *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \ 1405 } \ 1406 env->vstart = 0; \ 1407 /* set tail elements to 1s */ \ 1408 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);\ 1409 } 1410 1411 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7) 1412 GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf) 1413 GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f) 1414 GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f) 1415 1416 GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1417 GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1418 GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1419 GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1420 1421 GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7) 1422 GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf) 1423 GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f) 1424 GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1425 1426 /* Vector Narrowing Integer Right Shift Instructions */ 1427 GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1428 GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1429 GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1430 GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf) 1431 GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1432 GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1433 GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1434 GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1435 GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1436 GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf) 1437 GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1438 GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1439 1440 /* Vector Integer Comparison Instructions */ 1441 #define DO_MSEQ(N, M) (N == M) 1442 #define DO_MSNE(N, M) (N != M) 1443 #define DO_MSLT(N, M) (N < M) 1444 #define DO_MSLE(N, M) (N <= M) 1445 #define DO_MSGT(N, M) (N > M) 1446 1447 #define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \ 1448 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1449 CPURISCVState *env, uint32_t desc) \ 1450 { \ 1451 uint32_t vm = vext_vm(desc); \ 1452 uint32_t vl = env->vl; \ 1453 uint32_t total_elems = 
riscv_cpu_cfg(env)->vlenb << 3; \ 1454 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 1455 uint32_t vma = vext_vma(desc); \ 1456 uint32_t i; \ 1457 \ 1458 VSTART_CHECK_EARLY_EXIT(env, vl); \ 1459 \ 1460 for (i = env->vstart; i < vl; i++) { \ 1461 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1462 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1463 if (!vm && !vext_elem_mask(v0, i)) { \ 1464 /* set masked-off elements to 1s */ \ 1465 if (vma) { \ 1466 vext_set_elem_mask(vd, i, 1); \ 1467 } \ 1468 continue; \ 1469 } \ 1470 vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \ 1471 } \ 1472 env->vstart = 0; \ 1473 /* 1474 * mask destination register are always tail-agnostic 1475 * set tail elements to 1s 1476 */ \ 1477 if (vta_all_1s) { \ 1478 for (; i < total_elems; i++) { \ 1479 vext_set_elem_mask(vd, i, 1); \ 1480 } \ 1481 } \ 1482 } 1483 1484 GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ) 1485 GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ) 1486 GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ) 1487 GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ) 1488 1489 GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE) 1490 GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE) 1491 GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE) 1492 GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE) 1493 1494 GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT) 1495 GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT) 1496 GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT) 1497 GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT) 1498 1499 GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT) 1500 GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT) 1501 GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT) 1502 GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT) 1503 1504 GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE) 1505 GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE) 1506 GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE) 1507 GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE) 1508 1509 GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE) 1510 GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE) 1511 GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE) 1512 GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE) 1513 1514 #define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \ 1515 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 1516 CPURISCVState *env, uint32_t desc) \ 1517 { \ 1518 uint32_t vm = vext_vm(desc); \ 1519 uint32_t vl = env->vl; \ 1520 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \ 1521 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 1522 uint32_t vma = vext_vma(desc); \ 1523 uint32_t i; \ 1524 \ 1525 VSTART_CHECK_EARLY_EXIT(env, vl); \ 1526 \ 1527 for (i = env->vstart; i < vl; i++) { \ 1528 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1529 if (!vm && !vext_elem_mask(v0, i)) { \ 1530 /* set masked-off elements to 1s */ \ 1531 if (vma) { \ 1532 vext_set_elem_mask(vd, i, 1); \ 1533 } \ 1534 continue; \ 1535 } \ 1536 vext_set_elem_mask(vd, i, \ 1537 DO_OP(s2, (ETYPE)(target_long)s1)); \ 1538 } \ 1539 env->vstart = 0; \ 1540 /* 1541 * mask destination register are always tail-agnostic 1542 * set tail elements to 1s 1543 */ \ 1544 if (vta_all_1s) { \ 1545 for (; i < total_elems; i++) { \ 1546 vext_set_elem_mask(vd, i, 1); \ 1547 } \ 1548 } \ 1549 } 1550 1551 GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ) 1552 GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ) 1553 GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ) 1554 GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ) 1555 1556 GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE) 
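/*
 * Illustrative sketch, not part of the original source: how a compare
 * helper's per-element result is laid out in the destination mask
 * register.  vext_set_elem_mask() (defined earlier in this file) packs
 * one bit per element, element i landing in bit i % 64 of 64-bit word
 * i / 64 of vd, which is also what the vta_all_1s tail loop in
 * GEN_VEXT_CMP_VX fills with 1s.  The reader below is hypothetical,
 * for illustration only.
 */
static inline bool example_mask_bit(const uint64_t *vd, uint32_t i)
{
    /* mirror of vext_set_elem_mask(): one bit per element */
    return (vd[i / 64] >> (i % 64)) & 1;
}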
1557 GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE) 1558 GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE) 1559 GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE) 1560 1561 GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT) 1562 GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT) 1563 GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT) 1564 GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT) 1565 1566 GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT) 1567 GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT) 1568 GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT) 1569 GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT) 1570 1571 GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE) 1572 GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE) 1573 GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE) 1574 GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE) 1575 1576 GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE) 1577 GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE) 1578 GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE) 1579 GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE) 1580 1581 GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT) 1582 GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT) 1583 GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT) 1584 GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT) 1585 1586 GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT) 1587 GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT) 1588 GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT) 1589 GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT) 1590 1591 /* Vector Integer Min/Max Instructions */ 1592 RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN) 1593 RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN) 1594 RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN) 1595 RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN) 1596 RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN) 1597 RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN) 1598 RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN) 1599 RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN) 1600 RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX) 1601 RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX) 1602 RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX) 1603 RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX) 1604 RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX) 1605 RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX) 1606 RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX) 1607 RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX) 1608 GEN_VEXT_VV(vminu_vv_b, 1) 1609 GEN_VEXT_VV(vminu_vv_h, 2) 1610 GEN_VEXT_VV(vminu_vv_w, 4) 1611 GEN_VEXT_VV(vminu_vv_d, 8) 1612 GEN_VEXT_VV(vmin_vv_b, 1) 1613 GEN_VEXT_VV(vmin_vv_h, 2) 1614 GEN_VEXT_VV(vmin_vv_w, 4) 1615 GEN_VEXT_VV(vmin_vv_d, 8) 1616 GEN_VEXT_VV(vmaxu_vv_b, 1) 1617 GEN_VEXT_VV(vmaxu_vv_h, 2) 1618 GEN_VEXT_VV(vmaxu_vv_w, 4) 1619 GEN_VEXT_VV(vmaxu_vv_d, 8) 1620 GEN_VEXT_VV(vmax_vv_b, 1) 1621 GEN_VEXT_VV(vmax_vv_h, 2) 1622 GEN_VEXT_VV(vmax_vv_w, 4) 1623 GEN_VEXT_VV(vmax_vv_d, 8) 1624 1625 RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN) 1626 RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN) 1627 RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN) 1628 RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN) 1629 RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN) 1630 RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN) 1631 RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN) 1632 RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, 
H8, DO_MIN) 1633 RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX) 1634 RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX) 1635 RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX) 1636 RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX) 1637 RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX) 1638 RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX) 1639 RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX) 1640 RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX) 1641 GEN_VEXT_VX(vminu_vx_b, 1) 1642 GEN_VEXT_VX(vminu_vx_h, 2) 1643 GEN_VEXT_VX(vminu_vx_w, 4) 1644 GEN_VEXT_VX(vminu_vx_d, 8) 1645 GEN_VEXT_VX(vmin_vx_b, 1) 1646 GEN_VEXT_VX(vmin_vx_h, 2) 1647 GEN_VEXT_VX(vmin_vx_w, 4) 1648 GEN_VEXT_VX(vmin_vx_d, 8) 1649 GEN_VEXT_VX(vmaxu_vx_b, 1) 1650 GEN_VEXT_VX(vmaxu_vx_h, 2) 1651 GEN_VEXT_VX(vmaxu_vx_w, 4) 1652 GEN_VEXT_VX(vmaxu_vx_d, 8) 1653 GEN_VEXT_VX(vmax_vx_b, 1) 1654 GEN_VEXT_VX(vmax_vx_h, 2) 1655 GEN_VEXT_VX(vmax_vx_w, 4) 1656 GEN_VEXT_VX(vmax_vx_d, 8) 1657 1658 /* Vector Single-Width Integer Multiply Instructions */ 1659 #define DO_MUL(N, M) (N * M) 1660 RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL) 1661 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL) 1662 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL) 1663 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL) 1664 GEN_VEXT_VV(vmul_vv_b, 1) 1665 GEN_VEXT_VV(vmul_vv_h, 2) 1666 GEN_VEXT_VV(vmul_vv_w, 4) 1667 GEN_VEXT_VV(vmul_vv_d, 8) 1668 1669 static int8_t do_mulh_b(int8_t s2, int8_t s1) 1670 { 1671 return (int16_t)s2 * (int16_t)s1 >> 8; 1672 } 1673 1674 static int16_t do_mulh_h(int16_t s2, int16_t s1) 1675 { 1676 return (int32_t)s2 * (int32_t)s1 >> 16; 1677 } 1678 1679 static int32_t do_mulh_w(int32_t s2, int32_t s1) 1680 { 1681 return (int64_t)s2 * (int64_t)s1 >> 32; 1682 } 1683 1684 static int64_t do_mulh_d(int64_t s2, int64_t s1) 1685 { 1686 uint64_t hi_64, lo_64; 1687 1688 muls64(&lo_64, &hi_64, s1, s2); 1689 return hi_64; 1690 } 1691 1692 static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1) 1693 { 1694 return (uint16_t)s2 * (uint16_t)s1 >> 8; 1695 } 1696 1697 static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1) 1698 { 1699 return (uint32_t)s2 * (uint32_t)s1 >> 16; 1700 } 1701 1702 static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1) 1703 { 1704 return (uint64_t)s2 * (uint64_t)s1 >> 32; 1705 } 1706 1707 static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1) 1708 { 1709 uint64_t hi_64, lo_64; 1710 1711 mulu64(&lo_64, &hi_64, s2, s1); 1712 return hi_64; 1713 } 1714 1715 static int8_t do_mulhsu_b(int8_t s2, uint8_t s1) 1716 { 1717 return (int16_t)s2 * (uint16_t)s1 >> 8; 1718 } 1719 1720 static int16_t do_mulhsu_h(int16_t s2, uint16_t s1) 1721 { 1722 return (int32_t)s2 * (uint32_t)s1 >> 16; 1723 } 1724 1725 static int32_t do_mulhsu_w(int32_t s2, uint32_t s1) 1726 { 1727 return (int64_t)s2 * (uint64_t)s1 >> 32; 1728 } 1729 1730 /* 1731 * Let A = signed operand, 1732 * B = unsigned operand 1733 * P = mulu64(A, B), unsigned product 1734 * 1735 * LET X = 2 ** 64 - A, 2's complement of A 1736 * SP = signed product 1737 * THEN 1738 * IF A < 0 1739 * SP = -X * B 1740 * = -(2 ** 64 - A) * B 1741 * = A * B - 2 ** 64 * B 1742 * = P - 2 ** 64 * B 1743 * ELSE 1744 * SP = P 1745 * THEN 1746 * HI_P -= (A < 0 ? B : 0) 1747 */ 1748 1749 static int64_t do_mulhsu_d(int64_t s2, uint64_t s1) 1750 { 1751 uint64_t hi_64, lo_64; 1752 1753 mulu64(&lo_64, &hi_64, s2, s1); 1754 1755 hi_64 -= s2 < 0 ? 
s1 : 0; 1756 return hi_64; 1757 } 1758 1759 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b) 1760 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h) 1761 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w) 1762 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d) 1763 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b) 1764 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h) 1765 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w) 1766 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d) 1767 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b) 1768 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h) 1769 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w) 1770 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d) 1771 GEN_VEXT_VV(vmulh_vv_b, 1) 1772 GEN_VEXT_VV(vmulh_vv_h, 2) 1773 GEN_VEXT_VV(vmulh_vv_w, 4) 1774 GEN_VEXT_VV(vmulh_vv_d, 8) 1775 GEN_VEXT_VV(vmulhu_vv_b, 1) 1776 GEN_VEXT_VV(vmulhu_vv_h, 2) 1777 GEN_VEXT_VV(vmulhu_vv_w, 4) 1778 GEN_VEXT_VV(vmulhu_vv_d, 8) 1779 GEN_VEXT_VV(vmulhsu_vv_b, 1) 1780 GEN_VEXT_VV(vmulhsu_vv_h, 2) 1781 GEN_VEXT_VV(vmulhsu_vv_w, 4) 1782 GEN_VEXT_VV(vmulhsu_vv_d, 8) 1783 1784 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL) 1785 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL) 1786 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL) 1787 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL) 1788 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b) 1789 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h) 1790 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w) 1791 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d) 1792 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b) 1793 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h) 1794 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w) 1795 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d) 1796 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b) 1797 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h) 1798 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w) 1799 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d) 1800 GEN_VEXT_VX(vmul_vx_b, 1) 1801 GEN_VEXT_VX(vmul_vx_h, 2) 1802 GEN_VEXT_VX(vmul_vx_w, 4) 1803 GEN_VEXT_VX(vmul_vx_d, 8) 1804 GEN_VEXT_VX(vmulh_vx_b, 1) 1805 GEN_VEXT_VX(vmulh_vx_h, 2) 1806 GEN_VEXT_VX(vmulh_vx_w, 4) 1807 GEN_VEXT_VX(vmulh_vx_d, 8) 1808 GEN_VEXT_VX(vmulhu_vx_b, 1) 1809 GEN_VEXT_VX(vmulhu_vx_h, 2) 1810 GEN_VEXT_VX(vmulhu_vx_w, 4) 1811 GEN_VEXT_VX(vmulhu_vx_d, 8) 1812 GEN_VEXT_VX(vmulhsu_vx_b, 1) 1813 GEN_VEXT_VX(vmulhsu_vx_h, 2) 1814 GEN_VEXT_VX(vmulhsu_vx_w, 4) 1815 GEN_VEXT_VX(vmulhsu_vx_d, 8) 1816 1817 /* Vector Integer Divide Instructions */ 1818 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M) 1819 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M) 1820 #define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : \ 1821 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M) 1822 #define DO_REM(N, M) (unlikely(M == 0) ? N : \ 1823 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 
0 : N % M) 1824 1825 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU) 1826 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU) 1827 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU) 1828 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU) 1829 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV) 1830 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV) 1831 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV) 1832 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV) 1833 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU) 1834 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU) 1835 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU) 1836 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU) 1837 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM) 1838 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM) 1839 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM) 1840 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM) 1841 GEN_VEXT_VV(vdivu_vv_b, 1) 1842 GEN_VEXT_VV(vdivu_vv_h, 2) 1843 GEN_VEXT_VV(vdivu_vv_w, 4) 1844 GEN_VEXT_VV(vdivu_vv_d, 8) 1845 GEN_VEXT_VV(vdiv_vv_b, 1) 1846 GEN_VEXT_VV(vdiv_vv_h, 2) 1847 GEN_VEXT_VV(vdiv_vv_w, 4) 1848 GEN_VEXT_VV(vdiv_vv_d, 8) 1849 GEN_VEXT_VV(vremu_vv_b, 1) 1850 GEN_VEXT_VV(vremu_vv_h, 2) 1851 GEN_VEXT_VV(vremu_vv_w, 4) 1852 GEN_VEXT_VV(vremu_vv_d, 8) 1853 GEN_VEXT_VV(vrem_vv_b, 1) 1854 GEN_VEXT_VV(vrem_vv_h, 2) 1855 GEN_VEXT_VV(vrem_vv_w, 4) 1856 GEN_VEXT_VV(vrem_vv_d, 8) 1857 1858 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU) 1859 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU) 1860 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU) 1861 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU) 1862 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV) 1863 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV) 1864 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV) 1865 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV) 1866 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU) 1867 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU) 1868 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU) 1869 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU) 1870 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) 1871 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM) 1872 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM) 1873 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM) 1874 GEN_VEXT_VX(vdivu_vx_b, 1) 1875 GEN_VEXT_VX(vdivu_vx_h, 2) 1876 GEN_VEXT_VX(vdivu_vx_w, 4) 1877 GEN_VEXT_VX(vdivu_vx_d, 8) 1878 GEN_VEXT_VX(vdiv_vx_b, 1) 1879 GEN_VEXT_VX(vdiv_vx_h, 2) 1880 GEN_VEXT_VX(vdiv_vx_w, 4) 1881 GEN_VEXT_VX(vdiv_vx_d, 8) 1882 GEN_VEXT_VX(vremu_vx_b, 1) 1883 GEN_VEXT_VX(vremu_vx_h, 2) 1884 GEN_VEXT_VX(vremu_vx_w, 4) 1885 GEN_VEXT_VX(vremu_vx_d, 8) 1886 GEN_VEXT_VX(vrem_vx_b, 1) 1887 GEN_VEXT_VX(vrem_vx_h, 2) 1888 GEN_VEXT_VX(vrem_vx_w, 4) 1889 GEN_VEXT_VX(vrem_vx_d, 8) 1890 1891 /* Vector Widening Integer Multiply Instructions */ 1892 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) 1893 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL) 1894 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL) 1895 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL) 1896 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL) 1897 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL) 1898 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) 1899 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, 
DO_MUL) 1900 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) 1901 GEN_VEXT_VV(vwmul_vv_b, 2) 1902 GEN_VEXT_VV(vwmul_vv_h, 4) 1903 GEN_VEXT_VV(vwmul_vv_w, 8) 1904 GEN_VEXT_VV(vwmulu_vv_b, 2) 1905 GEN_VEXT_VV(vwmulu_vv_h, 4) 1906 GEN_VEXT_VV(vwmulu_vv_w, 8) 1907 GEN_VEXT_VV(vwmulsu_vv_b, 2) 1908 GEN_VEXT_VV(vwmulsu_vv_h, 4) 1909 GEN_VEXT_VV(vwmulsu_vv_w, 8) 1910 1911 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) 1912 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) 1913 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL) 1914 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL) 1915 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL) 1916 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL) 1917 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) 1918 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) 1919 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) 1920 GEN_VEXT_VX(vwmul_vx_b, 2) 1921 GEN_VEXT_VX(vwmul_vx_h, 4) 1922 GEN_VEXT_VX(vwmul_vx_w, 8) 1923 GEN_VEXT_VX(vwmulu_vx_b, 2) 1924 GEN_VEXT_VX(vwmulu_vx_h, 4) 1925 GEN_VEXT_VX(vwmulu_vx_w, 8) 1926 GEN_VEXT_VX(vwmulsu_vx_b, 2) 1927 GEN_VEXT_VX(vwmulsu_vx_h, 4) 1928 GEN_VEXT_VX(vwmulsu_vx_w, 8) 1929 1930 /* Vector Single-Width Integer Multiply-Add Instructions */ 1931 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1932 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 1933 { \ 1934 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1935 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1936 TD d = *((TD *)vd + HD(i)); \ 1937 *((TD *)vd + HD(i)) = OP(s2, s1, d); \ 1938 } 1939 1940 #define DO_MACC(N, M, D) (M * N + D) 1941 #define DO_NMSAC(N, M, D) (-(M * N) + D) 1942 #define DO_MADD(N, M, D) (M * D + N) 1943 #define DO_NMSUB(N, M, D) (-(M * D) + N) 1944 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC) 1945 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC) 1946 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC) 1947 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC) 1948 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC) 1949 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC) 1950 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC) 1951 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC) 1952 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD) 1953 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD) 1954 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD) 1955 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD) 1956 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB) 1957 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) 1958 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) 1959 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) 1960 GEN_VEXT_VV(vmacc_vv_b, 1) 1961 GEN_VEXT_VV(vmacc_vv_h, 2) 1962 GEN_VEXT_VV(vmacc_vv_w, 4) 1963 GEN_VEXT_VV(vmacc_vv_d, 8) 1964 GEN_VEXT_VV(vnmsac_vv_b, 1) 1965 GEN_VEXT_VV(vnmsac_vv_h, 2) 1966 GEN_VEXT_VV(vnmsac_vv_w, 4) 1967 GEN_VEXT_VV(vnmsac_vv_d, 8) 1968 GEN_VEXT_VV(vmadd_vv_b, 1) 1969 GEN_VEXT_VV(vmadd_vv_h, 2) 1970 GEN_VEXT_VV(vmadd_vv_w, 4) 1971 GEN_VEXT_VV(vmadd_vv_d, 8) 1972 GEN_VEXT_VV(vnmsub_vv_b, 1) 1973 GEN_VEXT_VV(vnmsub_vv_h, 2) 1974 GEN_VEXT_VV(vnmsub_vv_w, 4) 1975 GEN_VEXT_VV(vnmsub_vv_d, 8) 1976 1977 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 1978 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 1979 { \ 1980 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1981 TD d = *((TD *)vd 
+ HD(i)); \ 1982 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \ 1983 } 1984 1985 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC) 1986 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC) 1987 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC) 1988 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC) 1989 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC) 1990 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC) 1991 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC) 1992 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC) 1993 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD) 1994 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD) 1995 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD) 1996 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD) 1997 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB) 1998 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) 1999 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) 2000 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) 2001 GEN_VEXT_VX(vmacc_vx_b, 1) 2002 GEN_VEXT_VX(vmacc_vx_h, 2) 2003 GEN_VEXT_VX(vmacc_vx_w, 4) 2004 GEN_VEXT_VX(vmacc_vx_d, 8) 2005 GEN_VEXT_VX(vnmsac_vx_b, 1) 2006 GEN_VEXT_VX(vnmsac_vx_h, 2) 2007 GEN_VEXT_VX(vnmsac_vx_w, 4) 2008 GEN_VEXT_VX(vnmsac_vx_d, 8) 2009 GEN_VEXT_VX(vmadd_vx_b, 1) 2010 GEN_VEXT_VX(vmadd_vx_h, 2) 2011 GEN_VEXT_VX(vmadd_vx_w, 4) 2012 GEN_VEXT_VX(vmadd_vx_d, 8) 2013 GEN_VEXT_VX(vnmsub_vx_b, 1) 2014 GEN_VEXT_VX(vnmsub_vx_h, 2) 2015 GEN_VEXT_VX(vnmsub_vx_w, 4) 2016 GEN_VEXT_VX(vnmsub_vx_d, 8) 2017 2018 /* Vector Widening Integer Multiply-Add Instructions */ 2019 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) 2020 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC) 2021 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC) 2022 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC) 2023 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC) 2024 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC) 2025 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) 2026 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) 2027 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) 2028 GEN_VEXT_VV(vwmaccu_vv_b, 2) 2029 GEN_VEXT_VV(vwmaccu_vv_h, 4) 2030 GEN_VEXT_VV(vwmaccu_vv_w, 8) 2031 GEN_VEXT_VV(vwmacc_vv_b, 2) 2032 GEN_VEXT_VV(vwmacc_vv_h, 4) 2033 GEN_VEXT_VV(vwmacc_vv_w, 8) 2034 GEN_VEXT_VV(vwmaccsu_vv_b, 2) 2035 GEN_VEXT_VV(vwmaccsu_vv_h, 4) 2036 GEN_VEXT_VV(vwmaccsu_vv_w, 8) 2037 2038 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) 2039 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) 2040 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC) 2041 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC) 2042 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC) 2043 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC) 2044 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC) 2045 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC) 2046 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC) 2047 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) 2048 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) 2049 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) 2050 GEN_VEXT_VX(vwmaccu_vx_b, 2) 2051 GEN_VEXT_VX(vwmaccu_vx_h, 4) 2052 GEN_VEXT_VX(vwmaccu_vx_w, 8) 2053 GEN_VEXT_VX(vwmacc_vx_b, 2) 2054 GEN_VEXT_VX(vwmacc_vx_h, 4) 2055 GEN_VEXT_VX(vwmacc_vx_w, 8) 2056 GEN_VEXT_VX(vwmaccsu_vx_b, 2) 2057 
GEN_VEXT_VX(vwmaccsu_vx_h, 4) 2058 GEN_VEXT_VX(vwmaccsu_vx_w, 8) 2059 GEN_VEXT_VX(vwmaccus_vx_b, 2) 2060 GEN_VEXT_VX(vwmaccus_vx_h, 4) 2061 GEN_VEXT_VX(vwmaccus_vx_w, 8) 2062 2063 /* Vector Integer Merge and Move Instructions */ 2064 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \ 2065 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \ 2066 uint32_t desc) \ 2067 { \ 2068 uint32_t vl = env->vl; \ 2069 uint32_t esz = sizeof(ETYPE); \ 2070 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2071 uint32_t vta = vext_vta(desc); \ 2072 uint32_t i; \ 2073 \ 2074 VSTART_CHECK_EARLY_EXIT(env, vl); \ 2075 \ 2076 for (i = env->vstart; i < vl; i++) { \ 2077 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 2078 *((ETYPE *)vd + H(i)) = s1; \ 2079 } \ 2080 env->vstart = 0; \ 2081 /* set tail elements to 1s */ \ 2082 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2083 } 2084 2085 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1) 2086 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2) 2087 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4) 2088 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8) 2089 2090 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \ 2091 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \ 2092 uint32_t desc) \ 2093 { \ 2094 uint32_t vl = env->vl; \ 2095 uint32_t esz = sizeof(ETYPE); \ 2096 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2097 uint32_t vta = vext_vta(desc); \ 2098 uint32_t i; \ 2099 \ 2100 VSTART_CHECK_EARLY_EXIT(env, vl); \ 2101 \ 2102 for (i = env->vstart; i < vl; i++) { \ 2103 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \ 2104 } \ 2105 env->vstart = 0; \ 2106 /* set tail elements to 1s */ \ 2107 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2108 } 2109 2110 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1) 2111 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2) 2112 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4) 2113 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8) 2114 2115 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \ 2116 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 2117 CPURISCVState *env, uint32_t desc) \ 2118 { \ 2119 uint32_t vl = env->vl; \ 2120 uint32_t esz = sizeof(ETYPE); \ 2121 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2122 uint32_t vta = vext_vta(desc); \ 2123 uint32_t i; \ 2124 \ 2125 VSTART_CHECK_EARLY_EXIT(env, vl); \ 2126 \ 2127 for (i = env->vstart; i < vl; i++) { \ 2128 ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1); \ 2129 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \ 2130 } \ 2131 env->vstart = 0; \ 2132 /* set tail elements to 1s */ \ 2133 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2134 } 2135 2136 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1) 2137 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2) 2138 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4) 2139 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8) 2140 2141 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \ 2142 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2143 void *vs2, CPURISCVState *env, uint32_t desc) \ 2144 { \ 2145 uint32_t vl = env->vl; \ 2146 uint32_t esz = sizeof(ETYPE); \ 2147 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2148 uint32_t vta = vext_vta(desc); \ 2149 uint32_t i; \ 2150 \ 2151 VSTART_CHECK_EARLY_EXIT(env, vl); \ 2152 \ 2153 for (i = env->vstart; i < vl; i++) { \ 2154 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 2155 ETYPE d = (!vext_elem_mask(v0, i) ? 
s2 : \ 2156 (ETYPE)(target_long)s1); \ 2157 *((ETYPE *)vd + H(i)) = d; \ 2158 } \ 2159 env->vstart = 0; \ 2160 /* set tail elements to 1s */ \ 2161 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2162 } 2163 2164 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1) 2165 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2) 2166 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4) 2167 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8) 2168 2169 /* 2170 * Vector Fixed-Point Arithmetic Instructions 2171 */ 2172 2173 /* Vector Single-Width Saturating Add and Subtract */ 2174 2175 /* 2176 * As fixed point instructions probably have round mode and saturation, 2177 * define common macros for fixed point here. 2178 */ 2179 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i, 2180 CPURISCVState *env, int vxrm); 2181 2182 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 2183 static inline void \ 2184 do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 2185 CPURISCVState *env, int vxrm) \ 2186 { \ 2187 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 2188 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2189 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \ 2190 } 2191 2192 static inline void 2193 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2, 2194 CPURISCVState *env, 2195 uint32_t vl, uint32_t vm, int vxrm, 2196 opivv2_rm_fn *fn, uint32_t vma, uint32_t esz) 2197 { 2198 for (uint32_t i = env->vstart; i < vl; i++) { 2199 if (!vm && !vext_elem_mask(v0, i)) { 2200 /* set masked-off elements to 1s */ 2201 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); 2202 continue; 2203 } 2204 fn(vd, vs1, vs2, i, env, vxrm); 2205 } 2206 env->vstart = 0; 2207 } 2208 2209 static inline void 2210 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, 2211 CPURISCVState *env, 2212 uint32_t desc, 2213 opivv2_rm_fn *fn, uint32_t esz) 2214 { 2215 uint32_t vm = vext_vm(desc); 2216 uint32_t vl = env->vl; 2217 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 2218 uint32_t vta = vext_vta(desc); 2219 uint32_t vma = vext_vma(desc); 2220 2221 VSTART_CHECK_EARLY_EXIT(env, vl); 2222 2223 switch (env->vxrm) { 2224 case 0: /* rnu */ 2225 vext_vv_rm_1(vd, v0, vs1, vs2, 2226 env, vl, vm, 0, fn, vma, esz); 2227 break; 2228 case 1: /* rne */ 2229 vext_vv_rm_1(vd, v0, vs1, vs2, 2230 env, vl, vm, 1, fn, vma, esz); 2231 break; 2232 case 2: /* rdn */ 2233 vext_vv_rm_1(vd, v0, vs1, vs2, 2234 env, vl, vm, 2, fn, vma, esz); 2235 break; 2236 default: /* rod */ 2237 vext_vv_rm_1(vd, v0, vs1, vs2, 2238 env, vl, vm, 3, fn, vma, esz); 2239 break; 2240 } 2241 /* set tail elements to 1s */ 2242 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 2243 } 2244 2245 /* generate helpers for fixed point instructions with OPIVV format */ 2246 #define GEN_VEXT_VV_RM(NAME, ESZ) \ 2247 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 2248 CPURISCVState *env, uint32_t desc) \ 2249 { \ 2250 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \ 2251 do_##NAME, ESZ); \ 2252 } 2253 2254 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, 2255 uint8_t b) 2256 { 2257 uint8_t res = a + b; 2258 if (res < a) { 2259 res = UINT8_MAX; 2260 env->vxsat = 0x1; 2261 } 2262 return res; 2263 } 2264 2265 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a, 2266 uint16_t b) 2267 { 2268 uint16_t res = a + b; 2269 if (res < a) { 2270 res = UINT16_MAX; 2271 env->vxsat = 0x1; 2272 } 2273 return res; 2274 } 2275 2276 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a, 2277 uint32_t b) 2278 { 2279 uint32_t res = a + b; 
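/*
 * Unsigned saturating add: the sum is computed modulo 2^32, so a carry
 * out of the top bit shows up as the wrapped result being smaller than
 * either operand (the res < a test below).  On wrap the result clamps
 * to UINT32_MAX and vxsat is set, e.g. a = 0xFFFFFFF0, b = 0x20 gives a
 * truncated sum of 0x10 < a, so the helper returns UINT32_MAX with
 * env->vxsat = 1.
 */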
2280 if (res < a) { 2281 res = UINT32_MAX; 2282 env->vxsat = 0x1; 2283 } 2284 return res; 2285 } 2286 2287 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a, 2288 uint64_t b) 2289 { 2290 uint64_t res = a + b; 2291 if (res < a) { 2292 res = UINT64_MAX; 2293 env->vxsat = 0x1; 2294 } 2295 return res; 2296 } 2297 2298 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8) 2299 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) 2300 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) 2301 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64) 2302 GEN_VEXT_VV_RM(vsaddu_vv_b, 1) 2303 GEN_VEXT_VV_RM(vsaddu_vv_h, 2) 2304 GEN_VEXT_VV_RM(vsaddu_vv_w, 4) 2305 GEN_VEXT_VV_RM(vsaddu_vv_d, 8) 2306 2307 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, 2308 CPURISCVState *env, int vxrm); 2309 2310 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2311 static inline void \ 2312 do_##NAME(void *vd, target_long s1, void *vs2, int i, \ 2313 CPURISCVState *env, int vxrm) \ 2314 { \ 2315 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2316 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \ 2317 } 2318 2319 static inline void 2320 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2, 2321 CPURISCVState *env, 2322 uint32_t vl, uint32_t vm, int vxrm, 2323 opivx2_rm_fn *fn, uint32_t vma, uint32_t esz) 2324 { 2325 for (uint32_t i = env->vstart; i < vl; i++) { 2326 if (!vm && !vext_elem_mask(v0, i)) { 2327 /* set masked-off elements to 1s */ 2328 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); 2329 continue; 2330 } 2331 fn(vd, s1, vs2, i, env, vxrm); 2332 } 2333 env->vstart = 0; 2334 } 2335 2336 static inline void 2337 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, 2338 CPURISCVState *env, 2339 uint32_t desc, 2340 opivx2_rm_fn *fn, uint32_t esz) 2341 { 2342 uint32_t vm = vext_vm(desc); 2343 uint32_t vl = env->vl; 2344 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 2345 uint32_t vta = vext_vta(desc); 2346 uint32_t vma = vext_vma(desc); 2347 2348 VSTART_CHECK_EARLY_EXIT(env, vl); 2349 2350 switch (env->vxrm) { 2351 case 0: /* rnu */ 2352 vext_vx_rm_1(vd, v0, s1, vs2, 2353 env, vl, vm, 0, fn, vma, esz); 2354 break; 2355 case 1: /* rne */ 2356 vext_vx_rm_1(vd, v0, s1, vs2, 2357 env, vl, vm, 1, fn, vma, esz); 2358 break; 2359 case 2: /* rdn */ 2360 vext_vx_rm_1(vd, v0, s1, vs2, 2361 env, vl, vm, 2, fn, vma, esz); 2362 break; 2363 default: /* rod */ 2364 vext_vx_rm_1(vd, v0, s1, vs2, 2365 env, vl, vm, 3, fn, vma, esz); 2366 break; 2367 } 2368 /* set tail elements to 1s */ 2369 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 2370 } 2371 2372 /* generate helpers for fixed point instructions with OPIVX format */ 2373 #define GEN_VEXT_VX_RM(NAME, ESZ) \ 2374 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2375 void *vs2, CPURISCVState *env, \ 2376 uint32_t desc) \ 2377 { \ 2378 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \ 2379 do_##NAME, ESZ); \ 2380 } 2381 2382 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) 2383 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) 2384 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) 2385 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) 2386 GEN_VEXT_VX_RM(vsaddu_vx_b, 1) 2387 GEN_VEXT_VX_RM(vsaddu_vx_h, 2) 2388 GEN_VEXT_VX_RM(vsaddu_vx_w, 4) 2389 GEN_VEXT_VX_RM(vsaddu_vx_d, 8) 2390 2391 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2392 { 2393 int8_t res = a + b; 2394 if ((res ^ a) & (res ^ 
b) & INT8_MIN) { 2395 res = a > 0 ? INT8_MAX : INT8_MIN; 2396 env->vxsat = 0x1; 2397 } 2398 return res; 2399 } 2400 2401 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, 2402 int16_t b) 2403 { 2404 int16_t res = a + b; 2405 if ((res ^ a) & (res ^ b) & INT16_MIN) { 2406 res = a > 0 ? INT16_MAX : INT16_MIN; 2407 env->vxsat = 0x1; 2408 } 2409 return res; 2410 } 2411 2412 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, 2413 int32_t b) 2414 { 2415 int32_t res = a + b; 2416 if ((res ^ a) & (res ^ b) & INT32_MIN) { 2417 res = a > 0 ? INT32_MAX : INT32_MIN; 2418 env->vxsat = 0x1; 2419 } 2420 return res; 2421 } 2422 2423 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, 2424 int64_t b) 2425 { 2426 int64_t res = a + b; 2427 if ((res ^ a) & (res ^ b) & INT64_MIN) { 2428 res = a > 0 ? INT64_MAX : INT64_MIN; 2429 env->vxsat = 0x1; 2430 } 2431 return res; 2432 } 2433 2434 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8) 2435 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) 2436 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) 2437 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) 2438 GEN_VEXT_VV_RM(vsadd_vv_b, 1) 2439 GEN_VEXT_VV_RM(vsadd_vv_h, 2) 2440 GEN_VEXT_VV_RM(vsadd_vv_w, 4) 2441 GEN_VEXT_VV_RM(vsadd_vv_d, 8) 2442 2443 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) 2444 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) 2445 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) 2446 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) 2447 GEN_VEXT_VX_RM(vsadd_vx_b, 1) 2448 GEN_VEXT_VX_RM(vsadd_vx_h, 2) 2449 GEN_VEXT_VX_RM(vsadd_vx_w, 4) 2450 GEN_VEXT_VX_RM(vsadd_vx_d, 8) 2451 2452 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, 2453 uint8_t b) 2454 { 2455 uint8_t res = a - b; 2456 if (res > a) { 2457 res = 0; 2458 env->vxsat = 0x1; 2459 } 2460 return res; 2461 } 2462 2463 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a, 2464 uint16_t b) 2465 { 2466 uint16_t res = a - b; 2467 if (res > a) { 2468 res = 0; 2469 env->vxsat = 0x1; 2470 } 2471 return res; 2472 } 2473 2474 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a, 2475 uint32_t b) 2476 { 2477 uint32_t res = a - b; 2478 if (res > a) { 2479 res = 0; 2480 env->vxsat = 0x1; 2481 } 2482 return res; 2483 } 2484 2485 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a, 2486 uint64_t b) 2487 { 2488 uint64_t res = a - b; 2489 if (res > a) { 2490 res = 0; 2491 env->vxsat = 0x1; 2492 } 2493 return res; 2494 } 2495 2496 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8) 2497 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16) 2498 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) 2499 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64) 2500 GEN_VEXT_VV_RM(vssubu_vv_b, 1) 2501 GEN_VEXT_VV_RM(vssubu_vv_h, 2) 2502 GEN_VEXT_VV_RM(vssubu_vv_w, 4) 2503 GEN_VEXT_VV_RM(vssubu_vv_d, 8) 2504 2505 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) 2506 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) 2507 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) 2508 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) 2509 GEN_VEXT_VX_RM(vssubu_vx_b, 1) 2510 GEN_VEXT_VX_RM(vssubu_vx_h, 2) 2511 GEN_VEXT_VX_RM(vssubu_vx_w, 4) 2512 GEN_VEXT_VX_RM(vssubu_vx_d, 8) 2513 2514 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2515 { 2516 int8_t res = a - b; 2517 
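/*
 * Signed saturating subtract: overflow is only possible when the
 * operands have opposite signs, in which case the wrapped result also
 * changes sign relative to a.  The check below isolates those two sign
 * flips with XOR and the sign-bit mask INT8_MIN, e.g. a = -128, b = 1
 * wraps to 127, sets the sign bit in both (res ^ a) and (a ^ b), and is
 * clamped to INT8_MIN with vxsat = 1.
 */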
if ((res ^ a) & (a ^ b) & INT8_MIN) { 2518 res = a >= 0 ? INT8_MAX : INT8_MIN; 2519 env->vxsat = 0x1; 2520 } 2521 return res; 2522 } 2523 2524 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, 2525 int16_t b) 2526 { 2527 int16_t res = a - b; 2528 if ((res ^ a) & (a ^ b) & INT16_MIN) { 2529 res = a >= 0 ? INT16_MAX : INT16_MIN; 2530 env->vxsat = 0x1; 2531 } 2532 return res; 2533 } 2534 2535 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, 2536 int32_t b) 2537 { 2538 int32_t res = a - b; 2539 if ((res ^ a) & (a ^ b) & INT32_MIN) { 2540 res = a >= 0 ? INT32_MAX : INT32_MIN; 2541 env->vxsat = 0x1; 2542 } 2543 return res; 2544 } 2545 2546 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, 2547 int64_t b) 2548 { 2549 int64_t res = a - b; 2550 if ((res ^ a) & (a ^ b) & INT64_MIN) { 2551 res = a >= 0 ? INT64_MAX : INT64_MIN; 2552 env->vxsat = 0x1; 2553 } 2554 return res; 2555 } 2556 2557 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8) 2558 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16) 2559 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32) 2560 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64) 2561 GEN_VEXT_VV_RM(vssub_vv_b, 1) 2562 GEN_VEXT_VV_RM(vssub_vv_h, 2) 2563 GEN_VEXT_VV_RM(vssub_vv_w, 4) 2564 GEN_VEXT_VV_RM(vssub_vv_d, 8) 2565 2566 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8) 2567 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16) 2568 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32) 2569 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64) 2570 GEN_VEXT_VX_RM(vssub_vx_b, 1) 2571 GEN_VEXT_VX_RM(vssub_vx_h, 2) 2572 GEN_VEXT_VX_RM(vssub_vx_w, 4) 2573 GEN_VEXT_VX_RM(vssub_vx_d, 8) 2574 2575 /* Vector Single-Width Averaging Add and Subtract */ 2576 static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift) 2577 { 2578 uint8_t d = extract64(v, shift, 1); 2579 uint8_t d1; 2580 uint64_t D1, D2; 2581 2582 if (shift == 0 || shift > 64) { 2583 return 0; 2584 } 2585 2586 d1 = extract64(v, shift - 1, 1); 2587 D1 = extract64(v, 0, shift); 2588 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */ 2589 return d1; 2590 } else if (vxrm == 1) { /* round-to-nearest-even */ 2591 if (shift > 1) { 2592 D2 = extract64(v, 0, shift - 1); 2593 return d1 & ((D2 != 0) | d); 2594 } else { 2595 return d1 & d; 2596 } 2597 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */ 2598 return !d & (D1 != 0); 2599 } 2600 return 0; /* round-down (truncate) */ 2601 } 2602 2603 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, 2604 int32_t b) 2605 { 2606 int64_t res = (int64_t)a + b; 2607 uint8_t round = get_round(vxrm, res, 1); 2608 2609 return (res >> 1) + round; 2610 } 2611 2612 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, 2613 int64_t b) 2614 { 2615 int64_t res = a + b; 2616 uint8_t round = get_round(vxrm, res, 1); 2617 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN; 2618 2619 /* With signed overflow, bit 64 is inverse of bit 63. 
*/ 2620 return ((res >> 1) ^ over) + round; 2621 } 2622 2623 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32) 2624 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) 2625 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) 2626 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) 2627 GEN_VEXT_VV_RM(vaadd_vv_b, 1) 2628 GEN_VEXT_VV_RM(vaadd_vv_h, 2) 2629 GEN_VEXT_VV_RM(vaadd_vv_w, 4) 2630 GEN_VEXT_VV_RM(vaadd_vv_d, 8) 2631 2632 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) 2633 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) 2634 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) 2635 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) 2636 GEN_VEXT_VX_RM(vaadd_vx_b, 1) 2637 GEN_VEXT_VX_RM(vaadd_vx_h, 2) 2638 GEN_VEXT_VX_RM(vaadd_vx_w, 4) 2639 GEN_VEXT_VX_RM(vaadd_vx_d, 8) 2640 2641 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm, 2642 uint32_t a, uint32_t b) 2643 { 2644 uint64_t res = (uint64_t)a + b; 2645 uint8_t round = get_round(vxrm, res, 1); 2646 2647 return (res >> 1) + round; 2648 } 2649 2650 static inline uint64_t aaddu64(CPURISCVState *env, int vxrm, 2651 uint64_t a, uint64_t b) 2652 { 2653 uint64_t res = a + b; 2654 uint8_t round = get_round(vxrm, res, 1); 2655 uint64_t over = (uint64_t)(res < a) << 63; 2656 2657 return ((res >> 1) | over) + round; 2658 } 2659 2660 RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32) 2661 RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32) 2662 RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32) 2663 RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64) 2664 GEN_VEXT_VV_RM(vaaddu_vv_b, 1) 2665 GEN_VEXT_VV_RM(vaaddu_vv_h, 2) 2666 GEN_VEXT_VV_RM(vaaddu_vv_w, 4) 2667 GEN_VEXT_VV_RM(vaaddu_vv_d, 8) 2668 2669 RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32) 2670 RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32) 2671 RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32) 2672 RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64) 2673 GEN_VEXT_VX_RM(vaaddu_vx_b, 1) 2674 GEN_VEXT_VX_RM(vaaddu_vx_h, 2) 2675 GEN_VEXT_VX_RM(vaaddu_vx_w, 4) 2676 GEN_VEXT_VX_RM(vaaddu_vx_d, 8) 2677 2678 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, 2679 int32_t b) 2680 { 2681 int64_t res = (int64_t)a - b; 2682 uint8_t round = get_round(vxrm, res, 1); 2683 2684 return (res >> 1) + round; 2685 } 2686 2687 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, 2688 int64_t b) 2689 { 2690 int64_t res = (int64_t)a - b; 2691 uint8_t round = get_round(vxrm, res, 1); 2692 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN; 2693 2694 /* With signed overflow, bit 64 is inverse of bit 63. 
*/ 2695 return ((res >> 1) ^ over) + round; 2696 } 2697 2698 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32) 2699 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32) 2700 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) 2701 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) 2702 GEN_VEXT_VV_RM(vasub_vv_b, 1) 2703 GEN_VEXT_VV_RM(vasub_vv_h, 2) 2704 GEN_VEXT_VV_RM(vasub_vv_w, 4) 2705 GEN_VEXT_VV_RM(vasub_vv_d, 8) 2706 2707 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) 2708 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) 2709 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) 2710 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) 2711 GEN_VEXT_VX_RM(vasub_vx_b, 1) 2712 GEN_VEXT_VX_RM(vasub_vx_h, 2) 2713 GEN_VEXT_VX_RM(vasub_vx_w, 4) 2714 GEN_VEXT_VX_RM(vasub_vx_d, 8) 2715 2716 static inline uint32_t asubu32(CPURISCVState *env, int vxrm, 2717 uint32_t a, uint32_t b) 2718 { 2719 int64_t res = (int64_t)a - b; 2720 uint8_t round = get_round(vxrm, res, 1); 2721 2722 return (res >> 1) + round; 2723 } 2724 2725 static inline uint64_t asubu64(CPURISCVState *env, int vxrm, 2726 uint64_t a, uint64_t b) 2727 { 2728 uint64_t res = (uint64_t)a - b; 2729 uint8_t round = get_round(vxrm, res, 1); 2730 uint64_t over = (uint64_t)(res > a) << 63; 2731 2732 return ((res >> 1) | over) + round; 2733 } 2734 2735 RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32) 2736 RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32) 2737 RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32) 2738 RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64) 2739 GEN_VEXT_VV_RM(vasubu_vv_b, 1) 2740 GEN_VEXT_VV_RM(vasubu_vv_h, 2) 2741 GEN_VEXT_VV_RM(vasubu_vv_w, 4) 2742 GEN_VEXT_VV_RM(vasubu_vv_d, 8) 2743 2744 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32) 2745 RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32) 2746 RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32) 2747 RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64) 2748 GEN_VEXT_VX_RM(vasubu_vx_b, 1) 2749 GEN_VEXT_VX_RM(vasubu_vx_h, 2) 2750 GEN_VEXT_VX_RM(vasubu_vx_w, 4) 2751 GEN_VEXT_VX_RM(vasubu_vx_d, 8) 2752 2753 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */ 2754 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2755 { 2756 uint8_t round; 2757 int16_t res; 2758 2759 res = (int16_t)a * (int16_t)b; 2760 round = get_round(vxrm, res, 7); 2761 res = (res >> 7) + round; 2762 2763 if (res > INT8_MAX) { 2764 env->vxsat = 0x1; 2765 return INT8_MAX; 2766 } else if (res < INT8_MIN) { 2767 env->vxsat = 0x1; 2768 return INT8_MIN; 2769 } else { 2770 return res; 2771 } 2772 } 2773 2774 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2775 { 2776 uint8_t round; 2777 int32_t res; 2778 2779 res = (int32_t)a * (int32_t)b; 2780 round = get_round(vxrm, res, 15); 2781 res = (res >> 15) + round; 2782 2783 if (res > INT16_MAX) { 2784 env->vxsat = 0x1; 2785 return INT16_MAX; 2786 } else if (res < INT16_MIN) { 2787 env->vxsat = 0x1; 2788 return INT16_MIN; 2789 } else { 2790 return res; 2791 } 2792 } 2793 2794 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2795 { 2796 uint8_t round; 2797 int64_t res; 2798 2799 res = (int64_t)a * (int64_t)b; 2800 round = get_round(vxrm, res, 31); 2801 res = (res >> 31) + round; 2802 2803 if (res > INT32_MAX) { 2804 env->vxsat = 0x1; 2805 return INT32_MAX; 2806 } else if (res < INT32_MIN) { 2807 env->vxsat = 0x1; 
2808 return INT32_MIN; 2809 } else { 2810 return res; 2811 } 2812 } 2813 2814 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2815 { 2816 uint8_t round; 2817 uint64_t hi_64, lo_64; 2818 int64_t res; 2819 2820 if (a == INT64_MIN && b == INT64_MIN) { 2821 env->vxsat = 1; 2822 return INT64_MAX; 2823 } 2824 2825 muls64(&lo_64, &hi_64, a, b); 2826 round = get_round(vxrm, lo_64, 63); 2827 /* 2828 * Cannot overflow, as there are always 2829 * 2 sign bits after multiply. 2830 */ 2831 res = (hi_64 << 1) | (lo_64 >> 63); 2832 if (round) { 2833 if (res == INT64_MAX) { 2834 env->vxsat = 1; 2835 } else { 2836 res += 1; 2837 } 2838 } 2839 return res; 2840 } 2841 2842 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8) 2843 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) 2844 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) 2845 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) 2846 GEN_VEXT_VV_RM(vsmul_vv_b, 1) 2847 GEN_VEXT_VV_RM(vsmul_vv_h, 2) 2848 GEN_VEXT_VV_RM(vsmul_vv_w, 4) 2849 GEN_VEXT_VV_RM(vsmul_vv_d, 8) 2850 2851 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) 2852 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) 2853 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) 2854 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) 2855 GEN_VEXT_VX_RM(vsmul_vx_b, 1) 2856 GEN_VEXT_VX_RM(vsmul_vx_h, 2) 2857 GEN_VEXT_VX_RM(vsmul_vx_w, 4) 2858 GEN_VEXT_VX_RM(vsmul_vx_d, 8) 2859 2860 /* Vector Single-Width Scaling Shift Instructions */ 2861 static inline uint8_t 2862 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2863 { 2864 uint8_t round, shift = b & 0x7; 2865 uint8_t res; 2866 2867 round = get_round(vxrm, a, shift); 2868 res = (a >> shift) + round; 2869 return res; 2870 } 2871 static inline uint16_t 2872 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b) 2873 { 2874 uint8_t round, shift = b & 0xf; 2875 2876 round = get_round(vxrm, a, shift); 2877 return (a >> shift) + round; 2878 } 2879 static inline uint32_t 2880 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) 2881 { 2882 uint8_t round, shift = b & 0x1f; 2883 2884 round = get_round(vxrm, a, shift); 2885 return (a >> shift) + round; 2886 } 2887 static inline uint64_t 2888 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b) 2889 { 2890 uint8_t round, shift = b & 0x3f; 2891 2892 round = get_round(vxrm, a, shift); 2893 return (a >> shift) + round; 2894 } 2895 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8) 2896 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) 2897 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) 2898 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) 2899 GEN_VEXT_VV_RM(vssrl_vv_b, 1) 2900 GEN_VEXT_VV_RM(vssrl_vv_h, 2) 2901 GEN_VEXT_VV_RM(vssrl_vv_w, 4) 2902 GEN_VEXT_VV_RM(vssrl_vv_d, 8) 2903 2904 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) 2905 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) 2906 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) 2907 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) 2908 GEN_VEXT_VX_RM(vssrl_vx_b, 1) 2909 GEN_VEXT_VX_RM(vssrl_vx_h, 2) 2910 GEN_VEXT_VX_RM(vssrl_vx_w, 4) 2911 GEN_VEXT_VX_RM(vssrl_vx_d, 8) 2912 2913 static inline int8_t 2914 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2915 { 2916 uint8_t round, shift = b & 0x7; 2917 2918 round = get_round(vxrm, a, shift); 2919 return (a >> shift) + round; 2920 } 2921 static inline int16_t 2922 
vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2923 { 2924 uint8_t round, shift = b & 0xf; 2925 2926 round = get_round(vxrm, a, shift); 2927 return (a >> shift) + round; 2928 } 2929 static inline int32_t 2930 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2931 { 2932 uint8_t round, shift = b & 0x1f; 2933 2934 round = get_round(vxrm, a, shift); 2935 return (a >> shift) + round; 2936 } 2937 static inline int64_t 2938 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2939 { 2940 uint8_t round, shift = b & 0x3f; 2941 2942 round = get_round(vxrm, a, shift); 2943 return (a >> shift) + round; 2944 } 2945 2946 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) 2947 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) 2948 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) 2949 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) 2950 GEN_VEXT_VV_RM(vssra_vv_b, 1) 2951 GEN_VEXT_VV_RM(vssra_vv_h, 2) 2952 GEN_VEXT_VV_RM(vssra_vv_w, 4) 2953 GEN_VEXT_VV_RM(vssra_vv_d, 8) 2954 2955 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) 2956 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) 2957 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32) 2958 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) 2959 GEN_VEXT_VX_RM(vssra_vx_b, 1) 2960 GEN_VEXT_VX_RM(vssra_vx_h, 2) 2961 GEN_VEXT_VX_RM(vssra_vx_w, 4) 2962 GEN_VEXT_VX_RM(vssra_vx_d, 8) 2963 2964 /* Vector Narrowing Fixed-Point Clip Instructions */ 2965 static inline int8_t 2966 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b) 2967 { 2968 uint8_t round, shift = b & 0xf; 2969 int16_t res; 2970 2971 round = get_round(vxrm, a, shift); 2972 res = (a >> shift) + round; 2973 if (res > INT8_MAX) { 2974 env->vxsat = 0x1; 2975 return INT8_MAX; 2976 } else if (res < INT8_MIN) { 2977 env->vxsat = 0x1; 2978 return INT8_MIN; 2979 } else { 2980 return res; 2981 } 2982 } 2983 2984 static inline int16_t 2985 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b) 2986 { 2987 uint8_t round, shift = b & 0x1f; 2988 int32_t res; 2989 2990 round = get_round(vxrm, a, shift); 2991 res = (a >> shift) + round; 2992 if (res > INT16_MAX) { 2993 env->vxsat = 0x1; 2994 return INT16_MAX; 2995 } else if (res < INT16_MIN) { 2996 env->vxsat = 0x1; 2997 return INT16_MIN; 2998 } else { 2999 return res; 3000 } 3001 } 3002 3003 static inline int32_t 3004 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) 3005 { 3006 uint8_t round, shift = b & 0x3f; 3007 int64_t res; 3008 3009 round = get_round(vxrm, a, shift); 3010 res = (a >> shift) + round; 3011 if (res > INT32_MAX) { 3012 env->vxsat = 0x1; 3013 return INT32_MAX; 3014 } else if (res < INT32_MIN) { 3015 env->vxsat = 0x1; 3016 return INT32_MIN; 3017 } else { 3018 return res; 3019 } 3020 } 3021 3022 RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8) 3023 RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16) 3024 RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32) 3025 GEN_VEXT_VV_RM(vnclip_wv_b, 1) 3026 GEN_VEXT_VV_RM(vnclip_wv_h, 2) 3027 GEN_VEXT_VV_RM(vnclip_wv_w, 4) 3028 3029 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8) 3030 RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16) 3031 RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32) 3032 GEN_VEXT_VX_RM(vnclip_wx_b, 1) 3033 GEN_VEXT_VX_RM(vnclip_wx_h, 2) 3034 GEN_VEXT_VX_RM(vnclip_wx_w, 4) 3035 3036 static inline uint8_t 3037 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b) 3038 { 3039 
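/*
 * Narrowing unsigned clip from 16 to 8 bits: shift the wide operand
 * right by the low 4 bits of b, add the rounding increment selected by
 * vxrm, then saturate anything above UINT8_MAX, e.g. a = 0x0400 with
 * shift = 2 rounds to 0x100 in every rounding mode and is clipped to
 * UINT8_MAX with vxsat = 1.
 */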
uint8_t round, shift = b & 0xf; 3040 uint16_t res; 3041 3042 round = get_round(vxrm, a, shift); 3043 res = (a >> shift) + round; 3044 if (res > UINT8_MAX) { 3045 env->vxsat = 0x1; 3046 return UINT8_MAX; 3047 } else { 3048 return res; 3049 } 3050 } 3051 3052 static inline uint16_t 3053 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b) 3054 { 3055 uint8_t round, shift = b & 0x1f; 3056 uint32_t res; 3057 3058 round = get_round(vxrm, a, shift); 3059 res = (a >> shift) + round; 3060 if (res > UINT16_MAX) { 3061 env->vxsat = 0x1; 3062 return UINT16_MAX; 3063 } else { 3064 return res; 3065 } 3066 } 3067 3068 static inline uint32_t 3069 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b) 3070 { 3071 uint8_t round, shift = b & 0x3f; 3072 uint64_t res; 3073 3074 round = get_round(vxrm, a, shift); 3075 res = (a >> shift) + round; 3076 if (res > UINT32_MAX) { 3077 env->vxsat = 0x1; 3078 return UINT32_MAX; 3079 } else { 3080 return res; 3081 } 3082 } 3083 3084 RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8) 3085 RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16) 3086 RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32) 3087 GEN_VEXT_VV_RM(vnclipu_wv_b, 1) 3088 GEN_VEXT_VV_RM(vnclipu_wv_h, 2) 3089 GEN_VEXT_VV_RM(vnclipu_wv_w, 4) 3090 3091 RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8) 3092 RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16) 3093 RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32) 3094 GEN_VEXT_VX_RM(vnclipu_wx_b, 1) 3095 GEN_VEXT_VX_RM(vnclipu_wx_h, 2) 3096 GEN_VEXT_VX_RM(vnclipu_wx_w, 4) 3097 3098 /* 3099 * Vector Float Point Arithmetic Instructions 3100 */ 3101 /* Vector Single-Width Floating-Point Add/Subtract Instructions */ 3102 #define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3103 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3104 CPURISCVState *env) \ 3105 { \ 3106 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3107 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3108 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \ 3109 } 3110 3111 #define GEN_VEXT_VV_ENV(NAME, ESZ) \ 3112 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 3113 void *vs2, CPURISCVState *env, \ 3114 uint32_t desc) \ 3115 { \ 3116 uint32_t vm = vext_vm(desc); \ 3117 uint32_t vl = env->vl; \ 3118 uint32_t total_elems = \ 3119 vext_get_total_elems(env, desc, ESZ); \ 3120 uint32_t vta = vext_vta(desc); \ 3121 uint32_t vma = vext_vma(desc); \ 3122 uint32_t i; \ 3123 \ 3124 VSTART_CHECK_EARLY_EXIT(env, vl); \ 3125 \ 3126 for (i = env->vstart; i < vl; i++) { \ 3127 if (!vm && !vext_elem_mask(v0, i)) { \ 3128 /* set masked-off elements to 1s */ \ 3129 vext_set_elems_1s(vd, vma, i * ESZ, \ 3130 (i + 1) * ESZ); \ 3131 continue; \ 3132 } \ 3133 do_##NAME(vd, vs1, vs2, i, env); \ 3134 } \ 3135 env->vstart = 0; \ 3136 /* set tail elements to 1s */ \ 3137 vext_set_elems_1s(vd, vta, vl * ESZ, \ 3138 total_elems * ESZ); \ 3139 } 3140 3141 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add) 3142 RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add) 3143 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add) 3144 GEN_VEXT_VV_ENV(vfadd_vv_h, 2) 3145 GEN_VEXT_VV_ENV(vfadd_vv_w, 4) 3146 GEN_VEXT_VV_ENV(vfadd_vv_d, 8) 3147 3148 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3149 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3150 CPURISCVState *env) \ 3151 { \ 3152 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3153 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, 
&env->fp_status);\ 3154 } 3155 3156 #define GEN_VEXT_VF(NAME, ESZ) \ 3157 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ 3158 void *vs2, CPURISCVState *env, \ 3159 uint32_t desc) \ 3160 { \ 3161 uint32_t vm = vext_vm(desc); \ 3162 uint32_t vl = env->vl; \ 3163 uint32_t total_elems = \ 3164 vext_get_total_elems(env, desc, ESZ); \ 3165 uint32_t vta = vext_vta(desc); \ 3166 uint32_t vma = vext_vma(desc); \ 3167 uint32_t i; \ 3168 \ 3169 VSTART_CHECK_EARLY_EXIT(env, vl); \ 3170 \ 3171 for (i = env->vstart; i < vl; i++) { \ 3172 if (!vm && !vext_elem_mask(v0, i)) { \ 3173 /* set masked-off elements to 1s */ \ 3174 vext_set_elems_1s(vd, vma, i * ESZ, \ 3175 (i + 1) * ESZ); \ 3176 continue; \ 3177 } \ 3178 do_##NAME(vd, s1, vs2, i, env); \ 3179 } \ 3180 env->vstart = 0; \ 3181 /* set tail elements to 1s */ \ 3182 vext_set_elems_1s(vd, vta, vl * ESZ, \ 3183 total_elems * ESZ); \ 3184 } 3185 3186 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) 3187 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) 3188 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) 3189 GEN_VEXT_VF(vfadd_vf_h, 2) 3190 GEN_VEXT_VF(vfadd_vf_w, 4) 3191 GEN_VEXT_VF(vfadd_vf_d, 8) 3192 3193 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) 3194 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) 3195 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) 3196 GEN_VEXT_VV_ENV(vfsub_vv_h, 2) 3197 GEN_VEXT_VV_ENV(vfsub_vv_w, 4) 3198 GEN_VEXT_VV_ENV(vfsub_vv_d, 8) 3199 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) 3200 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) 3201 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) 3202 GEN_VEXT_VF(vfsub_vf_h, 2) 3203 GEN_VEXT_VF(vfsub_vf_w, 4) 3204 GEN_VEXT_VF(vfsub_vf_d, 8) 3205 3206 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) 3207 { 3208 return float16_sub(b, a, s); 3209 } 3210 3211 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s) 3212 { 3213 return float32_sub(b, a, s); 3214 } 3215 3216 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s) 3217 { 3218 return float64_sub(b, a, s); 3219 } 3220 3221 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) 3222 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) 3223 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) 3224 GEN_VEXT_VF(vfrsub_vf_h, 2) 3225 GEN_VEXT_VF(vfrsub_vf_w, 4) 3226 GEN_VEXT_VF(vfrsub_vf_d, 8) 3227 3228 /* Vector Widening Floating-Point Add/Subtract Instructions */ 3229 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) 3230 { 3231 return float32_add(float16_to_float32(a, true, s), 3232 float16_to_float32(b, true, s), s); 3233 } 3234 3235 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) 3236 { 3237 return float64_add(float32_to_float64(a, s), 3238 float32_to_float64(b, s), s); 3239 3240 } 3241 3242 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) 3243 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) 3244 GEN_VEXT_VV_ENV(vfwadd_vv_h, 4) 3245 GEN_VEXT_VV_ENV(vfwadd_vv_w, 8) 3246 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) 3247 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) 3248 GEN_VEXT_VF(vfwadd_vf_h, 4) 3249 GEN_VEXT_VF(vfwadd_vf_w, 8) 3250 3251 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) 3252 { 3253 return float32_sub(float16_to_float32(a, true, s), 3254 float16_to_float32(b, true, s), s); 3255 } 3256 3257 static uint64_t vfwsub32(uint32_t a, 
uint32_t b, float_status *s) 3258 { 3259 return float64_sub(float32_to_float64(a, s), 3260 float32_to_float64(b, s), s); 3261 3262 } 3263 3264 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) 3265 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) 3266 GEN_VEXT_VV_ENV(vfwsub_vv_h, 4) 3267 GEN_VEXT_VV_ENV(vfwsub_vv_w, 8) 3268 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16) 3269 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) 3270 GEN_VEXT_VF(vfwsub_vf_h, 4) 3271 GEN_VEXT_VF(vfwsub_vf_w, 8) 3272 3273 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) 3274 { 3275 return float32_add(a, float16_to_float32(b, true, s), s); 3276 } 3277 3278 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s) 3279 { 3280 return float64_add(a, float32_to_float64(b, s), s); 3281 } 3282 3283 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) 3284 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) 3285 GEN_VEXT_VV_ENV(vfwadd_wv_h, 4) 3286 GEN_VEXT_VV_ENV(vfwadd_wv_w, 8) 3287 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) 3288 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) 3289 GEN_VEXT_VF(vfwadd_wf_h, 4) 3290 GEN_VEXT_VF(vfwadd_wf_w, 8) 3291 3292 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) 3293 { 3294 return float32_sub(a, float16_to_float32(b, true, s), s); 3295 } 3296 3297 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s) 3298 { 3299 return float64_sub(a, float32_to_float64(b, s), s); 3300 } 3301 3302 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) 3303 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) 3304 GEN_VEXT_VV_ENV(vfwsub_wv_h, 4) 3305 GEN_VEXT_VV_ENV(vfwsub_wv_w, 8) 3306 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) 3307 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) 3308 GEN_VEXT_VF(vfwsub_wf_h, 4) 3309 GEN_VEXT_VF(vfwsub_wf_w, 8) 3310 3311 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */ 3312 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) 3313 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) 3314 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) 3315 GEN_VEXT_VV_ENV(vfmul_vv_h, 2) 3316 GEN_VEXT_VV_ENV(vfmul_vv_w, 4) 3317 GEN_VEXT_VV_ENV(vfmul_vv_d, 8) 3318 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) 3319 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) 3320 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) 3321 GEN_VEXT_VF(vfmul_vf_h, 2) 3322 GEN_VEXT_VF(vfmul_vf_w, 4) 3323 GEN_VEXT_VF(vfmul_vf_d, 8) 3324 3325 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) 3326 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) 3327 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) 3328 GEN_VEXT_VV_ENV(vfdiv_vv_h, 2) 3329 GEN_VEXT_VV_ENV(vfdiv_vv_w, 4) 3330 GEN_VEXT_VV_ENV(vfdiv_vv_d, 8) 3331 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) 3332 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) 3333 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) 3334 GEN_VEXT_VF(vfdiv_vf_h, 2) 3335 GEN_VEXT_VF(vfdiv_vf_w, 4) 3336 GEN_VEXT_VF(vfdiv_vf_d, 8) 3337 3338 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) 3339 { 3340 return float16_div(b, a, s); 3341 } 3342 3343 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s) 3344 { 3345 return float32_div(b, a, s); 3346 } 3347 3348 static uint64_t 
float64_rdiv(uint64_t a, uint64_t b, float_status *s) 3349 { 3350 return float64_div(b, a, s); 3351 } 3352 3353 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) 3354 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) 3355 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) 3356 GEN_VEXT_VF(vfrdiv_vf_h, 2) 3357 GEN_VEXT_VF(vfrdiv_vf_w, 4) 3358 GEN_VEXT_VF(vfrdiv_vf_d, 8) 3359 3360 /* Vector Widening Floating-Point Multiply */ 3361 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) 3362 { 3363 return float32_mul(float16_to_float32(a, true, s), 3364 float16_to_float32(b, true, s), s); 3365 } 3366 3367 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) 3368 { 3369 return float64_mul(float32_to_float64(a, s), 3370 float32_to_float64(b, s), s); 3371 3372 } 3373 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) 3374 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32) 3375 GEN_VEXT_VV_ENV(vfwmul_vv_h, 4) 3376 GEN_VEXT_VV_ENV(vfwmul_vv_w, 8) 3377 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) 3378 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) 3379 GEN_VEXT_VF(vfwmul_vf_h, 4) 3380 GEN_VEXT_VF(vfwmul_vf_w, 8) 3381 3382 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ 3383 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3384 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3385 CPURISCVState *env) \ 3386 { \ 3387 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3388 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3389 TD d = *((TD *)vd + HD(i)); \ 3390 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \ 3391 } 3392 3393 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3394 { 3395 return float16_muladd(a, b, d, 0, s); 3396 } 3397 3398 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3399 { 3400 return float32_muladd(a, b, d, 0, s); 3401 } 3402 3403 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3404 { 3405 return float64_muladd(a, b, d, 0, s); 3406 } 3407 3408 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) 3409 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) 3410 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) 3411 GEN_VEXT_VV_ENV(vfmacc_vv_h, 2) 3412 GEN_VEXT_VV_ENV(vfmacc_vv_w, 4) 3413 GEN_VEXT_VV_ENV(vfmacc_vv_d, 8) 3414 3415 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3416 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3417 CPURISCVState *env) \ 3418 { \ 3419 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3420 TD d = *((TD *)vd + HD(i)); \ 3421 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\ 3422 } 3423 3424 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) 3425 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) 3426 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) 3427 GEN_VEXT_VF(vfmacc_vf_h, 2) 3428 GEN_VEXT_VF(vfmacc_vf_w, 4) 3429 GEN_VEXT_VF(vfmacc_vf_d, 8) 3430 3431 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3432 { 3433 return float16_muladd(a, b, d, float_muladd_negate_c | 3434 float_muladd_negate_product, s); 3435 } 3436 3437 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3438 { 3439 return float32_muladd(a, b, d, float_muladd_negate_c | 3440 float_muladd_negate_product, s); 3441 } 3442 3443 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3444 { 3445 return float64_muladd(a, b, d, 
float_muladd_negate_c | 3446 float_muladd_negate_product, s); 3447 } 3448 3449 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16) 3450 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) 3451 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) 3452 GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2) 3453 GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4) 3454 GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8) 3455 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) 3456 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) 3457 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) 3458 GEN_VEXT_VF(vfnmacc_vf_h, 2) 3459 GEN_VEXT_VF(vfnmacc_vf_w, 4) 3460 GEN_VEXT_VF(vfnmacc_vf_d, 8) 3461 3462 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3463 { 3464 return float16_muladd(a, b, d, float_muladd_negate_c, s); 3465 } 3466 3467 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3468 { 3469 return float32_muladd(a, b, d, float_muladd_negate_c, s); 3470 } 3471 3472 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3473 { 3474 return float64_muladd(a, b, d, float_muladd_negate_c, s); 3475 } 3476 3477 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) 3478 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) 3479 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) 3480 GEN_VEXT_VV_ENV(vfmsac_vv_h, 2) 3481 GEN_VEXT_VV_ENV(vfmsac_vv_w, 4) 3482 GEN_VEXT_VV_ENV(vfmsac_vv_d, 8) 3483 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) 3484 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) 3485 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) 3486 GEN_VEXT_VF(vfmsac_vf_h, 2) 3487 GEN_VEXT_VF(vfmsac_vf_w, 4) 3488 GEN_VEXT_VF(vfmsac_vf_d, 8) 3489 3490 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3491 { 3492 return float16_muladd(a, b, d, float_muladd_negate_product, s); 3493 } 3494 3495 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3496 { 3497 return float32_muladd(a, b, d, float_muladd_negate_product, s); 3498 } 3499 3500 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3501 { 3502 return float64_muladd(a, b, d, float_muladd_negate_product, s); 3503 } 3504 3505 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) 3506 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) 3507 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) 3508 GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2) 3509 GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4) 3510 GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8) 3511 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) 3512 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) 3513 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64) 3514 GEN_VEXT_VF(vfnmsac_vf_h, 2) 3515 GEN_VEXT_VF(vfnmsac_vf_w, 4) 3516 GEN_VEXT_VF(vfnmsac_vf_d, 8) 3517 3518 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3519 { 3520 return float16_muladd(d, b, a, 0, s); 3521 } 3522 3523 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3524 { 3525 return float32_muladd(d, b, a, 0, s); 3526 } 3527 3528 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3529 { 3530 return float64_muladd(d, b, a, 0, s); 3531 } 3532 3533 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) 3534 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) 3535 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) 3536 GEN_VEXT_VV_ENV(vfmadd_vv_h, 
2) 3537 GEN_VEXT_VV_ENV(vfmadd_vv_w, 4) 3538 GEN_VEXT_VV_ENV(vfmadd_vv_d, 8) 3539 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16) 3540 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) 3541 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) 3542 GEN_VEXT_VF(vfmadd_vf_h, 2) 3543 GEN_VEXT_VF(vfmadd_vf_w, 4) 3544 GEN_VEXT_VF(vfmadd_vf_d, 8) 3545 3546 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3547 { 3548 return float16_muladd(d, b, a, float_muladd_negate_c | 3549 float_muladd_negate_product, s); 3550 } 3551 3552 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3553 { 3554 return float32_muladd(d, b, a, float_muladd_negate_c | 3555 float_muladd_negate_product, s); 3556 } 3557 3558 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3559 { 3560 return float64_muladd(d, b, a, float_muladd_negate_c | 3561 float_muladd_negate_product, s); 3562 } 3563 3564 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) 3565 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) 3566 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) 3567 GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2) 3568 GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4) 3569 GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8) 3570 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) 3571 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) 3572 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) 3573 GEN_VEXT_VF(vfnmadd_vf_h, 2) 3574 GEN_VEXT_VF(vfnmadd_vf_w, 4) 3575 GEN_VEXT_VF(vfnmadd_vf_d, 8) 3576 3577 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3578 { 3579 return float16_muladd(d, b, a, float_muladd_negate_c, s); 3580 } 3581 3582 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3583 { 3584 return float32_muladd(d, b, a, float_muladd_negate_c, s); 3585 } 3586 3587 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3588 { 3589 return float64_muladd(d, b, a, float_muladd_negate_c, s); 3590 } 3591 3592 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) 3593 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) 3594 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) 3595 GEN_VEXT_VV_ENV(vfmsub_vv_h, 2) 3596 GEN_VEXT_VV_ENV(vfmsub_vv_w, 4) 3597 GEN_VEXT_VV_ENV(vfmsub_vv_d, 8) 3598 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) 3599 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) 3600 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) 3601 GEN_VEXT_VF(vfmsub_vf_h, 2) 3602 GEN_VEXT_VF(vfmsub_vf_w, 4) 3603 GEN_VEXT_VF(vfmsub_vf_d, 8) 3604 3605 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3606 { 3607 return float16_muladd(d, b, a, float_muladd_negate_product, s); 3608 } 3609 3610 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3611 { 3612 return float32_muladd(d, b, a, float_muladd_negate_product, s); 3613 } 3614 3615 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3616 { 3617 return float64_muladd(d, b, a, float_muladd_negate_product, s); 3618 } 3619 3620 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) 3621 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) 3622 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) 3623 GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2) 3624 GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4) 3625 GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8) 3626 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) 
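/*
 * Editor's note -- illustrative sketch only, not part of the upstream
 * helpers.  The fmacc/fnmacc/fmsac/fnmsac helpers use the accumulator
 * form: the product is taken from vs2 and vs1 and vd supplies the
 * addend, so they call float*_muladd(a, b, d, ...).  The
 * fmadd/fnmadd/fmsub/fnmsub helpers overwrite the multiplicand: the
 * product is taken from vd and vs1 and vs2 supplies the addend, which
 * is why they pass (d, b, a) instead.  The hypothetical ex_* functions
 * below restate the two orderings with plain host floats, purely to
 * show operand order; the real helpers go through softfloat and
 * env->fp_status.
 */
static inline float ex_fmacc(float vs1, float vs2, float vd)
{
    return (vs1 * vs2) + vd;    /* vfmacc.vv:  vd <- +(vs1 * vs2) + vd */
}

static inline float ex_fmadd(float vs1, float vs2, float vd)
{
    return (vs1 * vd) + vs2;    /* vfmadd.vv:  vd <- +(vs1 * vd) + vs2 */
}

static inline float ex_fnmsub(float vs1, float vs2, float vd)
{
    return -(vs1 * vd) + vs2;   /* vfnmsub.vv: vd <- -(vs1 * vd) + vs2 */
}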
3627 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) 3628 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64) 3629 GEN_VEXT_VF(vfnmsub_vf_h, 2) 3630 GEN_VEXT_VF(vfnmsub_vf_w, 4) 3631 GEN_VEXT_VF(vfnmsub_vf_d, 8) 3632 3633 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */ 3634 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3635 { 3636 return float32_muladd(float16_to_float32(a, true, s), 3637 float16_to_float32(b, true, s), d, 0, s); 3638 } 3639 3640 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3641 { 3642 return float64_muladd(float32_to_float64(a, s), 3643 float32_to_float64(b, s), d, 0, s); 3644 } 3645 3646 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) 3647 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32) 3648 GEN_VEXT_VV_ENV(vfwmacc_vv_h, 4) 3649 GEN_VEXT_VV_ENV(vfwmacc_vv_w, 8) 3650 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16) 3651 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32) 3652 GEN_VEXT_VF(vfwmacc_vf_h, 4) 3653 GEN_VEXT_VF(vfwmacc_vf_w, 8) 3654 3655 static uint32_t fwmaccbf16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3656 { 3657 return float32_muladd(bfloat16_to_float32(a, s), 3658 bfloat16_to_float32(b, s), d, 0, s); 3659 } 3660 3661 RVVCALL(OPFVV3, vfwmaccbf16_vv, WOP_UUU_H, H4, H2, H2, fwmaccbf16) 3662 GEN_VEXT_VV_ENV(vfwmaccbf16_vv, 4) 3663 RVVCALL(OPFVF3, vfwmaccbf16_vf, WOP_UUU_H, H4, H2, fwmaccbf16) 3664 GEN_VEXT_VF(vfwmaccbf16_vf, 4) 3665 3666 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3667 { 3668 return float32_muladd(float16_to_float32(a, true, s), 3669 float16_to_float32(b, true, s), d, 3670 float_muladd_negate_c | float_muladd_negate_product, 3671 s); 3672 } 3673 3674 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3675 { 3676 return float64_muladd(float32_to_float64(a, s), float32_to_float64(b, s), 3677 d, float_muladd_negate_c | 3678 float_muladd_negate_product, s); 3679 } 3680 3681 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) 3682 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) 3683 GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 4) 3684 GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 8) 3685 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) 3686 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) 3687 GEN_VEXT_VF(vfwnmacc_vf_h, 4) 3688 GEN_VEXT_VF(vfwnmacc_vf_w, 8) 3689 3690 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3691 { 3692 return float32_muladd(float16_to_float32(a, true, s), 3693 float16_to_float32(b, true, s), d, 3694 float_muladd_negate_c, s); 3695 } 3696 3697 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3698 { 3699 return float64_muladd(float32_to_float64(a, s), 3700 float32_to_float64(b, s), d, 3701 float_muladd_negate_c, s); 3702 } 3703 3704 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) 3705 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) 3706 GEN_VEXT_VV_ENV(vfwmsac_vv_h, 4) 3707 GEN_VEXT_VV_ENV(vfwmsac_vv_w, 8) 3708 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) 3709 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) 3710 GEN_VEXT_VF(vfwmsac_vf_h, 4) 3711 GEN_VEXT_VF(vfwmsac_vf_w, 8) 3712 3713 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3714 { 3715 return float32_muladd(float16_to_float32(a, true, s), 3716 float16_to_float32(b, true, s), d, 3717 
float_muladd_negate_product, s); 3718 } 3719 3720 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3721 { 3722 return float64_muladd(float32_to_float64(a, s), 3723 float32_to_float64(b, s), d, 3724 float_muladd_negate_product, s); 3725 } 3726 3727 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) 3728 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) 3729 GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 4) 3730 GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 8) 3731 RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) 3732 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) 3733 GEN_VEXT_VF(vfwnmsac_vf_h, 4) 3734 GEN_VEXT_VF(vfwnmsac_vf_w, 8) 3735 3736 /* Vector Floating-Point Square-Root Instruction */ 3737 #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 3738 static void do_##NAME(void *vd, void *vs2, int i, \ 3739 CPURISCVState *env) \ 3740 { \ 3741 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3742 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ 3743 } 3744 3745 #define GEN_VEXT_V_ENV(NAME, ESZ) \ 3746 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 3747 CPURISCVState *env, uint32_t desc) \ 3748 { \ 3749 uint32_t vm = vext_vm(desc); \ 3750 uint32_t vl = env->vl; \ 3751 uint32_t total_elems = \ 3752 vext_get_total_elems(env, desc, ESZ); \ 3753 uint32_t vta = vext_vta(desc); \ 3754 uint32_t vma = vext_vma(desc); \ 3755 uint32_t i; \ 3756 \ 3757 VSTART_CHECK_EARLY_EXIT(env, vl); \ 3758 \ 3759 if (vl == 0) { \ 3760 return; \ 3761 } \ 3762 for (i = env->vstart; i < vl; i++) { \ 3763 if (!vm && !vext_elem_mask(v0, i)) { \ 3764 /* set masked-off elements to 1s */ \ 3765 vext_set_elems_1s(vd, vma, i * ESZ, \ 3766 (i + 1) * ESZ); \ 3767 continue; \ 3768 } \ 3769 do_##NAME(vd, vs2, i, env); \ 3770 } \ 3771 env->vstart = 0; \ 3772 vext_set_elems_1s(vd, vta, vl * ESZ, \ 3773 total_elems * ESZ); \ 3774 } 3775 3776 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) 3777 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) 3778 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) 3779 GEN_VEXT_V_ENV(vfsqrt_v_h, 2) 3780 GEN_VEXT_V_ENV(vfsqrt_v_w, 4) 3781 GEN_VEXT_V_ENV(vfsqrt_v_d, 8) 3782 3783 /* 3784 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction 3785 * 3786 * Adapted from riscv-v-spec recip.c: 3787 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3788 */ 3789 static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size) 3790 { 3791 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3792 uint64_t exp = extract64(f, frac_size, exp_size); 3793 uint64_t frac = extract64(f, 0, frac_size); 3794 3795 const uint8_t lookup_table[] = { 3796 52, 51, 50, 48, 47, 46, 44, 43, 3797 42, 41, 40, 39, 38, 36, 35, 34, 3798 33, 32, 31, 30, 30, 29, 28, 27, 3799 26, 25, 24, 23, 23, 22, 21, 20, 3800 19, 19, 18, 17, 16, 16, 15, 14, 3801 14, 13, 12, 12, 11, 10, 10, 9, 3802 9, 8, 7, 7, 6, 6, 5, 4, 3803 4, 3, 3, 2, 2, 1, 1, 0, 3804 127, 125, 123, 121, 119, 118, 116, 114, 3805 113, 111, 109, 108, 106, 105, 103, 102, 3806 100, 99, 97, 96, 95, 93, 92, 91, 3807 90, 88, 87, 86, 85, 84, 83, 82, 3808 80, 79, 78, 77, 76, 75, 74, 73, 3809 72, 71, 70, 70, 69, 68, 67, 66, 3810 65, 64, 63, 63, 62, 61, 60, 59, 3811 59, 58, 57, 56, 56, 55, 54, 53 3812 }; 3813 const int precision = 7; 3814 3815 if (exp == 0 && frac != 0) { /* subnormal */ 3816 /* Normalize the subnormal. 
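The loop below shifts the fraction left
 * until its top bit is set, decrementing exp below zero -- it wraps as
 * an unsigned value, which ~exp in the out_exp computation below
 * compensates for -- and the final masked shift drops the now-explicit
 * leading one.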
*/ 3817 while (extract64(frac, frac_size - 1, 1) == 0) { 3818 exp--; 3819 frac <<= 1; 3820 } 3821 3822 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3823 } 3824 3825 int idx = ((exp & 1) << (precision - 1)) | 3826 (frac >> (frac_size - precision + 1)); 3827 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3828 (frac_size - precision); 3829 uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2; 3830 3831 uint64_t val = 0; 3832 val = deposit64(val, 0, frac_size, out_frac); 3833 val = deposit64(val, frac_size, exp_size, out_exp); 3834 val = deposit64(val, frac_size + exp_size, 1, sign); 3835 return val; 3836 } 3837 3838 static float16 frsqrt7_h(float16 f, float_status *s) 3839 { 3840 int exp_size = 5, frac_size = 10; 3841 bool sign = float16_is_neg(f); 3842 3843 /* 3844 * frsqrt7(sNaN) = canonical NaN 3845 * frsqrt7(-inf) = canonical NaN 3846 * frsqrt7(-normal) = canonical NaN 3847 * frsqrt7(-subnormal) = canonical NaN 3848 */ 3849 if (float16_is_signaling_nan(f, s) || 3850 (float16_is_infinity(f) && sign) || 3851 (float16_is_normal(f) && sign) || 3852 (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) { 3853 s->float_exception_flags |= float_flag_invalid; 3854 return float16_default_nan(s); 3855 } 3856 3857 /* frsqrt7(qNaN) = canonical NaN */ 3858 if (float16_is_quiet_nan(f, s)) { 3859 return float16_default_nan(s); 3860 } 3861 3862 /* frsqrt7(+-0) = +-inf */ 3863 if (float16_is_zero(f)) { 3864 s->float_exception_flags |= float_flag_divbyzero; 3865 return float16_set_sign(float16_infinity, sign); 3866 } 3867 3868 /* frsqrt7(+inf) = +0 */ 3869 if (float16_is_infinity(f) && !sign) { 3870 return float16_set_sign(float16_zero, sign); 3871 } 3872 3873 /* +normal, +subnormal */ 3874 uint64_t val = frsqrt7(f, exp_size, frac_size); 3875 return make_float16(val); 3876 } 3877 3878 static float32 frsqrt7_s(float32 f, float_status *s) 3879 { 3880 int exp_size = 8, frac_size = 23; 3881 bool sign = float32_is_neg(f); 3882 3883 /* 3884 * frsqrt7(sNaN) = canonical NaN 3885 * frsqrt7(-inf) = canonical NaN 3886 * frsqrt7(-normal) = canonical NaN 3887 * frsqrt7(-subnormal) = canonical NaN 3888 */ 3889 if (float32_is_signaling_nan(f, s) || 3890 (float32_is_infinity(f) && sign) || 3891 (float32_is_normal(f) && sign) || 3892 (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) { 3893 s->float_exception_flags |= float_flag_invalid; 3894 return float32_default_nan(s); 3895 } 3896 3897 /* frsqrt7(qNaN) = canonical NaN */ 3898 if (float32_is_quiet_nan(f, s)) { 3899 return float32_default_nan(s); 3900 } 3901 3902 /* frsqrt7(+-0) = +-inf */ 3903 if (float32_is_zero(f)) { 3904 s->float_exception_flags |= float_flag_divbyzero; 3905 return float32_set_sign(float32_infinity, sign); 3906 } 3907 3908 /* frsqrt7(+inf) = +0 */ 3909 if (float32_is_infinity(f) && !sign) { 3910 return float32_set_sign(float32_zero, sign); 3911 } 3912 3913 /* +normal, +subnormal */ 3914 uint64_t val = frsqrt7(f, exp_size, frac_size); 3915 return make_float32(val); 3916 } 3917 3918 static float64 frsqrt7_d(float64 f, float_status *s) 3919 { 3920 int exp_size = 11, frac_size = 52; 3921 bool sign = float64_is_neg(f); 3922 3923 /* 3924 * frsqrt7(sNaN) = canonical NaN 3925 * frsqrt7(-inf) = canonical NaN 3926 * frsqrt7(-normal) = canonical NaN 3927 * frsqrt7(-subnormal) = canonical NaN 3928 */ 3929 if (float64_is_signaling_nan(f, s) || 3930 (float64_is_infinity(f) && sign) || 3931 (float64_is_normal(f) && sign) || 3932 (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) { 3933 
s->float_exception_flags |= float_flag_invalid; 3934 return float64_default_nan(s); 3935 } 3936 3937 /* frsqrt7(qNaN) = canonical NaN */ 3938 if (float64_is_quiet_nan(f, s)) { 3939 return float64_default_nan(s); 3940 } 3941 3942 /* frsqrt7(+-0) = +-inf */ 3943 if (float64_is_zero(f)) { 3944 s->float_exception_flags |= float_flag_divbyzero; 3945 return float64_set_sign(float64_infinity, sign); 3946 } 3947 3948 /* frsqrt7(+inf) = +0 */ 3949 if (float64_is_infinity(f) && !sign) { 3950 return float64_set_sign(float64_zero, sign); 3951 } 3952 3953 /* +normal, +subnormal */ 3954 uint64_t val = frsqrt7(f, exp_size, frac_size); 3955 return make_float64(val); 3956 } 3957 3958 RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h) 3959 RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s) 3960 RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d) 3961 GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2) 3962 GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4) 3963 GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8) 3964 3965 /* 3966 * Vector Floating-Point Reciprocal Estimate Instruction 3967 * 3968 * Adapted from riscv-v-spec recip.c: 3969 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3970 */ 3971 static uint64_t frec7(uint64_t f, int exp_size, int frac_size, 3972 float_status *s) 3973 { 3974 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3975 uint64_t exp = extract64(f, frac_size, exp_size); 3976 uint64_t frac = extract64(f, 0, frac_size); 3977 3978 const uint8_t lookup_table[] = { 3979 127, 125, 123, 121, 119, 117, 116, 114, 3980 112, 110, 109, 107, 105, 104, 102, 100, 3981 99, 97, 96, 94, 93, 91, 90, 88, 3982 87, 85, 84, 83, 81, 80, 79, 77, 3983 76, 75, 74, 72, 71, 70, 69, 68, 3984 66, 65, 64, 63, 62, 61, 60, 59, 3985 58, 57, 56, 55, 54, 53, 52, 51, 3986 50, 49, 48, 47, 46, 45, 44, 43, 3987 42, 41, 40, 40, 39, 38, 37, 36, 3988 35, 35, 34, 33, 32, 31, 31, 30, 3989 29, 28, 28, 27, 26, 25, 25, 24, 3990 23, 23, 22, 21, 21, 20, 19, 19, 3991 18, 17, 17, 16, 15, 15, 14, 14, 3992 13, 12, 12, 11, 11, 10, 9, 9, 3993 8, 8, 7, 7, 6, 5, 5, 4, 3994 4, 3, 3, 2, 2, 1, 1, 0 3995 }; 3996 const int precision = 7; 3997 3998 if (exp == 0 && frac != 0) { /* subnormal */ 3999 /* Normalize the subnormal. */ 4000 while (extract64(frac, frac_size - 1, 1) == 0) { 4001 exp--; 4002 frac <<= 1; 4003 } 4004 4005 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 4006 4007 if (exp != 0 && exp != UINT64_MAX) { 4008 /* 4009 * Overflow to inf or max value of same sign, 4010 * depending on sign and rounding mode. 4011 */ 4012 s->float_exception_flags |= (float_flag_inexact | 4013 float_flag_overflow); 4014 4015 if ((s->float_rounding_mode == float_round_to_zero) || 4016 ((s->float_rounding_mode == float_round_down) && !sign) || 4017 ((s->float_rounding_mode == float_round_up) && sign)) { 4018 /* Return greatest/negative finite value. */ 4019 return (sign << (exp_size + frac_size)) | 4020 (MAKE_64BIT_MASK(frac_size, exp_size) - 1); 4021 } else { 4022 /* Return +-inf. */ 4023 return (sign << (exp_size + frac_size)) | 4024 MAKE_64BIT_MASK(frac_size, exp_size); 4025 } 4026 } 4027 } 4028 4029 int idx = frac >> (frac_size - precision); 4030 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 4031 (frac_size - precision); 4032 uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp; 4033 4034 if (out_exp == 0 || out_exp == UINT64_MAX) { 4035 /* 4036 * The result is subnormal, but don't raise the underflow exception, 4037 * because there's no additional loss of precision. 
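* Re-encode 1.out_frac * 2^(out_exp - bias) as a subnormal: fold the
 * hidden leading one into the top bit of the right-shifted fraction,
 * and if out_exp has wrapped to -1 (UINT64_MAX here) shift once more
 * and force the stored exponent to zero.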
4038 */ 4039 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1); 4040 if (out_exp == UINT64_MAX) { 4041 out_frac >>= 1; 4042 out_exp = 0; 4043 } 4044 } 4045 4046 uint64_t val = 0; 4047 val = deposit64(val, 0, frac_size, out_frac); 4048 val = deposit64(val, frac_size, exp_size, out_exp); 4049 val = deposit64(val, frac_size + exp_size, 1, sign); 4050 return val; 4051 } 4052 4053 static float16 frec7_h(float16 f, float_status *s) 4054 { 4055 int exp_size = 5, frac_size = 10; 4056 bool sign = float16_is_neg(f); 4057 4058 /* frec7(+-inf) = +-0 */ 4059 if (float16_is_infinity(f)) { 4060 return float16_set_sign(float16_zero, sign); 4061 } 4062 4063 /* frec7(+-0) = +-inf */ 4064 if (float16_is_zero(f)) { 4065 s->float_exception_flags |= float_flag_divbyzero; 4066 return float16_set_sign(float16_infinity, sign); 4067 } 4068 4069 /* frec7(sNaN) = canonical NaN */ 4070 if (float16_is_signaling_nan(f, s)) { 4071 s->float_exception_flags |= float_flag_invalid; 4072 return float16_default_nan(s); 4073 } 4074 4075 /* frec7(qNaN) = canonical NaN */ 4076 if (float16_is_quiet_nan(f, s)) { 4077 return float16_default_nan(s); 4078 } 4079 4080 /* +-normal, +-subnormal */ 4081 uint64_t val = frec7(f, exp_size, frac_size, s); 4082 return make_float16(val); 4083 } 4084 4085 static float32 frec7_s(float32 f, float_status *s) 4086 { 4087 int exp_size = 8, frac_size = 23; 4088 bool sign = float32_is_neg(f); 4089 4090 /* frec7(+-inf) = +-0 */ 4091 if (float32_is_infinity(f)) { 4092 return float32_set_sign(float32_zero, sign); 4093 } 4094 4095 /* frec7(+-0) = +-inf */ 4096 if (float32_is_zero(f)) { 4097 s->float_exception_flags |= float_flag_divbyzero; 4098 return float32_set_sign(float32_infinity, sign); 4099 } 4100 4101 /* frec7(sNaN) = canonical NaN */ 4102 if (float32_is_signaling_nan(f, s)) { 4103 s->float_exception_flags |= float_flag_invalid; 4104 return float32_default_nan(s); 4105 } 4106 4107 /* frec7(qNaN) = canonical NaN */ 4108 if (float32_is_quiet_nan(f, s)) { 4109 return float32_default_nan(s); 4110 } 4111 4112 /* +-normal, +-subnormal */ 4113 uint64_t val = frec7(f, exp_size, frac_size, s); 4114 return make_float32(val); 4115 } 4116 4117 static float64 frec7_d(float64 f, float_status *s) 4118 { 4119 int exp_size = 11, frac_size = 52; 4120 bool sign = float64_is_neg(f); 4121 4122 /* frec7(+-inf) = +-0 */ 4123 if (float64_is_infinity(f)) { 4124 return float64_set_sign(float64_zero, sign); 4125 } 4126 4127 /* frec7(+-0) = +-inf */ 4128 if (float64_is_zero(f)) { 4129 s->float_exception_flags |= float_flag_divbyzero; 4130 return float64_set_sign(float64_infinity, sign); 4131 } 4132 4133 /* frec7(sNaN) = canonical NaN */ 4134 if (float64_is_signaling_nan(f, s)) { 4135 s->float_exception_flags |= float_flag_invalid; 4136 return float64_default_nan(s); 4137 } 4138 4139 /* frec7(qNaN) = canonical NaN */ 4140 if (float64_is_quiet_nan(f, s)) { 4141 return float64_default_nan(s); 4142 } 4143 4144 /* +-normal, +-subnormal */ 4145 uint64_t val = frec7(f, exp_size, frac_size, s); 4146 return make_float64(val); 4147 } 4148 4149 RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h) 4150 RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s) 4151 RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d) 4152 GEN_VEXT_V_ENV(vfrec7_v_h, 2) 4153 GEN_VEXT_V_ENV(vfrec7_v_w, 4) 4154 GEN_VEXT_V_ENV(vfrec7_v_d, 8) 4155 4156 /* Vector Floating-Point MIN/MAX Instructions */ 4157 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number) 4158 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, 
float32_minimum_number) 4159 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number) 4160 GEN_VEXT_VV_ENV(vfmin_vv_h, 2) 4161 GEN_VEXT_VV_ENV(vfmin_vv_w, 4) 4162 GEN_VEXT_VV_ENV(vfmin_vv_d, 8) 4163 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number) 4164 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number) 4165 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number) 4166 GEN_VEXT_VF(vfmin_vf_h, 2) 4167 GEN_VEXT_VF(vfmin_vf_w, 4) 4168 GEN_VEXT_VF(vfmin_vf_d, 8) 4169 4170 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number) 4171 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number) 4172 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number) 4173 GEN_VEXT_VV_ENV(vfmax_vv_h, 2) 4174 GEN_VEXT_VV_ENV(vfmax_vv_w, 4) 4175 GEN_VEXT_VV_ENV(vfmax_vv_d, 8) 4176 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number) 4177 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number) 4178 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number) 4179 GEN_VEXT_VF(vfmax_vf_h, 2) 4180 GEN_VEXT_VF(vfmax_vf_w, 4) 4181 GEN_VEXT_VF(vfmax_vf_d, 8) 4182 4183 /* Vector Floating-Point Sign-Injection Instructions */ 4184 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) 4185 { 4186 return deposit64(b, 0, 15, a); 4187 } 4188 4189 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s) 4190 { 4191 return deposit64(b, 0, 31, a); 4192 } 4193 4194 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) 4195 { 4196 return deposit64(b, 0, 63, a); 4197 } 4198 4199 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) 4200 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) 4201 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) 4202 GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2) 4203 GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4) 4204 GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8) 4205 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) 4206 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) 4207 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64) 4208 GEN_VEXT_VF(vfsgnj_vf_h, 2) 4209 GEN_VEXT_VF(vfsgnj_vf_w, 4) 4210 GEN_VEXT_VF(vfsgnj_vf_d, 8) 4211 4212 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) 4213 { 4214 return deposit64(~b, 0, 15, a); 4215 } 4216 4217 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s) 4218 { 4219 return deposit64(~b, 0, 31, a); 4220 } 4221 4222 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s) 4223 { 4224 return deposit64(~b, 0, 63, a); 4225 } 4226 4227 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) 4228 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) 4229 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) 4230 GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2) 4231 GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4) 4232 GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8) 4233 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) 4234 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) 4235 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) 4236 GEN_VEXT_VF(vfsgnjn_vf_h, 2) 4237 GEN_VEXT_VF(vfsgnjn_vf_w, 4) 4238 GEN_VEXT_VF(vfsgnjn_vf_d, 8) 4239 4240 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) 4241 { 4242 return deposit64(b ^ a, 0, 15, a); 4243 } 4244 4245 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s) 4246 { 4247 return deposit64(b ^ a, 0, 31, a); 4248 } 4249 4250 static uint64_t fsgnjx64(uint64_t a, uint64_t b, 
float_status *s) 4251 { 4252 return deposit64(b ^ a, 0, 63, a); 4253 } 4254 4255 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) 4256 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) 4257 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) 4258 GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2) 4259 GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4) 4260 GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8) 4261 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) 4262 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) 4263 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) 4264 GEN_VEXT_VF(vfsgnjx_vf_h, 2) 4265 GEN_VEXT_VF(vfsgnjx_vf_w, 4) 4266 GEN_VEXT_VF(vfsgnjx_vf_d, 8) 4267 4268 /* Vector Floating-Point Compare Instructions */ 4269 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ 4270 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4271 CPURISCVState *env, uint32_t desc) \ 4272 { \ 4273 uint32_t vm = vext_vm(desc); \ 4274 uint32_t vl = env->vl; \ 4275 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \ 4276 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 4277 uint32_t vma = vext_vma(desc); \ 4278 uint32_t i; \ 4279 \ 4280 VSTART_CHECK_EARLY_EXIT(env, vl); \ 4281 \ 4282 for (i = env->vstart; i < vl; i++) { \ 4283 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 4284 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4285 if (!vm && !vext_elem_mask(v0, i)) { \ 4286 /* set masked-off elements to 1s */ \ 4287 if (vma) { \ 4288 vext_set_elem_mask(vd, i, 1); \ 4289 } \ 4290 continue; \ 4291 } \ 4292 vext_set_elem_mask(vd, i, \ 4293 DO_OP(s2, s1, &env->fp_status)); \ 4294 } \ 4295 env->vstart = 0; \ 4296 /* 4297 * mask destination register are always tail-agnostic 4298 * set tail elements to 1s 4299 */ \ 4300 if (vta_all_1s) { \ 4301 for (; i < total_elems; i++) { \ 4302 vext_set_elem_mask(vd, i, 1); \ 4303 } \ 4304 } \ 4305 } 4306 4307 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet) 4308 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet) 4309 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet) 4310 4311 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \ 4312 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4313 CPURISCVState *env, uint32_t desc) \ 4314 { \ 4315 uint32_t vm = vext_vm(desc); \ 4316 uint32_t vl = env->vl; \ 4317 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \ 4318 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 4319 uint32_t vma = vext_vma(desc); \ 4320 uint32_t i; \ 4321 \ 4322 VSTART_CHECK_EARLY_EXIT(env, vl); \ 4323 \ 4324 for (i = env->vstart; i < vl; i++) { \ 4325 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4326 if (!vm && !vext_elem_mask(v0, i)) { \ 4327 /* set masked-off elements to 1s */ \ 4328 if (vma) { \ 4329 vext_set_elem_mask(vd, i, 1); \ 4330 } \ 4331 continue; \ 4332 } \ 4333 vext_set_elem_mask(vd, i, \ 4334 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \ 4335 } \ 4336 env->vstart = 0; \ 4337 /* 4338 * mask destination register are always tail-agnostic 4339 * set tail elements to 1s 4340 */ \ 4341 if (vta_all_1s) { \ 4342 for (; i < total_elems; i++) { \ 4343 vext_set_elem_mask(vd, i, 1); \ 4344 } \ 4345 } \ 4346 } 4347 4348 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet) 4349 GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet) 4350 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet) 4351 4352 static bool vmfne16(uint16_t a, uint16_t b, float_status *s) 4353 { 4354 FloatRelation compare = float16_compare_quiet(a, b, s); 4355 return compare != float_relation_equal; 4356 } 4357 4358 static 
bool vmfne32(uint32_t a, uint32_t b, float_status *s) 4359 { 4360 FloatRelation compare = float32_compare_quiet(a, b, s); 4361 return compare != float_relation_equal; 4362 } 4363 4364 static bool vmfne64(uint64_t a, uint64_t b, float_status *s) 4365 { 4366 FloatRelation compare = float64_compare_quiet(a, b, s); 4367 return compare != float_relation_equal; 4368 } 4369 4370 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16) 4371 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32) 4372 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64) 4373 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16) 4374 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32) 4375 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64) 4376 4377 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt) 4378 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt) 4379 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt) 4380 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt) 4381 GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt) 4382 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt) 4383 4384 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le) 4385 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le) 4386 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le) 4387 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le) 4388 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le) 4389 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le) 4390 4391 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s) 4392 { 4393 FloatRelation compare = float16_compare(a, b, s); 4394 return compare == float_relation_greater; 4395 } 4396 4397 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s) 4398 { 4399 FloatRelation compare = float32_compare(a, b, s); 4400 return compare == float_relation_greater; 4401 } 4402 4403 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s) 4404 { 4405 FloatRelation compare = float64_compare(a, b, s); 4406 return compare == float_relation_greater; 4407 } 4408 4409 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16) 4410 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32) 4411 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64) 4412 4413 static bool vmfge16(uint16_t a, uint16_t b, float_status *s) 4414 { 4415 FloatRelation compare = float16_compare(a, b, s); 4416 return compare == float_relation_greater || 4417 compare == float_relation_equal; 4418 } 4419 4420 static bool vmfge32(uint32_t a, uint32_t b, float_status *s) 4421 { 4422 FloatRelation compare = float32_compare(a, b, s); 4423 return compare == float_relation_greater || 4424 compare == float_relation_equal; 4425 } 4426 4427 static bool vmfge64(uint64_t a, uint64_t b, float_status *s) 4428 { 4429 FloatRelation compare = float64_compare(a, b, s); 4430 return compare == float_relation_greater || 4431 compare == float_relation_equal; 4432 } 4433 4434 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16) 4435 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32) 4436 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64) 4437 4438 /* Vector Floating-Point Classify Instruction */ 4439 target_ulong fclass_h(uint64_t frs1) 4440 { 4441 float16 f = frs1; 4442 bool sign = float16_is_neg(f); 4443 4444 if (float16_is_infinity(f)) { 4445 return sign ? 1 << 0 : 1 << 7; 4446 } else if (float16_is_zero(f)) { 4447 return sign ? 1 << 3 : 1 << 4; 4448 } else if (float16_is_zero_or_denormal(f)) { 4449 return sign ? 
1 << 2 : 1 << 5; 4450 } else if (float16_is_any_nan(f)) { 4451 float_status s = { }; /* for snan_bit_is_one */ 4452 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4453 } else { 4454 return sign ? 1 << 1 : 1 << 6; 4455 } 4456 } 4457 4458 target_ulong fclass_s(uint64_t frs1) 4459 { 4460 float32 f = frs1; 4461 bool sign = float32_is_neg(f); 4462 4463 if (float32_is_infinity(f)) { 4464 return sign ? 1 << 0 : 1 << 7; 4465 } else if (float32_is_zero(f)) { 4466 return sign ? 1 << 3 : 1 << 4; 4467 } else if (float32_is_zero_or_denormal(f)) { 4468 return sign ? 1 << 2 : 1 << 5; 4469 } else if (float32_is_any_nan(f)) { 4470 float_status s = { }; /* for snan_bit_is_one */ 4471 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4472 } else { 4473 return sign ? 1 << 1 : 1 << 6; 4474 } 4475 } 4476 4477 target_ulong fclass_d(uint64_t frs1) 4478 { 4479 float64 f = frs1; 4480 bool sign = float64_is_neg(f); 4481 4482 if (float64_is_infinity(f)) { 4483 return sign ? 1 << 0 : 1 << 7; 4484 } else if (float64_is_zero(f)) { 4485 return sign ? 1 << 3 : 1 << 4; 4486 } else if (float64_is_zero_or_denormal(f)) { 4487 return sign ? 1 << 2 : 1 << 5; 4488 } else if (float64_is_any_nan(f)) { 4489 float_status s = { }; /* for snan_bit_is_one */ 4490 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4491 } else { 4492 return sign ? 1 << 1 : 1 << 6; 4493 } 4494 } 4495 4496 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) 4497 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) 4498 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) 4499 GEN_VEXT_V(vfclass_v_h, 2) 4500 GEN_VEXT_V(vfclass_v_w, 4) 4501 GEN_VEXT_V(vfclass_v_d, 8) 4502 4503 /* Vector Floating-Point Merge Instruction */ 4504 4505 #define GEN_VFMERGE_VF(NAME, ETYPE, H) \ 4506 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4507 CPURISCVState *env, uint32_t desc) \ 4508 { \ 4509 uint32_t vm = vext_vm(desc); \ 4510 uint32_t vl = env->vl; \ 4511 uint32_t esz = sizeof(ETYPE); \ 4512 uint32_t total_elems = \ 4513 vext_get_total_elems(env, desc, esz); \ 4514 uint32_t vta = vext_vta(desc); \ 4515 uint32_t i; \ 4516 \ 4517 VSTART_CHECK_EARLY_EXIT(env, vl); \ 4518 \ 4519 for (i = env->vstart; i < vl; i++) { \ 4520 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4521 *((ETYPE *)vd + H(i)) = \ 4522 (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \ 4523 } \ 4524 env->vstart = 0; \ 4525 /* set tail elements to 1s */ \ 4526 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4527 } 4528 4529 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2) 4530 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4) 4531 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8) 4532 4533 /* Single-Width Floating-Point/Integer Type-Convert Instructions */ 4534 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ 4535 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) 4536 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) 4537 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) 4538 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2) 4539 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4) 4540 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8) 4541 4542 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. 
*/ 4543 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) 4544 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) 4545 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) 4546 GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2) 4547 GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4) 4548 GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8) 4549 4550 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ 4551 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) 4552 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) 4553 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) 4554 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2) 4555 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4) 4556 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8) 4557 4558 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */ 4559 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) 4560 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) 4561 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) 4562 GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2) 4563 GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4) 4564 GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8) 4565 4566 /* Widening Floating-Point/Integer Type-Convert Instructions */ 4567 /* (TD, T2, TX2) */ 4568 #define WOP_UU_B uint16_t, uint8_t, uint8_t 4569 #define WOP_UU_H uint32_t, uint16_t, uint16_t 4570 #define WOP_UU_W uint64_t, uint32_t, uint32_t 4571 /* 4572 * vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer. 4573 */ 4574 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) 4575 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) 4576 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 4) 4577 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 8) 4578 4579 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */ 4580 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) 4581 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) 4582 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 4) 4583 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 8) 4584 4585 /* 4586 * vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float. 4587 */ 4588 RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16) 4589 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) 4590 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) 4591 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 2) 4592 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 4) 4593 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 8) 4594 4595 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ 4596 RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16) 4597 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) 4598 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) 4599 GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 2) 4600 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 4) 4601 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 8) 4602 4603 /* 4604 * vfwcvt.f.f.v vd, vs2, vm # Convert single-width float to double-width float. 
4605 */ 4606 static uint32_t vfwcvtffv16(uint16_t a, float_status *s) 4607 { 4608 return float16_to_float32(a, true, s); 4609 } 4610 4611 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16) 4612 RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64) 4613 GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 4) 4614 GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 8) 4615 4616 RVVCALL(OPFVV1, vfwcvtbf16_f_f_v, WOP_UU_H, H4, H2, bfloat16_to_float32) 4617 GEN_VEXT_V_ENV(vfwcvtbf16_f_f_v, 4) 4618 4619 /* Narrowing Floating-Point/Integer Type-Convert Instructions */ 4620 /* (TD, T2, TX2) */ 4621 #define NOP_UU_B uint8_t, uint16_t, uint32_t 4622 #define NOP_UU_H uint16_t, uint32_t, uint32_t 4623 #define NOP_UU_W uint32_t, uint64_t, uint64_t 4624 /* vfncvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ 4625 RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8) 4626 RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16) 4627 RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32) 4628 GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1) 4629 GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2) 4630 GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4) 4631 4632 /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */ 4633 RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8) 4634 RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16) 4635 RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32) 4636 GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1) 4637 GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2) 4638 GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4) 4639 4640 /* 4641 * vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float. 4642 */ 4643 RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16) 4644 RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32) 4645 GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2) 4646 GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4) 4647 4648 /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */ 4649 RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16) 4650 RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32) 4651 GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2) 4652 GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4) 4653 4654 /* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. 
*/ 4655 static uint16_t vfncvtffv16(uint32_t a, float_status *s) 4656 { 4657 return float32_to_float16(a, true, s); 4658 } 4659 4660 RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16) 4661 RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32) 4662 GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2) 4663 GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4) 4664 4665 RVVCALL(OPFVV1, vfncvtbf16_f_f_w, NOP_UU_H, H2, H4, float32_to_bfloat16) 4666 GEN_VEXT_V_ENV(vfncvtbf16_f_f_w, 2) 4667 4668 /* 4669 * Vector Reduction Operations 4670 */ 4671 /* Vector Single-Width Integer Reduction Instructions */ 4672 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \ 4673 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4674 void *vs2, CPURISCVState *env, \ 4675 uint32_t desc) \ 4676 { \ 4677 uint32_t vm = vext_vm(desc); \ 4678 uint32_t vl = env->vl; \ 4679 uint32_t esz = sizeof(TD); \ 4680 uint32_t vlenb = simd_maxsz(desc); \ 4681 uint32_t vta = vext_vta(desc); \ 4682 uint32_t i; \ 4683 TD s1 = *((TD *)vs1 + HD(0)); \ 4684 \ 4685 VSTART_CHECK_EARLY_EXIT(env, vl); \ 4686 \ 4687 for (i = env->vstart; i < vl; i++) { \ 4688 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4689 if (!vm && !vext_elem_mask(v0, i)) { \ 4690 continue; \ 4691 } \ 4692 s1 = OP(s1, (TD)s2); \ 4693 } \ 4694 if (vl > 0) { \ 4695 *((TD *)vd + HD(0)) = s1; \ 4696 } \ 4697 env->vstart = 0; \ 4698 /* set tail elements to 1s */ \ 4699 vext_set_elems_1s(vd, vta, esz, vlenb); \ 4700 } 4701 4702 /* vd[0] = sum(vs1[0], vs2[*]) */ 4703 GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD) 4704 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD) 4705 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD) 4706 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD) 4707 4708 /* vd[0] = maxu(vs1[0], vs2[*]) */ 4709 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX) 4710 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX) 4711 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX) 4712 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX) 4713 4714 /* vd[0] = max(vs1[0], vs2[*]) */ 4715 GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX) 4716 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX) 4717 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX) 4718 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX) 4719 4720 /* vd[0] = minu(vs1[0], vs2[*]) */ 4721 GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN) 4722 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN) 4723 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN) 4724 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN) 4725 4726 /* vd[0] = min(vs1[0], vs2[*]) */ 4727 GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN) 4728 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN) 4729 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN) 4730 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN) 4731 4732 /* vd[0] = and(vs1[0], vs2[*]) */ 4733 GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND) 4734 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND) 4735 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND) 4736 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND) 4737 4738 /* vd[0] = or(vs1[0], vs2[*]) */ 4739 GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR) 4740 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR) 4741 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR) 4742 
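/*
 * Editor's note -- illustrative sketch only, not part of the upstream
 * helpers.  Every GEN_VEXT_RED expansion in this block follows the same
 * shape: seed the accumulator with vs1[0], fold each active element of
 * vs2 into it with OP, and store the single result to vd[0] (vd[0] is
 * left untouched when vl == 0).  The hypothetical ex_redsum_i32() below
 * restates that loop with plain arrays and no masking, vstart or tail
 * handling, purely as a reference for the macro above.
 */
static inline int32_t ex_redsum_i32(const int32_t *vs2, int32_t seed,
                                    uint32_t vl)
{
    int32_t acc = seed;                     /* TD s1 = vs1[0]           */
    uint32_t i;

    for (i = 0; i < vl; i++) {              /* every element is active  */
        acc = acc + vs2[i];                 /* s1 = OP(s1, (TD)s2)      */
    }
    return acc;                             /* written to vd[0] if vl>0 */
}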
GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR) 4743 4744 /* vd[0] = xor(vs1[0], vs2[*]) */ 4745 GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR) 4746 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR) 4747 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR) 4748 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR) 4749 4750 /* Vector Widening Integer Reduction Instructions */ 4751 /* signed sum reduction into double-width accumulator */ 4752 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD) 4753 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD) 4754 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD) 4755 4756 /* Unsigned sum reduction into double-width accumulator */ 4757 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD) 4758 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD) 4759 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD) 4760 4761 /* Vector Single-Width Floating-Point Reduction Instructions */ 4762 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \ 4763 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4764 void *vs2, CPURISCVState *env, \ 4765 uint32_t desc) \ 4766 { \ 4767 uint32_t vm = vext_vm(desc); \ 4768 uint32_t vl = env->vl; \ 4769 uint32_t esz = sizeof(TD); \ 4770 uint32_t vlenb = simd_maxsz(desc); \ 4771 uint32_t vta = vext_vta(desc); \ 4772 uint32_t i; \ 4773 TD s1 = *((TD *)vs1 + HD(0)); \ 4774 \ 4775 VSTART_CHECK_EARLY_EXIT(env, vl); \ 4776 \ 4777 for (i = env->vstart; i < vl; i++) { \ 4778 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4779 if (!vm && !vext_elem_mask(v0, i)) { \ 4780 continue; \ 4781 } \ 4782 s1 = OP(s1, (TD)s2, &env->fp_status); \ 4783 } \ 4784 if (vl > 0) { \ 4785 *((TD *)vd + HD(0)) = s1; \ 4786 } \ 4787 env->vstart = 0; \ 4788 /* set tail elements to 1s */ \ 4789 vext_set_elems_1s(vd, vta, esz, vlenb); \ 4790 } 4791 4792 /* Unordered sum */ 4793 GEN_VEXT_FRED(vfredusum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 4794 GEN_VEXT_FRED(vfredusum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4795 GEN_VEXT_FRED(vfredusum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4796 4797 /* Ordered sum */ 4798 GEN_VEXT_FRED(vfredosum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 4799 GEN_VEXT_FRED(vfredosum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4800 GEN_VEXT_FRED(vfredosum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4801 4802 /* Maximum value */ 4803 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, 4804 float16_maximum_number) 4805 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, 4806 float32_maximum_number) 4807 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, 4808 float64_maximum_number) 4809 4810 /* Minimum value */ 4811 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, 4812 float16_minimum_number) 4813 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, 4814 float32_minimum_number) 4815 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, 4816 float64_minimum_number) 4817 4818 /* Vector Widening Floating-Point Add Instructions */ 4819 static uint32_t fwadd16(uint32_t a, uint16_t b, float_status *s) 4820 { 4821 return float32_add(a, float16_to_float32(b, true, s), s); 4822 } 4823 4824 static uint64_t fwadd32(uint64_t a, uint32_t b, float_status *s) 4825 { 4826 return float64_add(a, float32_to_float64(b, s), s); 4827 } 4828 4829 /* Vector Widening Floating-Point Reduction Instructions */ 4830 /* Ordered/unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */ 4831 GEN_VEXT_FRED(vfwredusum_vs_h, 
uint32_t, uint16_t, H4, H2, fwadd16) 4832 GEN_VEXT_FRED(vfwredusum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32) 4833 GEN_VEXT_FRED(vfwredosum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16) 4834 GEN_VEXT_FRED(vfwredosum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32) 4835 4836 /* 4837 * Vector Mask Operations 4838 */ 4839 /* Vector Mask-Register Logical Instructions */ 4840 #define GEN_VEXT_MASK_VV(NAME, OP) \ 4841 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4842 void *vs2, CPURISCVState *env, \ 4843 uint32_t desc) \ 4844 { \ 4845 uint32_t vl = env->vl; \ 4846 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;\ 4847 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 4848 uint32_t i; \ 4849 int a, b; \ 4850 \ 4851 VSTART_CHECK_EARLY_EXIT(env, vl); \ 4852 \ 4853 for (i = env->vstart; i < vl; i++) { \ 4854 a = vext_elem_mask(vs1, i); \ 4855 b = vext_elem_mask(vs2, i); \ 4856 vext_set_elem_mask(vd, i, OP(b, a)); \ 4857 } \ 4858 env->vstart = 0; \ 4859 /* 4860 * mask destination register are always tail-agnostic 4861 * set tail elements to 1s 4862 */ \ 4863 if (vta_all_1s) { \ 4864 for (; i < total_elems; i++) { \ 4865 vext_set_elem_mask(vd, i, 1); \ 4866 } \ 4867 } \ 4868 } 4869 4870 #define DO_NAND(N, M) (!(N & M)) 4871 #define DO_ANDNOT(N, M) (N & !M) 4872 #define DO_NOR(N, M) (!(N | M)) 4873 #define DO_ORNOT(N, M) (N | !M) 4874 #define DO_XNOR(N, M) (!(N ^ M)) 4875 4876 GEN_VEXT_MASK_VV(vmand_mm, DO_AND) 4877 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND) 4878 GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT) 4879 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR) 4880 GEN_VEXT_MASK_VV(vmor_mm, DO_OR) 4881 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR) 4882 GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT) 4883 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR) 4884 4885 /* Vector count population in mask vcpop */ 4886 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env, 4887 uint32_t desc) 4888 { 4889 target_ulong cnt = 0; 4890 uint32_t vm = vext_vm(desc); 4891 uint32_t vl = env->vl; 4892 int i; 4893 4894 for (i = env->vstart; i < vl; i++) { 4895 if (vm || vext_elem_mask(v0, i)) { 4896 if (vext_elem_mask(vs2, i)) { 4897 cnt++; 4898 } 4899 } 4900 } 4901 env->vstart = 0; 4902 return cnt; 4903 } 4904 4905 /* vfirst find-first-set mask bit */ 4906 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env, 4907 uint32_t desc) 4908 { 4909 uint32_t vm = vext_vm(desc); 4910 uint32_t vl = env->vl; 4911 int i; 4912 4913 for (i = env->vstart; i < vl; i++) { 4914 if (vm || vext_elem_mask(v0, i)) { 4915 if (vext_elem_mask(vs2, i)) { 4916 return i; 4917 } 4918 } 4919 } 4920 env->vstart = 0; 4921 return -1LL; 4922 } 4923 4924 enum set_mask_type { 4925 ONLY_FIRST = 1, 4926 INCLUDE_FIRST, 4927 BEFORE_FIRST, 4928 }; 4929 4930 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, 4931 uint32_t desc, enum set_mask_type type) 4932 { 4933 uint32_t vm = vext_vm(desc); 4934 uint32_t vl = env->vl; 4935 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; 4936 uint32_t vta_all_1s = vext_vta_all_1s(desc); 4937 uint32_t vma = vext_vma(desc); 4938 int i; 4939 bool first_mask_bit = false; 4940 4941 VSTART_CHECK_EARLY_EXIT(env, vl); 4942 4943 for (i = env->vstart; i < vl; i++) { 4944 if (!vm && !vext_elem_mask(v0, i)) { 4945 /* set masked-off elements to 1s */ 4946 if (vma) { 4947 vext_set_elem_mask(vd, i, 1); 4948 } 4949 continue; 4950 } 4951 /* write a zero to all following active elements */ 4952 if (first_mask_bit) { 4953 vext_set_elem_mask(vd, i, 0); 4954 continue; 4955 } 4956 if (vext_elem_mask(vs2, i)) { 4957 first_mask_bit = true; 4958 if (type 
static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
                   uint32_t desc, enum set_mask_type type)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;
    uint32_t vta_all_1s = vext_vta_all_1s(desc);
    uint32_t vma = vext_vma(desc);
    int i;
    bool first_mask_bit = false;

    VSTART_CHECK_EARLY_EXIT(env, vl);

    for (i = env->vstart; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            /* set masked-off elements to 1s */
            if (vma) {
                vext_set_elem_mask(vd, i, 1);
            }
            continue;
        }
        /* write a zero to all following active elements */
        if (first_mask_bit) {
            vext_set_elem_mask(vd, i, 0);
            continue;
        }
        if (vext_elem_mask(vs2, i)) {
            first_mask_bit = true;
            if (type == BEFORE_FIRST) {
                vext_set_elem_mask(vd, i, 0);
            } else {
                vext_set_elem_mask(vd, i, 1);
            }
        } else {
            if (type == ONLY_FIRST) {
                vext_set_elem_mask(vd, i, 0);
            } else {
                vext_set_elem_mask(vd, i, 1);
            }
        }
    }
    env->vstart = 0;
    /*
     * mask destination registers are always tail-agnostic
     * set tail elements to 1s
     */
    if (vta_all_1s) {
        for (; i < total_elems; i++) {
            vext_set_elem_mask(vd, i, 1);
        }
    }
}

void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
}

void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
}

void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
}

/* Vector Iota Instruction */
#define GEN_VEXT_VIOTA_M(NAME, ETYPE, H)                                  \
void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env,      \
                  uint32_t desc)                                          \
{                                                                         \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint32_t sum = 0;                                                     \
    int i;                                                                \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = sum;                                      \
        if (vext_elem_mask(vs2, i)) {                                     \
            sum++;                                                        \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1)
GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)

/* Vector Element Index Instruction */
#define GEN_VEXT_VID_V(NAME, ETYPE, H)                                    \
void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc)  \
{                                                                         \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    int i;                                                                \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = i;                                        \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_VID_V(vid_v_b, uint8_t, H1)
GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)
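
/*
 * Worked example for viota.m / vid.v as implemented above (values chosen
 * arbitrarily; all elements active, vl = 5):
 *
 *   vs2 mask:        1 0 1 1 0
 *   viota.m result:  0 1 1 2 3   (running count of set bits in vs2[0..i-1])
 *   vid.v result:    0 1 2 3 4   (element index, no source operand)
 */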
/*
 * Vector Permutation Instructions
 */

/* Vector Slide Instructions */
#define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H)                              \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    target_ulong offset = s1, i_min, i;                                   \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    i_min = MAX(env->vstart, offset);                                     \
    for (i = i_min; i < vl; i++) {                                        \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset));          \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)

#define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H)                            \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    target_ulong i_max, i_min, i;                                         \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    i_min = MIN(s1 < vlmax ? vlmax - s1 : 0, vl);                         \
    i_max = MAX(i_min, env->vstart);                                      \
    for (i = env->vstart; i < i_max; ++i) {                               \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1));              \
    }                                                                     \
                                                                          \
    for (i = i_max; i < vl; ++i) {                                        \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = 0;                                        \
    }                                                                     \
                                                                          \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)
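
/*
 * Worked example for the slide helpers above (values chosen arbitrarily;
 * all elements active, vl = vlmax = 4, x[rs1] = 1):
 *
 *   vs2:                a b c d
 *   vslideup.vx   vd:   - a b c   ('-' marks vd[i] for i < rs1, left unchanged)
 *   vslidedown.vx vd:   b c d 0   (source elements past vlmax read as 0)
 */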
#define GEN_VEXT_VSLIE1UP(BITWIDTH, H)                                    \
static void vslide1up_##BITWIDTH(void *vd, void *v0, uint64_t s1,         \
                                 void *vs2, CPURISCVState *env,           \
                                 uint32_t desc)                           \
{                                                                         \
    typedef uint##BITWIDTH##_t ETYPE;                                     \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint32_t i;                                                           \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        if (i == 0) {                                                     \
            *((ETYPE *)vd + H(i)) = s1;                                   \
        } else {                                                          \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_VSLIE1UP(8, H1)
GEN_VEXT_VSLIE1UP(16, H2)
GEN_VEXT_VSLIE1UP(32, H4)
GEN_VEXT_VSLIE1UP(64, H8)

#define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH)                             \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc);                     \
}

/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)

#define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H)                                 \
static void vslide1down_##BITWIDTH(void *vd, void *v0, uint64_t s1,       \
                                   void *vs2, CPURISCVState *env,         \
                                   uint32_t desc)                         \
{                                                                         \
    typedef uint##BITWIDTH##_t ETYPE;                                     \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint32_t i;                                                           \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        if (i == vl - 1) {                                                \
            *((ETYPE *)vd + H(i)) = s1;                                   \
        } else {                                                          \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_VSLIDE1DOWN(8, H1)
GEN_VEXT_VSLIDE1DOWN(16, H2)
GEN_VEXT_VSLIDE1DOWN(32, H4)
GEN_VEXT_VSLIDE1DOWN(64, H8)

#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH)                           \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc);                   \
}

/* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)

/* Vector Floating-Point Slide Instructions */
#define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH)                            \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,             \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc);                     \
}

/* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)

#define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH)                          \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,             \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc);                   \
}

/* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)
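
/*
 * Worked example for the slide1 helpers above (values chosen arbitrarily;
 * all elements active, vl = 4, scalar operand X taken from x[rs1] or f[rs1]):
 *
 *   vs2:               a b c d
 *   vslide1up    vd:   X a b c
 *   vslide1down  vd:   b c d X
 */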
/* Vector Register Gather Instruction */
#define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2)                    \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2)));             \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(TS2);                                           \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint64_t index;                                                       \
    uint32_t i;                                                           \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        index = *((TS1 *)vs1 + HS1(i));                                   \
        if (index >= vlmax) {                                             \
            *((TS2 *)vd + HS2(i)) = 0;                                    \
        } else {                                                          \
            *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1)
GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)

GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)
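
/*
 * Worked example for vrgather.vv above (values chosen arbitrarily;
 * all elements active, vl = vlmax = 4):
 *
 *   vs2 (data):     a b c d
 *   vs1 (indices):  3 0 0 7
 *   vd:             d a a 0   (index 7 >= vlmax, so vd[3] is written as 0)
 */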
#define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H)                              \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint64_t index = s1;                                                  \
    uint32_t i;                                                           \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        if (index >= vlmax) {                                             \
            *((ETYPE *)vd + H(i)) = 0;                                    \
        } else {                                                          \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[x[rs1]] */
GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1)
GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)

/* Vector Compress Instruction */
#define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H)                             \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t num = 0, i;                                                  \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vext_elem_mask(vs1, i)) {                                    \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i));                 \
        num++;                                                            \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, num * esz, total_elems * esz);             \
}

/* Compress into vd elements of vs2 where vs1 is enabled */
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)
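
/*
 * Worked example for vcompress.vm above (values chosen arbitrarily;
 * vl = 4, vcompress is always unmasked):
 *
 *   vs2 (data):  a b c d
 *   vs1 (mask):  0 1 0 1
 *   vd:          b d . .   ('.' marks tail elements handled by the
 *                           tail-agnostic policy starting at num * esz)
 */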
/* Vector Whole Register Move */
void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
{
    /* EEW = SEW */
    uint32_t maxsz = simd_maxsz(desc);
    uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
    uint32_t startb = env->vstart * sewb;
    uint32_t i = startb;

    if (startb >= maxsz) {
        env->vstart = 0;
        return;
    }

    if (HOST_BIG_ENDIAN && i % 8 != 0) {
        uint32_t j = ROUND_UP(i, 8);
        memcpy((uint8_t *)vd + H1(j - 1),
               (uint8_t *)vs2 + H1(j - 1),
               j - i);
        i = j;
    }

    memcpy((uint8_t *)vd + H1(i),
           (uint8_t *)vs2 + H1(i),
           maxsz - i);

    env->vstart = 0;
}

/* Vector Integer Extension */
#define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1)                     \
void HELPER(NAME)(void *vd, void *v0, void *vs2,                          \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vl = env->vl;                                                \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint32_t i;                                                           \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i));                \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1)
GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1)
GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1)

GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1)
GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1)
GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1)
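
/*
 * Worked example for the integer extension helpers above (source value
 * chosen arbitrarily): with a source element 0xFF,
 *
 *   vzext.vf2 (uint8_t -> uint16_t): 0xFF -> 0x00FF  (zero-extended)
 *   vsext.vf2 (int8_t  -> int16_t):  0xFF -> 0xFFFF  (sign-extended, -1)
 *
 * The extension itself falls out of the C assignment in GEN_VEXT_INT_EXT:
 * the DTYPE source value is implicitly converted to the wider ETYPE.
 */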