/*
 * RISC-V Vector Extension Helpers for QEMU.
 *
 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "qemu/bitops.h"
#include "cpu.h"
#include "exec/memop.h"
#include "exec/exec-all.h"
#include "exec/cpu_ldst.h"
#include "exec/page-protection.h"
#include "exec/helper-proto.h"
#include "exec/tlb-flags.h"
#include "exec/tswap.h"
#include "fpu/softfloat.h"
#include "tcg/tcg-gvec-desc.h"
#include "internals.h"
#include "vector_internals.h"
#include <math.h>

target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
                            target_ulong s2)
{
    int vlmax, vl;
    RISCVCPU *cpu = env_archcpu(env);
    uint64_t vlmul = FIELD_EX64(s2, VTYPE, VLMUL);
    uint8_t vsew = FIELD_EX64(s2, VTYPE, VSEW);
    uint16_t sew = 8 << vsew;
    uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
    int xlen = riscv_cpu_xlen(env);
    bool vill = (s2 >> (xlen - 1)) & 0x1;
    target_ulong reserved = s2 &
                            MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
                                            xlen - 1 - R_VTYPE_RESERVED_SHIFT);
    uint16_t vlen = cpu->cfg.vlenb << 3;
    int8_t lmul;

    if (vlmul & 4) {
        /*
         * Fractional LMUL, check:
         *
         * VLEN * LMUL >= SEW
         * VLEN >> (8 - lmul) >= sew
         * (vlenb << 3) >> (8 - lmul) >= sew
         */
        if (vlmul == 4 || (vlen >> (8 - vlmul)) < sew) {
            vill = true;
        }
    }

    if ((sew > cpu->cfg.elen) || vill || (ediv != 0) || (reserved != 0)) {
        /* only set vill bit. */
        env->vill = 1;
        env->vtype = 0;
        env->vl = 0;
        env->vstart = 0;
        return 0;
    }

    /* lmul encoded as in DisasContext::lmul */
    lmul = sextract32(FIELD_EX64(s2, VTYPE, VLMUL), 0, 3);
    vlmax = vext_get_vlmax(cpu->cfg.vlenb, vsew, lmul);
    if (s1 <= vlmax) {
        vl = s1;
    } else if (s1 < 2 * vlmax && cpu->cfg.rvv_vl_half_avl) {
        vl = (s1 + 1) >> 1;
    } else {
        vl = vlmax;
    }
    env->vl = vl;
    env->vtype = s2;
    env->vstart = 0;
    env->vill = 0;
    return vl;
}

/*
 * Get the maximum number of elements that can be operated on.
 *
 * log2_esz: log2 of element size in bytes.
 */
static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz)
{
    /*
     * As simd_desc supports at most 2048 bytes, the max vlen is 1024 bits,
     * so vlen in bytes (vlenb) is encoded as maxsz.
     */
    uint32_t vlenb = simd_maxsz(desc);

    /* Return VLMAX */
    int scale = vext_lmul(desc) - log2_esz;
    return scale < 0 ? vlenb >> -scale : vlenb << scale;
}
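
/*
 * Worked example (illustrative only, all numbers are hypothetical): assume
 * VLEN = 128 bits, i.e. cpu->cfg.vlenb = 16, with SEW = 32 (vsew = 2,
 * log2_esz = 2) and LMUL = 1/2 (lmul = -1).  Then
 *
 *     scale = lmul - log2_esz = -1 - 2 = -3
 *     VLMAX = vlenb >> 3      = 16 >> 3 = 2
 *
 * the same VLMAX that vext_get_vlmax() yields for HELPER(vsetvl) above, so
 * a vsetvl request with AVL = 5 is clamped to vl = 2.  With LMUL = 1
 * (VLMAX = 4) and rvv_vl_half_avl enabled, AVL = 6 (VLMAX < AVL < 2 * VLMAX)
 * gives vl = (6 + 1) >> 1 = 3 instead of 4.
 */
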
/*
 * This function checks the watchpoint before the real load operation.
 *
 * In system mode, the TLB API probe_access is enough for the watchpoint
 * check.  In user mode, there is no watchpoint support for now.
 *
 * It will trigger an exception if there is no mapping in the TLB
 * and the page table walk can't fill the TLB entry. Then the guest
 * software can return here after processing the exception, or never return.
 */
static void probe_pages(CPURISCVState *env, target_ulong addr,
                        target_ulong len, uintptr_t ra,
                        MMUAccessType access_type)
{
    target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
    target_ulong curlen = MIN(pagelen, len);
    int mmu_index = riscv_env_mmu_index(env, false);

    probe_access(env, adjust_addr(env, addr), curlen, access_type,
                 mmu_index, ra);
    if (len > curlen) {
        addr += curlen;
        curlen = len - curlen;
        probe_access(env, adjust_addr(env, addr), curlen, access_type,
                     mmu_index, ra);
    }
}

static inline void vext_set_elem_mask(void *v0, int index,
                                      uint8_t value)
{
    int idx = index / 64;
    int pos = index % 64;
    uint64_t old = ((uint64_t *)v0)[idx];
    ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
}

/* elements operations for load and store */
typedef void vext_ldst_elem_fn_tlb(CPURISCVState *env, abi_ptr addr,
                                   uint32_t idx, void *vd, uintptr_t retaddr);
typedef void vext_ldst_elem_fn_host(void *vd, uint32_t idx, void *host);

#define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF)            \
static inline QEMU_ALWAYS_INLINE                           \
void NAME##_tlb(CPURISCVState *env, abi_ptr addr,          \
                uint32_t idx, void *vd, uintptr_t retaddr) \
{                                                          \
    ETYPE *cur = ((ETYPE *)vd + H(idx));                   \
    *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr);      \
}                                                          \
                                                           \
static inline QEMU_ALWAYS_INLINE                           \
void NAME##_host(void *vd, uint32_t idx, void *host)       \
{                                                          \
    ETYPE *cur = ((ETYPE *)vd + H(idx));                   \
    *cur = (ETYPE)LDSUF##_p(host);                         \
}

GEN_VEXT_LD_ELEM(lde_b, uint8_t, H1, ldub)
GEN_VEXT_LD_ELEM(lde_h, uint16_t, H2, lduw)
GEN_VEXT_LD_ELEM(lde_w, uint32_t, H4, ldl)
GEN_VEXT_LD_ELEM(lde_d, uint64_t, H8, ldq)

#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF)            \
static inline QEMU_ALWAYS_INLINE                           \
void NAME##_tlb(CPURISCVState *env, abi_ptr addr,          \
                uint32_t idx, void *vd, uintptr_t retaddr) \
{                                                          \
    ETYPE data = *((ETYPE *)vd + H(idx));                  \
    cpu_##STSUF##_data_ra(env, addr, data, retaddr);       \
}                                                          \
                                                           \
static inline QEMU_ALWAYS_INLINE                           \
void NAME##_host(void *vd, uint32_t idx, void *host)       \
{                                                          \
    ETYPE data = *((ETYPE *)vd + H(idx));                  \
    STSUF##_p(host, data);                                 \
}

GEN_VEXT_ST_ELEM(ste_b, uint8_t, H1, stb)
GEN_VEXT_ST_ELEM(ste_h, uint16_t, H2, stw)
GEN_VEXT_ST_ELEM(ste_w, uint32_t, H4, stl)
GEN_VEXT_ST_ELEM(ste_d, uint64_t, H8, stq)

static inline QEMU_ALWAYS_INLINE void
vext_continuous_ldst_tlb(CPURISCVState *env, vext_ldst_elem_fn_tlb *ldst_tlb,
                         void *vd, uint32_t evl, target_ulong addr,
                         uint32_t reg_start, uintptr_t ra, uint32_t esz,
                         bool is_load)
{
    uint32_t i;
    for (i = env->vstart; i < evl; env->vstart = ++i, addr += esz) {
        ldst_tlb(env, adjust_addr(env, addr), i, vd, ra);
    }
}

static inline QEMU_ALWAYS_INLINE void
vext_continuous_ldst_host(CPURISCVState *env, vext_ldst_elem_fn_host *ldst_host,
                          void *vd, uint32_t evl, uint32_t reg_start, void *host,
                          uint32_t esz, bool is_load)
{
#if HOST_BIG_ENDIAN
    for (; reg_start < evl; reg_start++, host += esz) {
        ldst_host(vd, reg_start, host);
    }
#else
    if (esz == 1) {
        uint32_t byte_offset = reg_start * esz;
        uint32_t size = (evl - reg_start) * esz;

        if (is_load) {
            memcpy(vd + byte_offset, host, size);
        } else {
            memcpy(host, vd + byte_offset, size);
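            /*
             * Note: this bulk copy is only reachable for single-byte
             * elements (esz == 1), where the in-register element order
             * matches guest memory byte order on a little-endian host,
             * so copying the remaining (evl - reg_start) bytes at once
             * is equivalent to invoking ldst_host() once per element as
             * the esz != 1 branch below does.
             */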
224 } 225 } else { 226 for (; reg_start < evl; reg_start++, host += esz) { 227 ldst_host(vd, reg_start, host); 228 } 229 } 230 #endif 231 } 232 233 static void vext_set_tail_elems_1s(target_ulong vl, void *vd, 234 uint32_t desc, uint32_t nf, 235 uint32_t esz, uint32_t max_elems) 236 { 237 uint32_t vta = vext_vta(desc); 238 int k; 239 240 if (vta == 0) { 241 return; 242 } 243 244 for (k = 0; k < nf; ++k) { 245 vext_set_elems_1s(vd, vta, (k * max_elems + vl) * esz, 246 (k * max_elems + max_elems) * esz); 247 } 248 } 249 250 /* 251 * stride: access vector element from strided memory 252 */ 253 static void 254 vext_ldst_stride(void *vd, void *v0, target_ulong base, target_ulong stride, 255 CPURISCVState *env, uint32_t desc, uint32_t vm, 256 vext_ldst_elem_fn_tlb *ldst_elem, uint32_t log2_esz, 257 uintptr_t ra) 258 { 259 uint32_t i, k; 260 uint32_t nf = vext_nf(desc); 261 uint32_t max_elems = vext_max_elems(desc, log2_esz); 262 uint32_t esz = 1 << log2_esz; 263 uint32_t vma = vext_vma(desc); 264 265 VSTART_CHECK_EARLY_EXIT(env, env->vl); 266 267 for (i = env->vstart; i < env->vl; env->vstart = ++i) { 268 k = 0; 269 while (k < nf) { 270 if (!vm && !vext_elem_mask(v0, i)) { 271 /* set masked-off elements to 1s */ 272 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz, 273 (i + k * max_elems + 1) * esz); 274 k++; 275 continue; 276 } 277 target_ulong addr = base + stride * i + (k << log2_esz); 278 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 279 k++; 280 } 281 } 282 env->vstart = 0; 283 284 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems); 285 } 286 287 #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \ 288 void HELPER(NAME)(void *vd, void * v0, target_ulong base, \ 289 target_ulong stride, CPURISCVState *env, \ 290 uint32_t desc) \ 291 { \ 292 uint32_t vm = vext_vm(desc); \ 293 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \ 294 ctzl(sizeof(ETYPE)), GETPC()); \ 295 } 296 297 GEN_VEXT_LD_STRIDE(vlse8_v, int8_t, lde_b_tlb) 298 GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h_tlb) 299 GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w_tlb) 300 GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d_tlb) 301 302 #define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN) \ 303 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 304 target_ulong stride, CPURISCVState *env, \ 305 uint32_t desc) \ 306 { \ 307 uint32_t vm = vext_vm(desc); \ 308 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \ 309 ctzl(sizeof(ETYPE)), GETPC()); \ 310 } 311 312 GEN_VEXT_ST_STRIDE(vsse8_v, int8_t, ste_b_tlb) 313 GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h_tlb) 314 GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w_tlb) 315 GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d_tlb) 316 317 /* 318 * unit-stride: access elements stored contiguously in memory 319 */ 320 321 /* unmasked unit-stride load and store operation */ 322 static inline QEMU_ALWAYS_INLINE void 323 vext_page_ldst_us(CPURISCVState *env, void *vd, target_ulong addr, 324 uint32_t elems, uint32_t nf, uint32_t max_elems, 325 uint32_t log2_esz, bool is_load, int mmu_index, 326 vext_ldst_elem_fn_tlb *ldst_tlb, 327 vext_ldst_elem_fn_host *ldst_host, uintptr_t ra) 328 { 329 void *host; 330 int i, k, flags; 331 uint32_t esz = 1 << log2_esz; 332 uint32_t size = (elems * nf) << log2_esz; 333 uint32_t evl = env->vstart + elems; 334 MMUAccessType access_type = is_load ? MMU_DATA_LOAD : MMU_DATA_STORE; 335 336 /* Check page permission/pmp/watchpoint/etc. 
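     * With nonfault probing, probe_access_flags() either hands back a usable
     * host pointer for the whole range (flags == 0, taken by the direct
     * host-memory path below) or flags the access as needing special
     * handling (unmapped, MMIO, watchpoint, ...), in which case we fall
     * back to the per-element TLB accessors, which raise any exception.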
*/ 337 flags = probe_access_flags(env, adjust_addr(env, addr), size, access_type, 338 mmu_index, true, &host, ra); 339 340 if (flags == 0) { 341 if (nf == 1) { 342 vext_continuous_ldst_host(env, ldst_host, vd, evl, env->vstart, 343 host, esz, is_load); 344 } else { 345 for (i = env->vstart; i < evl; ++i) { 346 k = 0; 347 while (k < nf) { 348 ldst_host(vd, i + k * max_elems, host); 349 host += esz; 350 k++; 351 } 352 } 353 } 354 env->vstart += elems; 355 } else { 356 if (nf == 1) { 357 vext_continuous_ldst_tlb(env, ldst_tlb, vd, evl, addr, env->vstart, 358 ra, esz, is_load); 359 } else { 360 /* load bytes from guest memory */ 361 for (i = env->vstart; i < evl; env->vstart = ++i) { 362 k = 0; 363 while (k < nf) { 364 ldst_tlb(env, adjust_addr(env, addr), i + k * max_elems, 365 vd, ra); 366 addr += esz; 367 k++; 368 } 369 } 370 } 371 } 372 } 373 374 static inline QEMU_ALWAYS_INLINE void 375 vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, 376 vext_ldst_elem_fn_tlb *ldst_tlb, 377 vext_ldst_elem_fn_host *ldst_host, uint32_t log2_esz, 378 uint32_t evl, uintptr_t ra, bool is_load) 379 { 380 uint32_t k; 381 target_ulong page_split, elems, addr; 382 uint32_t nf = vext_nf(desc); 383 uint32_t max_elems = vext_max_elems(desc, log2_esz); 384 uint32_t esz = 1 << log2_esz; 385 uint32_t msize = nf * esz; 386 int mmu_index = riscv_env_mmu_index(env, false); 387 388 VSTART_CHECK_EARLY_EXIT(env, evl); 389 390 #if defined(CONFIG_USER_ONLY) 391 /* 392 * For data sizes <= 6 bytes we get better performance by simply calling 393 * vext_continuous_ldst_tlb 394 */ 395 if (nf == 1 && (evl << log2_esz) <= 6) { 396 addr = base + (env->vstart << log2_esz); 397 vext_continuous_ldst_tlb(env, ldst_tlb, vd, evl, addr, env->vstart, ra, 398 esz, is_load); 399 400 env->vstart = 0; 401 vext_set_tail_elems_1s(evl, vd, desc, nf, esz, max_elems); 402 return; 403 } 404 #endif 405 406 /* Calculate the page range of first page */ 407 addr = base + ((env->vstart * nf) << log2_esz); 408 page_split = -(addr | TARGET_PAGE_MASK); 409 /* Get number of elements */ 410 elems = page_split / msize; 411 if (unlikely(env->vstart + elems >= evl)) { 412 elems = evl - env->vstart; 413 } 414 415 /* Load/store elements in the first page */ 416 if (likely(elems)) { 417 vext_page_ldst_us(env, vd, addr, elems, nf, max_elems, log2_esz, 418 is_load, mmu_index, ldst_tlb, ldst_host, ra); 419 } 420 421 /* Load/store elements in the second page */ 422 if (unlikely(env->vstart < evl)) { 423 /* Cross page element */ 424 if (unlikely(page_split % msize)) { 425 for (k = 0; k < nf; k++) { 426 addr = base + ((env->vstart * nf + k) << log2_esz); 427 ldst_tlb(env, adjust_addr(env, addr), 428 env->vstart + k * max_elems, vd, ra); 429 } 430 env->vstart++; 431 } 432 433 addr = base + ((env->vstart * nf) << log2_esz); 434 /* Get number of elements of second page */ 435 elems = evl - env->vstart; 436 437 /* Load/store elements in the second page */ 438 vext_page_ldst_us(env, vd, addr, elems, nf, max_elems, log2_esz, 439 is_load, mmu_index, ldst_tlb, ldst_host, ra); 440 } 441 442 env->vstart = 0; 443 vext_set_tail_elems_1s(evl, vd, desc, nf, esz, max_elems); 444 } 445 446 /* 447 * masked unit-stride load and store operation will be a special case of 448 * stride, stride = NF * sizeof (ETYPE) 449 */ 450 451 #define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN_TLB, LOAD_FN_HOST) \ 452 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ 453 CPURISCVState *env, uint32_t desc) \ 454 { \ 455 uint32_t stride = vext_nf(desc) << 
ctzl(sizeof(ETYPE)); \ 456 vext_ldst_stride(vd, v0, base, stride, env, desc, false, \ 457 LOAD_FN_TLB, ctzl(sizeof(ETYPE)), GETPC()); \ 458 } \ 459 \ 460 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 461 CPURISCVState *env, uint32_t desc) \ 462 { \ 463 vext_ldst_us(vd, base, env, desc, LOAD_FN_TLB, LOAD_FN_HOST, \ 464 ctzl(sizeof(ETYPE)), env->vl, GETPC(), true); \ 465 } 466 467 GEN_VEXT_LD_US(vle8_v, int8_t, lde_b_tlb, lde_b_host) 468 GEN_VEXT_LD_US(vle16_v, int16_t, lde_h_tlb, lde_h_host) 469 GEN_VEXT_LD_US(vle32_v, int32_t, lde_w_tlb, lde_w_host) 470 GEN_VEXT_LD_US(vle64_v, int64_t, lde_d_tlb, lde_d_host) 471 472 #define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN_TLB, STORE_FN_HOST) \ 473 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ 474 CPURISCVState *env, uint32_t desc) \ 475 { \ 476 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \ 477 vext_ldst_stride(vd, v0, base, stride, env, desc, false, \ 478 STORE_FN_TLB, ctzl(sizeof(ETYPE)), GETPC()); \ 479 } \ 480 \ 481 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 482 CPURISCVState *env, uint32_t desc) \ 483 { \ 484 vext_ldst_us(vd, base, env, desc, STORE_FN_TLB, STORE_FN_HOST, \ 485 ctzl(sizeof(ETYPE)), env->vl, GETPC(), false); \ 486 } 487 488 GEN_VEXT_ST_US(vse8_v, int8_t, ste_b_tlb, ste_b_host) 489 GEN_VEXT_ST_US(vse16_v, int16_t, ste_h_tlb, ste_h_host) 490 GEN_VEXT_ST_US(vse32_v, int32_t, ste_w_tlb, ste_w_host) 491 GEN_VEXT_ST_US(vse64_v, int64_t, ste_d_tlb, ste_d_host) 492 493 /* 494 * unit stride mask load and store, EEW = 1 495 */ 496 void HELPER(vlm_v)(void *vd, void *v0, target_ulong base, 497 CPURISCVState *env, uint32_t desc) 498 { 499 /* evl = ceil(vl/8) */ 500 uint8_t evl = (env->vl + 7) >> 3; 501 vext_ldst_us(vd, base, env, desc, lde_b_tlb, lde_b_host, 502 0, evl, GETPC(), true); 503 } 504 505 void HELPER(vsm_v)(void *vd, void *v0, target_ulong base, 506 CPURISCVState *env, uint32_t desc) 507 { 508 /* evl = ceil(vl/8) */ 509 uint8_t evl = (env->vl + 7) >> 3; 510 vext_ldst_us(vd, base, env, desc, ste_b_tlb, ste_b_host, 511 0, evl, GETPC(), false); 512 } 513 514 /* 515 * index: access vector element from indexed memory 516 */ 517 typedef target_ulong vext_get_index_addr(target_ulong base, 518 uint32_t idx, void *vs2); 519 520 #define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \ 521 static target_ulong NAME(target_ulong base, \ 522 uint32_t idx, void *vs2) \ 523 { \ 524 return (base + *((ETYPE *)vs2 + H(idx))); \ 525 } 526 527 GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t, H1) 528 GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2) 529 GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4) 530 GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8) 531 532 static inline void 533 vext_ldst_index(void *vd, void *v0, target_ulong base, 534 void *vs2, CPURISCVState *env, uint32_t desc, 535 vext_get_index_addr get_index_addr, 536 vext_ldst_elem_fn_tlb *ldst_elem, 537 uint32_t log2_esz, uintptr_t ra) 538 { 539 uint32_t i, k; 540 uint32_t nf = vext_nf(desc); 541 uint32_t vm = vext_vm(desc); 542 uint32_t max_elems = vext_max_elems(desc, log2_esz); 543 uint32_t esz = 1 << log2_esz; 544 uint32_t vma = vext_vma(desc); 545 546 VSTART_CHECK_EARLY_EXIT(env, env->vl); 547 548 /* load bytes from guest memory */ 549 for (i = env->vstart; i < env->vl; env->vstart = ++i) { 550 k = 0; 551 while (k < nf) { 552 if (!vm && !vext_elem_mask(v0, i)) { 553 /* set masked-off elements to 1s */ 554 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz, 555 (i + k * max_elems + 1) * esz); 556 k++; 557 continue; 558 } 559 abi_ptr addr = 
                get_index_addr(base, i, vs2) + (k << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;

    vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
}

#define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN)        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,         \
                  void *vs2, CPURISCVState *env, uint32_t desc)  \
{                                                                \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,      \
                    LOAD_FN, ctzl(sizeof(ETYPE)), GETPC());      \
}

GEN_VEXT_LD_INDEX(vlxei8_8_v, int8_t, idx_b, lde_b_tlb)
GEN_VEXT_LD_INDEX(vlxei8_16_v, int16_t, idx_b, lde_h_tlb)
GEN_VEXT_LD_INDEX(vlxei8_32_v, int32_t, idx_b, lde_w_tlb)
GEN_VEXT_LD_INDEX(vlxei8_64_v, int64_t, idx_b, lde_d_tlb)
GEN_VEXT_LD_INDEX(vlxei16_8_v, int8_t, idx_h, lde_b_tlb)
GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h_tlb)
GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w_tlb)
GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d_tlb)
GEN_VEXT_LD_INDEX(vlxei32_8_v, int8_t, idx_w, lde_b_tlb)
GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h_tlb)
GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w_tlb)
GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d_tlb)
GEN_VEXT_LD_INDEX(vlxei64_8_v, int8_t, idx_d, lde_b_tlb)
GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h_tlb)
GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w_tlb)
GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d_tlb)

#define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN)       \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,         \
                  void *vs2, CPURISCVState *env, uint32_t desc)  \
{                                                                \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,      \
                    STORE_FN, ctzl(sizeof(ETYPE)),               \
                    GETPC());                                    \
}

GEN_VEXT_ST_INDEX(vsxei8_8_v, int8_t, idx_b, ste_b_tlb)
GEN_VEXT_ST_INDEX(vsxei8_16_v, int16_t, idx_b, ste_h_tlb)
GEN_VEXT_ST_INDEX(vsxei8_32_v, int32_t, idx_b, ste_w_tlb)
GEN_VEXT_ST_INDEX(vsxei8_64_v, int64_t, idx_b, ste_d_tlb)
GEN_VEXT_ST_INDEX(vsxei16_8_v, int8_t, idx_h, ste_b_tlb)
GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h_tlb)
GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w_tlb)
GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d_tlb)
GEN_VEXT_ST_INDEX(vsxei32_8_v, int8_t, idx_w, ste_b_tlb)
GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h_tlb)
GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w_tlb)
GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d_tlb)
GEN_VEXT_ST_INDEX(vsxei64_8_v, int8_t, idx_d, ste_b_tlb)
GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h_tlb)
GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w_tlb)
GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d_tlb)

/*
 * unit-stride fault-only-first load instructions
 */
static inline void
vext_ldff(void *vd, void *v0, target_ulong base, CPURISCVState *env,
          uint32_t desc, vext_ldst_elem_fn_tlb *ldst_tlb,
          vext_ldst_elem_fn_host *ldst_host, uint32_t log2_esz, uintptr_t ra)
{
    uint32_t i, k, vl = 0;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t msize = nf * esz;
    uint32_t vma = vext_vma(desc);
    target_ulong addr, addr_probe, addr_i, offset, remain, page_split, elems;
    int mmu_index = riscv_env_mmu_index(env, false);
    int flags;
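    /*
     * Fault-only-first strategy: probe the pages backing the access without
     * faulting first; if anything other than a watchpoint is flagged, probe
     * element by element, allowing a real fault only on element 0 and
     * shrinking vl to the first offending element otherwise.  Illustrative
     * example (hypothetical addresses): a vle32ff.v with vl = 8 whose
     * mapping ends after element 4 loads elements 0..4, raises no trap,
     * and reduces vl to 5.
     */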
void *host; 639 640 VSTART_CHECK_EARLY_EXIT(env, env->vl); 641 642 addr = base + ((env->vstart * nf) << log2_esz); 643 page_split = -(addr | TARGET_PAGE_MASK); 644 /* Get number of elements */ 645 elems = page_split / msize; 646 if (unlikely(env->vstart + elems >= env->vl)) { 647 elems = env->vl - env->vstart; 648 } 649 650 /* Check page permission/pmp/watchpoint/etc. */ 651 flags = probe_access_flags(env, adjust_addr(env, addr), elems * msize, 652 MMU_DATA_LOAD, mmu_index, true, &host, ra); 653 654 /* If we are crossing a page check also the second page. */ 655 if (env->vl > elems) { 656 addr_probe = addr + (elems << log2_esz); 657 flags |= probe_access_flags(env, adjust_addr(env, addr_probe), 658 elems * msize, MMU_DATA_LOAD, mmu_index, 659 true, &host, ra); 660 } 661 662 if (flags & ~TLB_WATCHPOINT) { 663 /* probe every access */ 664 for (i = env->vstart; i < env->vl; i++) { 665 if (!vm && !vext_elem_mask(v0, i)) { 666 continue; 667 } 668 addr_i = adjust_addr(env, base + i * (nf << log2_esz)); 669 if (i == 0) { 670 /* Allow fault on first element. */ 671 probe_pages(env, addr_i, nf << log2_esz, ra, MMU_DATA_LOAD); 672 } else { 673 remain = nf << log2_esz; 674 while (remain > 0) { 675 offset = -(addr_i | TARGET_PAGE_MASK); 676 677 /* Probe nonfault on subsequent elements. */ 678 flags = probe_access_flags(env, addr_i, offset, 679 MMU_DATA_LOAD, mmu_index, true, 680 &host, 0); 681 682 /* 683 * Stop if invalid (unmapped) or mmio (transaction may 684 * fail). Do not stop if watchpoint, as the spec says that 685 * first-fault should continue to access the same 686 * elements regardless of any watchpoint. 687 */ 688 if (flags & ~TLB_WATCHPOINT) { 689 vl = i; 690 goto ProbeSuccess; 691 } 692 if (remain <= offset) { 693 break; 694 } 695 remain -= offset; 696 addr_i = adjust_addr(env, addr_i + offset); 697 } 698 } 699 } 700 } 701 ProbeSuccess: 702 /* load bytes from guest memory */ 703 if (vl != 0) { 704 env->vl = vl; 705 } 706 707 if (env->vstart < env->vl) { 708 if (vm) { 709 /* Load/store elements in the first page */ 710 if (likely(elems)) { 711 vext_page_ldst_us(env, vd, addr, elems, nf, max_elems, 712 log2_esz, true, mmu_index, ldst_tlb, 713 ldst_host, ra); 714 } 715 716 /* Load/store elements in the second page */ 717 if (unlikely(env->vstart < env->vl)) { 718 /* Cross page element */ 719 if (unlikely(page_split % msize)) { 720 for (k = 0; k < nf; k++) { 721 addr = base + ((env->vstart * nf + k) << log2_esz); 722 ldst_tlb(env, adjust_addr(env, addr), 723 env->vstart + k * max_elems, vd, ra); 724 } 725 env->vstart++; 726 } 727 728 addr = base + ((env->vstart * nf) << log2_esz); 729 /* Get number of elements of second page */ 730 elems = env->vl - env->vstart; 731 732 /* Load/store elements in the second page */ 733 vext_page_ldst_us(env, vd, addr, elems, nf, max_elems, 734 log2_esz, true, mmu_index, ldst_tlb, 735 ldst_host, ra); 736 } 737 } else { 738 for (i = env->vstart; i < env->vl; i++) { 739 k = 0; 740 while (k < nf) { 741 if (!vext_elem_mask(v0, i)) { 742 /* set masked-off elements to 1s */ 743 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz, 744 (i + k * max_elems + 1) * esz); 745 k++; 746 continue; 747 } 748 addr = base + ((i * nf + k) << log2_esz); 749 ldst_tlb(env, adjust_addr(env, addr), i + k * max_elems, 750 vd, ra); 751 k++; 752 } 753 } 754 } 755 } 756 env->vstart = 0; 757 758 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems); 759 } 760 761 #define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN_TLB, LOAD_FN_HOST) \ 762 void HELPER(NAME)(void *vd, void *v0, target_ulong 
base, \ 763 CPURISCVState *env, uint32_t desc) \ 764 { \ 765 vext_ldff(vd, v0, base, env, desc, LOAD_FN_TLB, \ 766 LOAD_FN_HOST, ctzl(sizeof(ETYPE)), GETPC()); \ 767 } 768 769 GEN_VEXT_LDFF(vle8ff_v, int8_t, lde_b_tlb, lde_b_host) 770 GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h_tlb, lde_h_host) 771 GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w_tlb, lde_w_host) 772 GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d_tlb, lde_d_host) 773 774 #define DO_SWAP(N, M) (M) 775 #define DO_AND(N, M) (N & M) 776 #define DO_XOR(N, M) (N ^ M) 777 #define DO_OR(N, M) (N | M) 778 #define DO_ADD(N, M) (N + M) 779 780 /* Signed min/max */ 781 #define DO_MAX(N, M) ((N) >= (M) ? (N) : (M)) 782 #define DO_MIN(N, M) ((N) >= (M) ? (M) : (N)) 783 784 /* 785 * load and store whole register instructions 786 */ 787 static inline QEMU_ALWAYS_INLINE void 788 vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, 789 vext_ldst_elem_fn_tlb *ldst_tlb, 790 vext_ldst_elem_fn_host *ldst_host, uint32_t log2_esz, 791 uintptr_t ra, bool is_load) 792 { 793 target_ulong page_split, elems, addr; 794 uint32_t nf = vext_nf(desc); 795 uint32_t vlenb = riscv_cpu_cfg(env)->vlenb; 796 uint32_t max_elems = vlenb >> log2_esz; 797 uint32_t evl = nf * max_elems; 798 uint32_t esz = 1 << log2_esz; 799 int mmu_index = riscv_env_mmu_index(env, false); 800 801 /* Calculate the page range of first page */ 802 addr = base + (env->vstart << log2_esz); 803 page_split = -(addr | TARGET_PAGE_MASK); 804 /* Get number of elements */ 805 elems = page_split / esz; 806 if (unlikely(env->vstart + elems >= evl)) { 807 elems = evl - env->vstart; 808 } 809 810 /* Load/store elements in the first page */ 811 if (likely(elems)) { 812 vext_page_ldst_us(env, vd, addr, elems, 1, max_elems, log2_esz, 813 is_load, mmu_index, ldst_tlb, ldst_host, ra); 814 } 815 816 /* Load/store elements in the second page */ 817 if (unlikely(env->vstart < evl)) { 818 /* Cross page element */ 819 if (unlikely(page_split % esz)) { 820 addr = base + (env->vstart << log2_esz); 821 ldst_tlb(env, adjust_addr(env, addr), env->vstart, vd, ra); 822 env->vstart++; 823 } 824 825 addr = base + (env->vstart << log2_esz); 826 /* Get number of elements of second page */ 827 elems = evl - env->vstart; 828 829 /* Load/store elements in the second page */ 830 vext_page_ldst_us(env, vd, addr, elems, 1, max_elems, log2_esz, 831 is_load, mmu_index, ldst_tlb, ldst_host, ra); 832 } 833 834 env->vstart = 0; 835 } 836 837 #define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN_TLB, LOAD_FN_HOST) \ 838 void HELPER(NAME)(void *vd, target_ulong base, CPURISCVState *env, \ 839 uint32_t desc) \ 840 { \ 841 vext_ldst_whole(vd, base, env, desc, LOAD_FN_TLB, LOAD_FN_HOST, \ 842 ctzl(sizeof(ETYPE)), GETPC(), true); \ 843 } 844 845 GEN_VEXT_LD_WHOLE(vl1re8_v, int8_t, lde_b_tlb, lde_b_host) 846 GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h_tlb, lde_h_host) 847 GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w_tlb, lde_w_host) 848 GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d_tlb, lde_d_host) 849 GEN_VEXT_LD_WHOLE(vl2re8_v, int8_t, lde_b_tlb, lde_b_host) 850 GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h_tlb, lde_h_host) 851 GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w_tlb, lde_w_host) 852 GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d_tlb, lde_d_host) 853 GEN_VEXT_LD_WHOLE(vl4re8_v, int8_t, lde_b_tlb, lde_b_host) 854 GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h_tlb, lde_h_host) 855 GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w_tlb, lde_w_host) 856 GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d_tlb, lde_d_host) 857 GEN_VEXT_LD_WHOLE(vl8re8_v, 
int8_t, lde_b_tlb, lde_b_host) 858 GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h_tlb, lde_h_host) 859 GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w_tlb, lde_w_host) 860 GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d_tlb, lde_d_host) 861 862 #define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN_TLB, STORE_FN_HOST) \ 863 void HELPER(NAME)(void *vd, target_ulong base, CPURISCVState *env, \ 864 uint32_t desc) \ 865 { \ 866 vext_ldst_whole(vd, base, env, desc, STORE_FN_TLB, STORE_FN_HOST, \ 867 ctzl(sizeof(ETYPE)), GETPC(), false); \ 868 } 869 870 GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b_tlb, ste_b_host) 871 GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b_tlb, ste_b_host) 872 GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b_tlb, ste_b_host) 873 GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b_tlb, ste_b_host) 874 875 /* 876 * Vector Integer Arithmetic Instructions 877 */ 878 879 /* (TD, T1, T2, TX1, TX2) */ 880 #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t 881 #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t 882 #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t 883 #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t 884 #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t 885 #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t 886 #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t 887 #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t 888 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 889 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 890 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 891 #define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t 892 #define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t 893 #define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t 894 #define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t 895 #define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t 896 #define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t 897 #define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t 898 #define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t 899 #define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t 900 #define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t 901 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t 902 #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t 903 904 #define DO_SUB(N, M) (N - M) 905 #define DO_RSUB(N, M) (M - N) 906 907 RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD) 908 RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD) 909 RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD) 910 RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD) 911 RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB) 912 RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB) 913 RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB) 914 RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB) 915 916 GEN_VEXT_VV(vadd_vv_b, 1) 917 GEN_VEXT_VV(vadd_vv_h, 2) 918 GEN_VEXT_VV(vadd_vv_w, 4) 919 GEN_VEXT_VV(vadd_vv_d, 8) 920 GEN_VEXT_VV(vsub_vv_b, 1) 921 GEN_VEXT_VV(vsub_vv_h, 2) 922 GEN_VEXT_VV(vsub_vv_w, 4) 923 GEN_VEXT_VV(vsub_vv_d, 8) 924 925 926 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD) 927 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD) 928 RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD) 929 RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD) 930 RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB) 931 RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, 
DO_SUB) 932 RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB) 933 RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB) 934 RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB) 935 RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB) 936 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB) 937 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB) 938 939 GEN_VEXT_VX(vadd_vx_b, 1) 940 GEN_VEXT_VX(vadd_vx_h, 2) 941 GEN_VEXT_VX(vadd_vx_w, 4) 942 GEN_VEXT_VX(vadd_vx_d, 8) 943 GEN_VEXT_VX(vsub_vx_b, 1) 944 GEN_VEXT_VX(vsub_vx_h, 2) 945 GEN_VEXT_VX(vsub_vx_w, 4) 946 GEN_VEXT_VX(vsub_vx_d, 8) 947 GEN_VEXT_VX(vrsub_vx_b, 1) 948 GEN_VEXT_VX(vrsub_vx_h, 2) 949 GEN_VEXT_VX(vrsub_vx_w, 4) 950 GEN_VEXT_VX(vrsub_vx_d, 8) 951 952 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc) 953 { 954 intptr_t oprsz = simd_oprsz(desc); 955 intptr_t i; 956 957 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 958 *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i); 959 } 960 } 961 962 void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc) 963 { 964 intptr_t oprsz = simd_oprsz(desc); 965 intptr_t i; 966 967 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 968 *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i); 969 } 970 } 971 972 void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc) 973 { 974 intptr_t oprsz = simd_oprsz(desc); 975 intptr_t i; 976 977 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 978 *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i); 979 } 980 } 981 982 void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc) 983 { 984 intptr_t oprsz = simd_oprsz(desc); 985 intptr_t i; 986 987 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 988 *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i); 989 } 990 } 991 992 /* Vector Widening Integer Add/Subtract */ 993 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t 994 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t 995 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t 996 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 997 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 998 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 999 #define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t 1000 #define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t 1001 #define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t 1002 #define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t 1003 #define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t 1004 #define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t 1005 RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD) 1006 RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD) 1007 RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD) 1008 RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB) 1009 RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB) 1010 RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB) 1011 RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD) 1012 RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD) 1013 RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD) 1014 RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB) 1015 RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB) 1016 RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB) 1017 RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD) 1018 RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, 
H4, H2, H2, DO_ADD) 1019 RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD) 1020 RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB) 1021 RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB) 1022 RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB) 1023 RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD) 1024 RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD) 1025 RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD) 1026 RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB) 1027 RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB) 1028 RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB) 1029 GEN_VEXT_VV(vwaddu_vv_b, 2) 1030 GEN_VEXT_VV(vwaddu_vv_h, 4) 1031 GEN_VEXT_VV(vwaddu_vv_w, 8) 1032 GEN_VEXT_VV(vwsubu_vv_b, 2) 1033 GEN_VEXT_VV(vwsubu_vv_h, 4) 1034 GEN_VEXT_VV(vwsubu_vv_w, 8) 1035 GEN_VEXT_VV(vwadd_vv_b, 2) 1036 GEN_VEXT_VV(vwadd_vv_h, 4) 1037 GEN_VEXT_VV(vwadd_vv_w, 8) 1038 GEN_VEXT_VV(vwsub_vv_b, 2) 1039 GEN_VEXT_VV(vwsub_vv_h, 4) 1040 GEN_VEXT_VV(vwsub_vv_w, 8) 1041 GEN_VEXT_VV(vwaddu_wv_b, 2) 1042 GEN_VEXT_VV(vwaddu_wv_h, 4) 1043 GEN_VEXT_VV(vwaddu_wv_w, 8) 1044 GEN_VEXT_VV(vwsubu_wv_b, 2) 1045 GEN_VEXT_VV(vwsubu_wv_h, 4) 1046 GEN_VEXT_VV(vwsubu_wv_w, 8) 1047 GEN_VEXT_VV(vwadd_wv_b, 2) 1048 GEN_VEXT_VV(vwadd_wv_h, 4) 1049 GEN_VEXT_VV(vwadd_wv_w, 8) 1050 GEN_VEXT_VV(vwsub_wv_b, 2) 1051 GEN_VEXT_VV(vwsub_wv_h, 4) 1052 GEN_VEXT_VV(vwsub_wv_w, 8) 1053 1054 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD) 1055 RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD) 1056 RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD) 1057 RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB) 1058 RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB) 1059 RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB) 1060 RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD) 1061 RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD) 1062 RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD) 1063 RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB) 1064 RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB) 1065 RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB) 1066 RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD) 1067 RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD) 1068 RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD) 1069 RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB) 1070 RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB) 1071 RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB) 1072 RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD) 1073 RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD) 1074 RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD) 1075 RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB) 1076 RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB) 1077 RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB) 1078 GEN_VEXT_VX(vwaddu_vx_b, 2) 1079 GEN_VEXT_VX(vwaddu_vx_h, 4) 1080 GEN_VEXT_VX(vwaddu_vx_w, 8) 1081 GEN_VEXT_VX(vwsubu_vx_b, 2) 1082 GEN_VEXT_VX(vwsubu_vx_h, 4) 1083 GEN_VEXT_VX(vwsubu_vx_w, 8) 1084 GEN_VEXT_VX(vwadd_vx_b, 2) 1085 GEN_VEXT_VX(vwadd_vx_h, 4) 1086 GEN_VEXT_VX(vwadd_vx_w, 8) 1087 GEN_VEXT_VX(vwsub_vx_b, 2) 1088 GEN_VEXT_VX(vwsub_vx_h, 4) 1089 GEN_VEXT_VX(vwsub_vx_w, 8) 1090 GEN_VEXT_VX(vwaddu_wx_b, 2) 1091 GEN_VEXT_VX(vwaddu_wx_h, 4) 1092 GEN_VEXT_VX(vwaddu_wx_w, 8) 1093 GEN_VEXT_VX(vwsubu_wx_b, 2) 1094 GEN_VEXT_VX(vwsubu_wx_h, 4) 1095 GEN_VEXT_VX(vwsubu_wx_w, 8) 1096 GEN_VEXT_VX(vwadd_wx_b, 2) 1097 
GEN_VEXT_VX(vwadd_wx_h, 4) 1098 GEN_VEXT_VX(vwadd_wx_w, 8) 1099 GEN_VEXT_VX(vwsub_wx_b, 2) 1100 GEN_VEXT_VX(vwsub_wx_h, 4) 1101 GEN_VEXT_VX(vwsub_wx_w, 8) 1102 1103 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ 1104 #define DO_VADC(N, M, C) (N + M + C) 1105 #define DO_VSBC(N, M, C) (N - M - C) 1106 1107 #define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP) \ 1108 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1109 CPURISCVState *env, uint32_t desc) \ 1110 { \ 1111 uint32_t vl = env->vl; \ 1112 uint32_t esz = sizeof(ETYPE); \ 1113 uint32_t total_elems = \ 1114 vext_get_total_elems(env, desc, esz); \ 1115 uint32_t vta = vext_vta(desc); \ 1116 uint32_t i; \ 1117 \ 1118 VSTART_CHECK_EARLY_EXIT(env, vl); \ 1119 \ 1120 for (i = env->vstart; i < vl; i++) { \ 1121 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1122 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1123 ETYPE carry = vext_elem_mask(v0, i); \ 1124 \ 1125 *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \ 1126 } \ 1127 env->vstart = 0; \ 1128 /* set tail elements to 1s */ \ 1129 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1130 } 1131 1132 GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC) 1133 GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC) 1134 GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC) 1135 GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC) 1136 1137 GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC) 1138 GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC) 1139 GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC) 1140 GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC) 1141 1142 #define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP) \ 1143 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 1144 CPURISCVState *env, uint32_t desc) \ 1145 { \ 1146 uint32_t vl = env->vl; \ 1147 uint32_t esz = sizeof(ETYPE); \ 1148 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 1149 uint32_t vta = vext_vta(desc); \ 1150 uint32_t i; \ 1151 \ 1152 VSTART_CHECK_EARLY_EXIT(env, vl); \ 1153 \ 1154 for (i = env->vstart; i < vl; i++) { \ 1155 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1156 ETYPE carry = vext_elem_mask(v0, i); \ 1157 \ 1158 *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\ 1159 } \ 1160 env->vstart = 0; \ 1161 /* set tail elements to 1s */ \ 1162 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1163 } 1164 1165 GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC) 1166 GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC) 1167 GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC) 1168 GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC) 1169 1170 GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC) 1171 GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC) 1172 GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC) 1173 GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC) 1174 1175 #define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \ 1176 (__typeof(N))(N + M) < N) 1177 #define DO_MSBC(N, M, C) (C ? 
N <= M : N < M) 1178 1179 #define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \ 1180 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1181 CPURISCVState *env, uint32_t desc) \ 1182 { \ 1183 uint32_t vl = env->vl; \ 1184 uint32_t vm = vext_vm(desc); \ 1185 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \ 1186 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 1187 uint32_t i; \ 1188 \ 1189 VSTART_CHECK_EARLY_EXIT(env, vl); \ 1190 \ 1191 for (i = env->vstart; i < vl; i++) { \ 1192 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1193 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1194 ETYPE carry = !vm && vext_elem_mask(v0, i); \ 1195 vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \ 1196 } \ 1197 env->vstart = 0; \ 1198 /* 1199 * mask destination register are always tail-agnostic 1200 * set tail elements to 1s 1201 */ \ 1202 if (vta_all_1s) { \ 1203 for (; i < total_elems; i++) { \ 1204 vext_set_elem_mask(vd, i, 1); \ 1205 } \ 1206 } \ 1207 } 1208 1209 GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC) 1210 GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC) 1211 GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC) 1212 GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC) 1213 1214 GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC) 1215 GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC) 1216 GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC) 1217 GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC) 1218 1219 #define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \ 1220 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1221 void *vs2, CPURISCVState *env, uint32_t desc) \ 1222 { \ 1223 uint32_t vl = env->vl; \ 1224 uint32_t vm = vext_vm(desc); \ 1225 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \ 1226 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 1227 uint32_t i; \ 1228 \ 1229 VSTART_CHECK_EARLY_EXIT(env, vl); \ 1230 \ 1231 for (i = env->vstart; i < vl; i++) { \ 1232 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1233 ETYPE carry = !vm && vext_elem_mask(v0, i); \ 1234 vext_set_elem_mask(vd, i, \ 1235 DO_OP(s2, (ETYPE)(target_long)s1, carry)); \ 1236 } \ 1237 env->vstart = 0; \ 1238 /* 1239 * mask destination register are always tail-agnostic 1240 * set tail elements to 1s 1241 */ \ 1242 if (vta_all_1s) { \ 1243 for (; i < total_elems; i++) { \ 1244 vext_set_elem_mask(vd, i, 1); \ 1245 } \ 1246 } \ 1247 } 1248 1249 GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC) 1250 GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC) 1251 GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC) 1252 GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC) 1253 1254 GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC) 1255 GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC) 1256 GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC) 1257 GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC) 1258 1259 /* Vector Bitwise Logical Instructions */ 1260 RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND) 1261 RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND) 1262 RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND) 1263 RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND) 1264 RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR) 1265 RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR) 1266 RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR) 1267 RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR) 1268 RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR) 1269 RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR) 1270 RVVCALL(OPIVV2, vxor_vv_w, 
OP_SSS_W, H4, H4, H4, DO_XOR) 1271 RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR) 1272 GEN_VEXT_VV(vand_vv_b, 1) 1273 GEN_VEXT_VV(vand_vv_h, 2) 1274 GEN_VEXT_VV(vand_vv_w, 4) 1275 GEN_VEXT_VV(vand_vv_d, 8) 1276 GEN_VEXT_VV(vor_vv_b, 1) 1277 GEN_VEXT_VV(vor_vv_h, 2) 1278 GEN_VEXT_VV(vor_vv_w, 4) 1279 GEN_VEXT_VV(vor_vv_d, 8) 1280 GEN_VEXT_VV(vxor_vv_b, 1) 1281 GEN_VEXT_VV(vxor_vv_h, 2) 1282 GEN_VEXT_VV(vxor_vv_w, 4) 1283 GEN_VEXT_VV(vxor_vv_d, 8) 1284 1285 RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND) 1286 RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND) 1287 RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND) 1288 RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND) 1289 RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR) 1290 RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR) 1291 RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR) 1292 RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR) 1293 RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR) 1294 RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR) 1295 RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR) 1296 RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR) 1297 GEN_VEXT_VX(vand_vx_b, 1) 1298 GEN_VEXT_VX(vand_vx_h, 2) 1299 GEN_VEXT_VX(vand_vx_w, 4) 1300 GEN_VEXT_VX(vand_vx_d, 8) 1301 GEN_VEXT_VX(vor_vx_b, 1) 1302 GEN_VEXT_VX(vor_vx_h, 2) 1303 GEN_VEXT_VX(vor_vx_w, 4) 1304 GEN_VEXT_VX(vor_vx_d, 8) 1305 GEN_VEXT_VX(vxor_vx_b, 1) 1306 GEN_VEXT_VX(vxor_vx_h, 2) 1307 GEN_VEXT_VX(vxor_vx_w, 4) 1308 GEN_VEXT_VX(vxor_vx_d, 8) 1309 1310 /* Vector Single-Width Bit Shift Instructions */ 1311 #define DO_SLL(N, M) (N << (M)) 1312 #define DO_SRL(N, M) (N >> (M)) 1313 1314 /* generate the helpers for shift instructions with two vector operators */ 1315 #define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK) \ 1316 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 1317 void *vs2, CPURISCVState *env, uint32_t desc) \ 1318 { \ 1319 uint32_t vm = vext_vm(desc); \ 1320 uint32_t vl = env->vl; \ 1321 uint32_t esz = sizeof(TS1); \ 1322 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 1323 uint32_t vta = vext_vta(desc); \ 1324 uint32_t vma = vext_vma(desc); \ 1325 uint32_t i; \ 1326 \ 1327 VSTART_CHECK_EARLY_EXIT(env, vl); \ 1328 \ 1329 for (i = env->vstart; i < vl; i++) { \ 1330 if (!vm && !vext_elem_mask(v0, i)) { \ 1331 /* set masked-off elements to 1s */ \ 1332 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 1333 continue; \ 1334 } \ 1335 TS1 s1 = *((TS1 *)vs1 + HS1(i)); \ 1336 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1337 *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \ 1338 } \ 1339 env->vstart = 0; \ 1340 /* set tail elements to 1s */ \ 1341 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1342 } 1343 1344 GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7) 1345 GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf) 1346 GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f) 1347 GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f) 1348 1349 GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1350 GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1351 GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1352 GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1353 1354 GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7) 1355 GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf) 1356 GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, 
DO_SRL, 0x1f) 1357 GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1358 1359 /* 1360 * generate the helpers for shift instructions with one vector and one scalar 1361 */ 1362 #define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \ 1363 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1364 void *vs2, CPURISCVState *env, \ 1365 uint32_t desc) \ 1366 { \ 1367 uint32_t vm = vext_vm(desc); \ 1368 uint32_t vl = env->vl; \ 1369 uint32_t esz = sizeof(TD); \ 1370 uint32_t total_elems = \ 1371 vext_get_total_elems(env, desc, esz); \ 1372 uint32_t vta = vext_vta(desc); \ 1373 uint32_t vma = vext_vma(desc); \ 1374 uint32_t i; \ 1375 \ 1376 VSTART_CHECK_EARLY_EXIT(env, vl); \ 1377 \ 1378 for (i = env->vstart; i < vl; i++) { \ 1379 if (!vm && !vext_elem_mask(v0, i)) { \ 1380 /* set masked-off elements to 1s */ \ 1381 vext_set_elems_1s(vd, vma, i * esz, \ 1382 (i + 1) * esz); \ 1383 continue; \ 1384 } \ 1385 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1386 *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \ 1387 } \ 1388 env->vstart = 0; \ 1389 /* set tail elements to 1s */ \ 1390 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);\ 1391 } 1392 1393 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7) 1394 GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf) 1395 GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f) 1396 GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f) 1397 1398 GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1399 GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1400 GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1401 GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1402 1403 GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7) 1404 GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf) 1405 GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f) 1406 GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1407 1408 /* Vector Narrowing Integer Right Shift Instructions */ 1409 GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1410 GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1411 GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1412 GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf) 1413 GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1414 GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1415 GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1416 GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1417 GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1418 GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf) 1419 GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1420 GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1421 1422 /* Vector Integer Comparison Instructions */ 1423 #define DO_MSEQ(N, M) (N == M) 1424 #define DO_MSNE(N, M) (N != M) 1425 #define DO_MSLT(N, M) (N < M) 1426 #define DO_MSLE(N, M) (N <= M) 1427 #define DO_MSGT(N, M) (N > M) 1428 1429 #define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \ 1430 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1431 CPURISCVState *env, uint32_t desc) \ 1432 { \ 1433 uint32_t vm = vext_vm(desc); \ 1434 uint32_t vl = env->vl; \ 1435 uint32_t total_elems = 
riscv_cpu_cfg(env)->vlenb << 3; \ 1436 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 1437 uint32_t vma = vext_vma(desc); \ 1438 uint32_t i; \ 1439 \ 1440 VSTART_CHECK_EARLY_EXIT(env, vl); \ 1441 \ 1442 for (i = env->vstart; i < vl; i++) { \ 1443 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1444 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1445 if (!vm && !vext_elem_mask(v0, i)) { \ 1446 /* set masked-off elements to 1s */ \ 1447 if (vma) { \ 1448 vext_set_elem_mask(vd, i, 1); \ 1449 } \ 1450 continue; \ 1451 } \ 1452 vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \ 1453 } \ 1454 env->vstart = 0; \ 1455 /* 1456 * mask destination register are always tail-agnostic 1457 * set tail elements to 1s 1458 */ \ 1459 if (vta_all_1s) { \ 1460 for (; i < total_elems; i++) { \ 1461 vext_set_elem_mask(vd, i, 1); \ 1462 } \ 1463 } \ 1464 } 1465 1466 GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ) 1467 GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ) 1468 GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ) 1469 GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ) 1470 1471 GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE) 1472 GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE) 1473 GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE) 1474 GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE) 1475 1476 GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT) 1477 GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT) 1478 GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT) 1479 GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT) 1480 1481 GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT) 1482 GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT) 1483 GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT) 1484 GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT) 1485 1486 GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE) 1487 GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE) 1488 GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE) 1489 GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE) 1490 1491 GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE) 1492 GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE) 1493 GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE) 1494 GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE) 1495 1496 #define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \ 1497 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 1498 CPURISCVState *env, uint32_t desc) \ 1499 { \ 1500 uint32_t vm = vext_vm(desc); \ 1501 uint32_t vl = env->vl; \ 1502 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \ 1503 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 1504 uint32_t vma = vext_vma(desc); \ 1505 uint32_t i; \ 1506 \ 1507 VSTART_CHECK_EARLY_EXIT(env, vl); \ 1508 \ 1509 for (i = env->vstart; i < vl; i++) { \ 1510 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1511 if (!vm && !vext_elem_mask(v0, i)) { \ 1512 /* set masked-off elements to 1s */ \ 1513 if (vma) { \ 1514 vext_set_elem_mask(vd, i, 1); \ 1515 } \ 1516 continue; \ 1517 } \ 1518 vext_set_elem_mask(vd, i, \ 1519 DO_OP(s2, (ETYPE)(target_long)s1)); \ 1520 } \ 1521 env->vstart = 0; \ 1522 /* 1523 * mask destination register are always tail-agnostic 1524 * set tail elements to 1s 1525 */ \ 1526 if (vta_all_1s) { \ 1527 for (; i < total_elems; i++) { \ 1528 vext_set_elem_mask(vd, i, 1); \ 1529 } \ 1530 } \ 1531 } 1532 1533 GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ) 1534 GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ) 1535 GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ) 1536 GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ) 1537 1538 GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE) 
1539 GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE) 1540 GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE) 1541 GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE) 1542 1543 GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT) 1544 GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT) 1545 GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT) 1546 GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT) 1547 1548 GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT) 1549 GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT) 1550 GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT) 1551 GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT) 1552 1553 GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE) 1554 GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE) 1555 GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE) 1556 GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE) 1557 1558 GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE) 1559 GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE) 1560 GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE) 1561 GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE) 1562 1563 GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT) 1564 GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT) 1565 GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT) 1566 GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT) 1567 1568 GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT) 1569 GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT) 1570 GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT) 1571 GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT) 1572 1573 /* Vector Integer Min/Max Instructions */ 1574 RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN) 1575 RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN) 1576 RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN) 1577 RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN) 1578 RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN) 1579 RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN) 1580 RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN) 1581 RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN) 1582 RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX) 1583 RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX) 1584 RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX) 1585 RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX) 1586 RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX) 1587 RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX) 1588 RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX) 1589 RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX) 1590 GEN_VEXT_VV(vminu_vv_b, 1) 1591 GEN_VEXT_VV(vminu_vv_h, 2) 1592 GEN_VEXT_VV(vminu_vv_w, 4) 1593 GEN_VEXT_VV(vminu_vv_d, 8) 1594 GEN_VEXT_VV(vmin_vv_b, 1) 1595 GEN_VEXT_VV(vmin_vv_h, 2) 1596 GEN_VEXT_VV(vmin_vv_w, 4) 1597 GEN_VEXT_VV(vmin_vv_d, 8) 1598 GEN_VEXT_VV(vmaxu_vv_b, 1) 1599 GEN_VEXT_VV(vmaxu_vv_h, 2) 1600 GEN_VEXT_VV(vmaxu_vv_w, 4) 1601 GEN_VEXT_VV(vmaxu_vv_d, 8) 1602 GEN_VEXT_VV(vmax_vv_b, 1) 1603 GEN_VEXT_VV(vmax_vv_h, 2) 1604 GEN_VEXT_VV(vmax_vv_w, 4) 1605 GEN_VEXT_VV(vmax_vv_d, 8) 1606 1607 RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN) 1608 RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN) 1609 RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN) 1610 RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN) 1611 RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN) 1612 RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN) 1613 RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN) 1614 RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, 
H8, DO_MIN) 1615 RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX) 1616 RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX) 1617 RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX) 1618 RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX) 1619 RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX) 1620 RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX) 1621 RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX) 1622 RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX) 1623 GEN_VEXT_VX(vminu_vx_b, 1) 1624 GEN_VEXT_VX(vminu_vx_h, 2) 1625 GEN_VEXT_VX(vminu_vx_w, 4) 1626 GEN_VEXT_VX(vminu_vx_d, 8) 1627 GEN_VEXT_VX(vmin_vx_b, 1) 1628 GEN_VEXT_VX(vmin_vx_h, 2) 1629 GEN_VEXT_VX(vmin_vx_w, 4) 1630 GEN_VEXT_VX(vmin_vx_d, 8) 1631 GEN_VEXT_VX(vmaxu_vx_b, 1) 1632 GEN_VEXT_VX(vmaxu_vx_h, 2) 1633 GEN_VEXT_VX(vmaxu_vx_w, 4) 1634 GEN_VEXT_VX(vmaxu_vx_d, 8) 1635 GEN_VEXT_VX(vmax_vx_b, 1) 1636 GEN_VEXT_VX(vmax_vx_h, 2) 1637 GEN_VEXT_VX(vmax_vx_w, 4) 1638 GEN_VEXT_VX(vmax_vx_d, 8) 1639 1640 /* Vector Single-Width Integer Multiply Instructions */ 1641 #define DO_MUL(N, M) (N * M) 1642 RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL) 1643 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL) 1644 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL) 1645 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL) 1646 GEN_VEXT_VV(vmul_vv_b, 1) 1647 GEN_VEXT_VV(vmul_vv_h, 2) 1648 GEN_VEXT_VV(vmul_vv_w, 4) 1649 GEN_VEXT_VV(vmul_vv_d, 8) 1650 1651 static int8_t do_mulh_b(int8_t s2, int8_t s1) 1652 { 1653 return (int16_t)s2 * (int16_t)s1 >> 8; 1654 } 1655 1656 static int16_t do_mulh_h(int16_t s2, int16_t s1) 1657 { 1658 return (int32_t)s2 * (int32_t)s1 >> 16; 1659 } 1660 1661 static int32_t do_mulh_w(int32_t s2, int32_t s1) 1662 { 1663 return (int64_t)s2 * (int64_t)s1 >> 32; 1664 } 1665 1666 static int64_t do_mulh_d(int64_t s2, int64_t s1) 1667 { 1668 uint64_t hi_64, lo_64; 1669 1670 muls64(&lo_64, &hi_64, s1, s2); 1671 return hi_64; 1672 } 1673 1674 static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1) 1675 { 1676 return (uint16_t)s2 * (uint16_t)s1 >> 8; 1677 } 1678 1679 static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1) 1680 { 1681 return (uint32_t)s2 * (uint32_t)s1 >> 16; 1682 } 1683 1684 static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1) 1685 { 1686 return (uint64_t)s2 * (uint64_t)s1 >> 32; 1687 } 1688 1689 static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1) 1690 { 1691 uint64_t hi_64, lo_64; 1692 1693 mulu64(&lo_64, &hi_64, s2, s1); 1694 return hi_64; 1695 } 1696 1697 static int8_t do_mulhsu_b(int8_t s2, uint8_t s1) 1698 { 1699 return (int16_t)s2 * (uint16_t)s1 >> 8; 1700 } 1701 1702 static int16_t do_mulhsu_h(int16_t s2, uint16_t s1) 1703 { 1704 return (int32_t)s2 * (uint32_t)s1 >> 16; 1705 } 1706 1707 static int32_t do_mulhsu_w(int32_t s2, uint32_t s1) 1708 { 1709 return (int64_t)s2 * (uint64_t)s1 >> 32; 1710 } 1711 1712 /* 1713 * Let A = signed operand, 1714 * B = unsigned operand 1715 * P = mulu64(A, B), unsigned product 1716 * 1717 * LET X = 2 ** 64 - A, 2's complement of A 1718 * SP = signed product 1719 * THEN 1720 * IF A < 0 1721 * SP = -X * B 1722 * = -(2 ** 64 - A) * B 1723 * = A * B - 2 ** 64 * B 1724 * = P - 2 ** 64 * B 1725 * ELSE 1726 * SP = P 1727 * THEN 1728 * HI_P -= (A < 0 ? B : 0) 1729 */ 1730 1731 static int64_t do_mulhsu_d(int64_t s2, uint64_t s1) 1732 { 1733 uint64_t hi_64, lo_64; 1734 1735 mulu64(&lo_64, &hi_64, s2, s1); 1736 1737 hi_64 -= s2 < 0 ? 
s1 : 0; 1738 return hi_64; 1739 } 1740 1741 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b) 1742 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h) 1743 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w) 1744 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d) 1745 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b) 1746 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h) 1747 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w) 1748 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d) 1749 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b) 1750 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h) 1751 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w) 1752 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d) 1753 GEN_VEXT_VV(vmulh_vv_b, 1) 1754 GEN_VEXT_VV(vmulh_vv_h, 2) 1755 GEN_VEXT_VV(vmulh_vv_w, 4) 1756 GEN_VEXT_VV(vmulh_vv_d, 8) 1757 GEN_VEXT_VV(vmulhu_vv_b, 1) 1758 GEN_VEXT_VV(vmulhu_vv_h, 2) 1759 GEN_VEXT_VV(vmulhu_vv_w, 4) 1760 GEN_VEXT_VV(vmulhu_vv_d, 8) 1761 GEN_VEXT_VV(vmulhsu_vv_b, 1) 1762 GEN_VEXT_VV(vmulhsu_vv_h, 2) 1763 GEN_VEXT_VV(vmulhsu_vv_w, 4) 1764 GEN_VEXT_VV(vmulhsu_vv_d, 8) 1765 1766 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL) 1767 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL) 1768 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL) 1769 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL) 1770 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b) 1771 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h) 1772 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w) 1773 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d) 1774 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b) 1775 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h) 1776 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w) 1777 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d) 1778 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b) 1779 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h) 1780 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w) 1781 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d) 1782 GEN_VEXT_VX(vmul_vx_b, 1) 1783 GEN_VEXT_VX(vmul_vx_h, 2) 1784 GEN_VEXT_VX(vmul_vx_w, 4) 1785 GEN_VEXT_VX(vmul_vx_d, 8) 1786 GEN_VEXT_VX(vmulh_vx_b, 1) 1787 GEN_VEXT_VX(vmulh_vx_h, 2) 1788 GEN_VEXT_VX(vmulh_vx_w, 4) 1789 GEN_VEXT_VX(vmulh_vx_d, 8) 1790 GEN_VEXT_VX(vmulhu_vx_b, 1) 1791 GEN_VEXT_VX(vmulhu_vx_h, 2) 1792 GEN_VEXT_VX(vmulhu_vx_w, 4) 1793 GEN_VEXT_VX(vmulhu_vx_d, 8) 1794 GEN_VEXT_VX(vmulhsu_vx_b, 1) 1795 GEN_VEXT_VX(vmulhsu_vx_h, 2) 1796 GEN_VEXT_VX(vmulhsu_vx_w, 4) 1797 GEN_VEXT_VX(vmulhsu_vx_d, 8) 1798 1799 /* Vector Integer Divide Instructions */ 1800 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M) 1801 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M) 1802 #define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : \ 1803 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M) 1804 #define DO_REM(N, M) (unlikely(M == 0) ? N : \ 1805 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 
0 : N % M) 1806 1807 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU) 1808 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU) 1809 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU) 1810 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU) 1811 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV) 1812 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV) 1813 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV) 1814 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV) 1815 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU) 1816 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU) 1817 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU) 1818 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU) 1819 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM) 1820 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM) 1821 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM) 1822 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM) 1823 GEN_VEXT_VV(vdivu_vv_b, 1) 1824 GEN_VEXT_VV(vdivu_vv_h, 2) 1825 GEN_VEXT_VV(vdivu_vv_w, 4) 1826 GEN_VEXT_VV(vdivu_vv_d, 8) 1827 GEN_VEXT_VV(vdiv_vv_b, 1) 1828 GEN_VEXT_VV(vdiv_vv_h, 2) 1829 GEN_VEXT_VV(vdiv_vv_w, 4) 1830 GEN_VEXT_VV(vdiv_vv_d, 8) 1831 GEN_VEXT_VV(vremu_vv_b, 1) 1832 GEN_VEXT_VV(vremu_vv_h, 2) 1833 GEN_VEXT_VV(vremu_vv_w, 4) 1834 GEN_VEXT_VV(vremu_vv_d, 8) 1835 GEN_VEXT_VV(vrem_vv_b, 1) 1836 GEN_VEXT_VV(vrem_vv_h, 2) 1837 GEN_VEXT_VV(vrem_vv_w, 4) 1838 GEN_VEXT_VV(vrem_vv_d, 8) 1839 1840 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU) 1841 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU) 1842 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU) 1843 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU) 1844 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV) 1845 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV) 1846 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV) 1847 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV) 1848 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU) 1849 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU) 1850 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU) 1851 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU) 1852 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) 1853 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM) 1854 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM) 1855 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM) 1856 GEN_VEXT_VX(vdivu_vx_b, 1) 1857 GEN_VEXT_VX(vdivu_vx_h, 2) 1858 GEN_VEXT_VX(vdivu_vx_w, 4) 1859 GEN_VEXT_VX(vdivu_vx_d, 8) 1860 GEN_VEXT_VX(vdiv_vx_b, 1) 1861 GEN_VEXT_VX(vdiv_vx_h, 2) 1862 GEN_VEXT_VX(vdiv_vx_w, 4) 1863 GEN_VEXT_VX(vdiv_vx_d, 8) 1864 GEN_VEXT_VX(vremu_vx_b, 1) 1865 GEN_VEXT_VX(vremu_vx_h, 2) 1866 GEN_VEXT_VX(vremu_vx_w, 4) 1867 GEN_VEXT_VX(vremu_vx_d, 8) 1868 GEN_VEXT_VX(vrem_vx_b, 1) 1869 GEN_VEXT_VX(vrem_vx_h, 2) 1870 GEN_VEXT_VX(vrem_vx_w, 4) 1871 GEN_VEXT_VX(vrem_vx_d, 8) 1872 1873 /* Vector Widening Integer Multiply Instructions */ 1874 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) 1875 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL) 1876 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL) 1877 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL) 1878 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL) 1879 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL) 1880 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) 1881 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, 
DO_MUL) 1882 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) 1883 GEN_VEXT_VV(vwmul_vv_b, 2) 1884 GEN_VEXT_VV(vwmul_vv_h, 4) 1885 GEN_VEXT_VV(vwmul_vv_w, 8) 1886 GEN_VEXT_VV(vwmulu_vv_b, 2) 1887 GEN_VEXT_VV(vwmulu_vv_h, 4) 1888 GEN_VEXT_VV(vwmulu_vv_w, 8) 1889 GEN_VEXT_VV(vwmulsu_vv_b, 2) 1890 GEN_VEXT_VV(vwmulsu_vv_h, 4) 1891 GEN_VEXT_VV(vwmulsu_vv_w, 8) 1892 1893 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) 1894 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) 1895 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL) 1896 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL) 1897 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL) 1898 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL) 1899 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) 1900 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) 1901 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) 1902 GEN_VEXT_VX(vwmul_vx_b, 2) 1903 GEN_VEXT_VX(vwmul_vx_h, 4) 1904 GEN_VEXT_VX(vwmul_vx_w, 8) 1905 GEN_VEXT_VX(vwmulu_vx_b, 2) 1906 GEN_VEXT_VX(vwmulu_vx_h, 4) 1907 GEN_VEXT_VX(vwmulu_vx_w, 8) 1908 GEN_VEXT_VX(vwmulsu_vx_b, 2) 1909 GEN_VEXT_VX(vwmulsu_vx_h, 4) 1910 GEN_VEXT_VX(vwmulsu_vx_w, 8) 1911 1912 /* Vector Single-Width Integer Multiply-Add Instructions */ 1913 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1914 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 1915 { \ 1916 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1917 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1918 TD d = *((TD *)vd + HD(i)); \ 1919 *((TD *)vd + HD(i)) = OP(s2, s1, d); \ 1920 } 1921 1922 #define DO_MACC(N, M, D) (M * N + D) 1923 #define DO_NMSAC(N, M, D) (-(M * N) + D) 1924 #define DO_MADD(N, M, D) (M * D + N) 1925 #define DO_NMSUB(N, M, D) (-(M * D) + N) 1926 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC) 1927 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC) 1928 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC) 1929 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC) 1930 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC) 1931 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC) 1932 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC) 1933 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC) 1934 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD) 1935 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD) 1936 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD) 1937 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD) 1938 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB) 1939 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) 1940 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) 1941 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) 1942 GEN_VEXT_VV(vmacc_vv_b, 1) 1943 GEN_VEXT_VV(vmacc_vv_h, 2) 1944 GEN_VEXT_VV(vmacc_vv_w, 4) 1945 GEN_VEXT_VV(vmacc_vv_d, 8) 1946 GEN_VEXT_VV(vnmsac_vv_b, 1) 1947 GEN_VEXT_VV(vnmsac_vv_h, 2) 1948 GEN_VEXT_VV(vnmsac_vv_w, 4) 1949 GEN_VEXT_VV(vnmsac_vv_d, 8) 1950 GEN_VEXT_VV(vmadd_vv_b, 1) 1951 GEN_VEXT_VV(vmadd_vv_h, 2) 1952 GEN_VEXT_VV(vmadd_vv_w, 4) 1953 GEN_VEXT_VV(vmadd_vv_d, 8) 1954 GEN_VEXT_VV(vnmsub_vv_b, 1) 1955 GEN_VEXT_VV(vnmsub_vv_h, 2) 1956 GEN_VEXT_VV(vnmsub_vv_w, 4) 1957 GEN_VEXT_VV(vnmsub_vv_d, 8) 1958 1959 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 1960 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 1961 { \ 1962 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1963 TD d = *((TD *)vd 
+ HD(i)); \ 1964 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \ 1965 } 1966 1967 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC) 1968 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC) 1969 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC) 1970 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC) 1971 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC) 1972 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC) 1973 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC) 1974 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC) 1975 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD) 1976 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD) 1977 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD) 1978 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD) 1979 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB) 1980 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) 1981 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) 1982 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) 1983 GEN_VEXT_VX(vmacc_vx_b, 1) 1984 GEN_VEXT_VX(vmacc_vx_h, 2) 1985 GEN_VEXT_VX(vmacc_vx_w, 4) 1986 GEN_VEXT_VX(vmacc_vx_d, 8) 1987 GEN_VEXT_VX(vnmsac_vx_b, 1) 1988 GEN_VEXT_VX(vnmsac_vx_h, 2) 1989 GEN_VEXT_VX(vnmsac_vx_w, 4) 1990 GEN_VEXT_VX(vnmsac_vx_d, 8) 1991 GEN_VEXT_VX(vmadd_vx_b, 1) 1992 GEN_VEXT_VX(vmadd_vx_h, 2) 1993 GEN_VEXT_VX(vmadd_vx_w, 4) 1994 GEN_VEXT_VX(vmadd_vx_d, 8) 1995 GEN_VEXT_VX(vnmsub_vx_b, 1) 1996 GEN_VEXT_VX(vnmsub_vx_h, 2) 1997 GEN_VEXT_VX(vnmsub_vx_w, 4) 1998 GEN_VEXT_VX(vnmsub_vx_d, 8) 1999 2000 /* Vector Widening Integer Multiply-Add Instructions */ 2001 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) 2002 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC) 2003 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC) 2004 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC) 2005 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC) 2006 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC) 2007 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) 2008 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) 2009 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) 2010 GEN_VEXT_VV(vwmaccu_vv_b, 2) 2011 GEN_VEXT_VV(vwmaccu_vv_h, 4) 2012 GEN_VEXT_VV(vwmaccu_vv_w, 8) 2013 GEN_VEXT_VV(vwmacc_vv_b, 2) 2014 GEN_VEXT_VV(vwmacc_vv_h, 4) 2015 GEN_VEXT_VV(vwmacc_vv_w, 8) 2016 GEN_VEXT_VV(vwmaccsu_vv_b, 2) 2017 GEN_VEXT_VV(vwmaccsu_vv_h, 4) 2018 GEN_VEXT_VV(vwmaccsu_vv_w, 8) 2019 2020 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) 2021 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) 2022 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC) 2023 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC) 2024 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC) 2025 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC) 2026 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC) 2027 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC) 2028 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC) 2029 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) 2030 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) 2031 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) 2032 GEN_VEXT_VX(vwmaccu_vx_b, 2) 2033 GEN_VEXT_VX(vwmaccu_vx_h, 4) 2034 GEN_VEXT_VX(vwmaccu_vx_w, 8) 2035 GEN_VEXT_VX(vwmacc_vx_b, 2) 2036 GEN_VEXT_VX(vwmacc_vx_h, 4) 2037 GEN_VEXT_VX(vwmacc_vx_w, 8) 2038 GEN_VEXT_VX(vwmaccsu_vx_b, 2) 2039 
GEN_VEXT_VX(vwmaccsu_vx_h, 4) 2040 GEN_VEXT_VX(vwmaccsu_vx_w, 8) 2041 GEN_VEXT_VX(vwmaccus_vx_b, 2) 2042 GEN_VEXT_VX(vwmaccus_vx_h, 4) 2043 GEN_VEXT_VX(vwmaccus_vx_w, 8) 2044 2045 /* Vector Integer Merge and Move Instructions */ 2046 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \ 2047 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \ 2048 uint32_t desc) \ 2049 { \ 2050 uint32_t vl = env->vl; \ 2051 uint32_t esz = sizeof(ETYPE); \ 2052 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2053 uint32_t vta = vext_vta(desc); \ 2054 uint32_t i; \ 2055 \ 2056 VSTART_CHECK_EARLY_EXIT(env, vl); \ 2057 \ 2058 for (i = env->vstart; i < vl; i++) { \ 2059 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 2060 *((ETYPE *)vd + H(i)) = s1; \ 2061 } \ 2062 env->vstart = 0; \ 2063 /* set tail elements to 1s */ \ 2064 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2065 } 2066 2067 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1) 2068 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2) 2069 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4) 2070 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8) 2071 2072 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \ 2073 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \ 2074 uint32_t desc) \ 2075 { \ 2076 uint32_t vl = env->vl; \ 2077 uint32_t esz = sizeof(ETYPE); \ 2078 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2079 uint32_t vta = vext_vta(desc); \ 2080 uint32_t i; \ 2081 \ 2082 VSTART_CHECK_EARLY_EXIT(env, vl); \ 2083 \ 2084 for (i = env->vstart; i < vl; i++) { \ 2085 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \ 2086 } \ 2087 env->vstart = 0; \ 2088 /* set tail elements to 1s */ \ 2089 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2090 } 2091 2092 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1) 2093 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2) 2094 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4) 2095 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8) 2096 2097 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \ 2098 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 2099 CPURISCVState *env, uint32_t desc) \ 2100 { \ 2101 uint32_t vl = env->vl; \ 2102 uint32_t esz = sizeof(ETYPE); \ 2103 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2104 uint32_t vta = vext_vta(desc); \ 2105 uint32_t i; \ 2106 \ 2107 VSTART_CHECK_EARLY_EXIT(env, vl); \ 2108 \ 2109 for (i = env->vstart; i < vl; i++) { \ 2110 ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1); \ 2111 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \ 2112 } \ 2113 env->vstart = 0; \ 2114 /* set tail elements to 1s */ \ 2115 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2116 } 2117 2118 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1) 2119 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2) 2120 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4) 2121 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8) 2122 2123 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \ 2124 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2125 void *vs2, CPURISCVState *env, uint32_t desc) \ 2126 { \ 2127 uint32_t vl = env->vl; \ 2128 uint32_t esz = sizeof(ETYPE); \ 2129 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2130 uint32_t vta = vext_vta(desc); \ 2131 uint32_t i; \ 2132 \ 2133 VSTART_CHECK_EARLY_EXIT(env, vl); \ 2134 \ 2135 for (i = env->vstart; i < vl; i++) { \ 2136 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 2137 ETYPE d = (!vext_elem_mask(v0, i) ? 
s2 : \ 2138 (ETYPE)(target_long)s1); \ 2139 *((ETYPE *)vd + H(i)) = d; \ 2140 } \ 2141 env->vstart = 0; \ 2142 /* set tail elements to 1s */ \ 2143 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2144 } 2145 2146 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1) 2147 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2) 2148 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4) 2149 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8) 2150 2151 /* 2152 * Vector Fixed-Point Arithmetic Instructions 2153 */ 2154 2155 /* Vector Single-Width Saturating Add and Subtract */ 2156 2157 /* 2158 * As fixed point instructions probably have round mode and saturation, 2159 * define common macros for fixed point here. 2160 */ 2161 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i, 2162 CPURISCVState *env, int vxrm); 2163 2164 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 2165 static inline void \ 2166 do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 2167 CPURISCVState *env, int vxrm) \ 2168 { \ 2169 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 2170 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2171 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \ 2172 } 2173 2174 static inline void 2175 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2, 2176 CPURISCVState *env, 2177 uint32_t vl, uint32_t vm, int vxrm, 2178 opivv2_rm_fn *fn, uint32_t vma, uint32_t esz) 2179 { 2180 for (uint32_t i = env->vstart; i < vl; i++) { 2181 if (!vm && !vext_elem_mask(v0, i)) { 2182 /* set masked-off elements to 1s */ 2183 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); 2184 continue; 2185 } 2186 fn(vd, vs1, vs2, i, env, vxrm); 2187 } 2188 env->vstart = 0; 2189 } 2190 2191 static inline void 2192 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, 2193 CPURISCVState *env, 2194 uint32_t desc, 2195 opivv2_rm_fn *fn, uint32_t esz) 2196 { 2197 uint32_t vm = vext_vm(desc); 2198 uint32_t vl = env->vl; 2199 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 2200 uint32_t vta = vext_vta(desc); 2201 uint32_t vma = vext_vma(desc); 2202 2203 VSTART_CHECK_EARLY_EXIT(env, vl); 2204 2205 switch (env->vxrm) { 2206 case 0: /* rnu */ 2207 vext_vv_rm_1(vd, v0, vs1, vs2, 2208 env, vl, vm, 0, fn, vma, esz); 2209 break; 2210 case 1: /* rne */ 2211 vext_vv_rm_1(vd, v0, vs1, vs2, 2212 env, vl, vm, 1, fn, vma, esz); 2213 break; 2214 case 2: /* rdn */ 2215 vext_vv_rm_1(vd, v0, vs1, vs2, 2216 env, vl, vm, 2, fn, vma, esz); 2217 break; 2218 default: /* rod */ 2219 vext_vv_rm_1(vd, v0, vs1, vs2, 2220 env, vl, vm, 3, fn, vma, esz); 2221 break; 2222 } 2223 /* set tail elements to 1s */ 2224 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 2225 } 2226 2227 /* generate helpers for fixed point instructions with OPIVV format */ 2228 #define GEN_VEXT_VV_RM(NAME, ESZ) \ 2229 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 2230 CPURISCVState *env, uint32_t desc) \ 2231 { \ 2232 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \ 2233 do_##NAME, ESZ); \ 2234 } 2235 2236 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, 2237 uint8_t b) 2238 { 2239 uint8_t res = a + b; 2240 if (res < a) { 2241 res = UINT8_MAX; 2242 env->vxsat = 0x1; 2243 } 2244 return res; 2245 } 2246 2247 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a, 2248 uint16_t b) 2249 { 2250 uint16_t res = a + b; 2251 if (res < a) { 2252 res = UINT16_MAX; 2253 env->vxsat = 0x1; 2254 } 2255 return res; 2256 } 2257 2258 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a, 2259 uint32_t b) 2260 { 2261 uint32_t res = a + b; 
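    /*
     * Unsigned saturating add: the sum is computed modulo 2^SEW, so an
     * overflow shows up as the result wrapping below either operand.
     * Illustrative example for the 8-bit variant above: 0xF0 + 0x20
     * wraps to 0x10, and 0x10 < 0xF0, so the result saturates to
     * UINT8_MAX and vxsat is set.
     */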
2262 if (res < a) { 2263 res = UINT32_MAX; 2264 env->vxsat = 0x1; 2265 } 2266 return res; 2267 } 2268 2269 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a, 2270 uint64_t b) 2271 { 2272 uint64_t res = a + b; 2273 if (res < a) { 2274 res = UINT64_MAX; 2275 env->vxsat = 0x1; 2276 } 2277 return res; 2278 } 2279 2280 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8) 2281 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) 2282 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) 2283 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64) 2284 GEN_VEXT_VV_RM(vsaddu_vv_b, 1) 2285 GEN_VEXT_VV_RM(vsaddu_vv_h, 2) 2286 GEN_VEXT_VV_RM(vsaddu_vv_w, 4) 2287 GEN_VEXT_VV_RM(vsaddu_vv_d, 8) 2288 2289 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, 2290 CPURISCVState *env, int vxrm); 2291 2292 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2293 static inline void \ 2294 do_##NAME(void *vd, target_long s1, void *vs2, int i, \ 2295 CPURISCVState *env, int vxrm) \ 2296 { \ 2297 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2298 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \ 2299 } 2300 2301 static inline void 2302 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2, 2303 CPURISCVState *env, 2304 uint32_t vl, uint32_t vm, int vxrm, 2305 opivx2_rm_fn *fn, uint32_t vma, uint32_t esz) 2306 { 2307 for (uint32_t i = env->vstart; i < vl; i++) { 2308 if (!vm && !vext_elem_mask(v0, i)) { 2309 /* set masked-off elements to 1s */ 2310 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); 2311 continue; 2312 } 2313 fn(vd, s1, vs2, i, env, vxrm); 2314 } 2315 env->vstart = 0; 2316 } 2317 2318 static inline void 2319 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, 2320 CPURISCVState *env, 2321 uint32_t desc, 2322 opivx2_rm_fn *fn, uint32_t esz) 2323 { 2324 uint32_t vm = vext_vm(desc); 2325 uint32_t vl = env->vl; 2326 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 2327 uint32_t vta = vext_vta(desc); 2328 uint32_t vma = vext_vma(desc); 2329 2330 VSTART_CHECK_EARLY_EXIT(env, vl); 2331 2332 switch (env->vxrm) { 2333 case 0: /* rnu */ 2334 vext_vx_rm_1(vd, v0, s1, vs2, 2335 env, vl, vm, 0, fn, vma, esz); 2336 break; 2337 case 1: /* rne */ 2338 vext_vx_rm_1(vd, v0, s1, vs2, 2339 env, vl, vm, 1, fn, vma, esz); 2340 break; 2341 case 2: /* rdn */ 2342 vext_vx_rm_1(vd, v0, s1, vs2, 2343 env, vl, vm, 2, fn, vma, esz); 2344 break; 2345 default: /* rod */ 2346 vext_vx_rm_1(vd, v0, s1, vs2, 2347 env, vl, vm, 3, fn, vma, esz); 2348 break; 2349 } 2350 /* set tail elements to 1s */ 2351 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 2352 } 2353 2354 /* generate helpers for fixed point instructions with OPIVX format */ 2355 #define GEN_VEXT_VX_RM(NAME, ESZ) \ 2356 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2357 void *vs2, CPURISCVState *env, \ 2358 uint32_t desc) \ 2359 { \ 2360 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \ 2361 do_##NAME, ESZ); \ 2362 } 2363 2364 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) 2365 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) 2366 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) 2367 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) 2368 GEN_VEXT_VX_RM(vsaddu_vx_b, 1) 2369 GEN_VEXT_VX_RM(vsaddu_vx_h, 2) 2370 GEN_VEXT_VX_RM(vsaddu_vx_w, 4) 2371 GEN_VEXT_VX_RM(vsaddu_vx_d, 8) 2372 2373 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2374 { 2375 int8_t res = a + b; 2376 if ((res ^ a) & (res ^ 
b) & INT8_MIN) { 2377 res = a > 0 ? INT8_MAX : INT8_MIN; 2378 env->vxsat = 0x1; 2379 } 2380 return res; 2381 } 2382 2383 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, 2384 int16_t b) 2385 { 2386 int16_t res = a + b; 2387 if ((res ^ a) & (res ^ b) & INT16_MIN) { 2388 res = a > 0 ? INT16_MAX : INT16_MIN; 2389 env->vxsat = 0x1; 2390 } 2391 return res; 2392 } 2393 2394 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, 2395 int32_t b) 2396 { 2397 int32_t res = a + b; 2398 if ((res ^ a) & (res ^ b) & INT32_MIN) { 2399 res = a > 0 ? INT32_MAX : INT32_MIN; 2400 env->vxsat = 0x1; 2401 } 2402 return res; 2403 } 2404 2405 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, 2406 int64_t b) 2407 { 2408 int64_t res = a + b; 2409 if ((res ^ a) & (res ^ b) & INT64_MIN) { 2410 res = a > 0 ? INT64_MAX : INT64_MIN; 2411 env->vxsat = 0x1; 2412 } 2413 return res; 2414 } 2415 2416 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8) 2417 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) 2418 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) 2419 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) 2420 GEN_VEXT_VV_RM(vsadd_vv_b, 1) 2421 GEN_VEXT_VV_RM(vsadd_vv_h, 2) 2422 GEN_VEXT_VV_RM(vsadd_vv_w, 4) 2423 GEN_VEXT_VV_RM(vsadd_vv_d, 8) 2424 2425 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) 2426 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) 2427 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) 2428 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) 2429 GEN_VEXT_VX_RM(vsadd_vx_b, 1) 2430 GEN_VEXT_VX_RM(vsadd_vx_h, 2) 2431 GEN_VEXT_VX_RM(vsadd_vx_w, 4) 2432 GEN_VEXT_VX_RM(vsadd_vx_d, 8) 2433 2434 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, 2435 uint8_t b) 2436 { 2437 uint8_t res = a - b; 2438 if (res > a) { 2439 res = 0; 2440 env->vxsat = 0x1; 2441 } 2442 return res; 2443 } 2444 2445 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a, 2446 uint16_t b) 2447 { 2448 uint16_t res = a - b; 2449 if (res > a) { 2450 res = 0; 2451 env->vxsat = 0x1; 2452 } 2453 return res; 2454 } 2455 2456 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a, 2457 uint32_t b) 2458 { 2459 uint32_t res = a - b; 2460 if (res > a) { 2461 res = 0; 2462 env->vxsat = 0x1; 2463 } 2464 return res; 2465 } 2466 2467 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a, 2468 uint64_t b) 2469 { 2470 uint64_t res = a - b; 2471 if (res > a) { 2472 res = 0; 2473 env->vxsat = 0x1; 2474 } 2475 return res; 2476 } 2477 2478 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8) 2479 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16) 2480 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) 2481 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64) 2482 GEN_VEXT_VV_RM(vssubu_vv_b, 1) 2483 GEN_VEXT_VV_RM(vssubu_vv_h, 2) 2484 GEN_VEXT_VV_RM(vssubu_vv_w, 4) 2485 GEN_VEXT_VV_RM(vssubu_vv_d, 8) 2486 2487 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) 2488 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) 2489 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) 2490 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) 2491 GEN_VEXT_VX_RM(vssubu_vx_b, 1) 2492 GEN_VEXT_VX_RM(vssubu_vx_h, 2) 2493 GEN_VEXT_VX_RM(vssubu_vx_w, 4) 2494 GEN_VEXT_VX_RM(vssubu_vx_d, 8) 2495 2496 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2497 { 2498 int8_t res = a - b; 2499 
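    /*
     * Signed saturating subtract: overflow is only possible when a and b
     * have opposite signs ((a ^ b) has the sign bit set) and the wrapped
     * result then disagrees in sign with a ((res ^ a) has the sign bit
     * set). E.g. a = -128, b = 1: res wraps to 127, both XOR terms have
     * bit 7 set, so the result saturates to INT8_MIN and vxsat is set.
     */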
if ((res ^ a) & (a ^ b) & INT8_MIN) { 2500 res = a >= 0 ? INT8_MAX : INT8_MIN; 2501 env->vxsat = 0x1; 2502 } 2503 return res; 2504 } 2505 2506 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, 2507 int16_t b) 2508 { 2509 int16_t res = a - b; 2510 if ((res ^ a) & (a ^ b) & INT16_MIN) { 2511 res = a >= 0 ? INT16_MAX : INT16_MIN; 2512 env->vxsat = 0x1; 2513 } 2514 return res; 2515 } 2516 2517 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, 2518 int32_t b) 2519 { 2520 int32_t res = a - b; 2521 if ((res ^ a) & (a ^ b) & INT32_MIN) { 2522 res = a >= 0 ? INT32_MAX : INT32_MIN; 2523 env->vxsat = 0x1; 2524 } 2525 return res; 2526 } 2527 2528 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, 2529 int64_t b) 2530 { 2531 int64_t res = a - b; 2532 if ((res ^ a) & (a ^ b) & INT64_MIN) { 2533 res = a >= 0 ? INT64_MAX : INT64_MIN; 2534 env->vxsat = 0x1; 2535 } 2536 return res; 2537 } 2538 2539 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8) 2540 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16) 2541 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32) 2542 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64) 2543 GEN_VEXT_VV_RM(vssub_vv_b, 1) 2544 GEN_VEXT_VV_RM(vssub_vv_h, 2) 2545 GEN_VEXT_VV_RM(vssub_vv_w, 4) 2546 GEN_VEXT_VV_RM(vssub_vv_d, 8) 2547 2548 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8) 2549 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16) 2550 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32) 2551 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64) 2552 GEN_VEXT_VX_RM(vssub_vx_b, 1) 2553 GEN_VEXT_VX_RM(vssub_vx_h, 2) 2554 GEN_VEXT_VX_RM(vssub_vx_w, 4) 2555 GEN_VEXT_VX_RM(vssub_vx_d, 8) 2556 2557 /* Vector Single-Width Averaging Add and Subtract */ 2558 static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift) 2559 { 2560 uint8_t d = extract64(v, shift, 1); 2561 uint8_t d1; 2562 uint64_t D1, D2; 2563 2564 if (shift == 0 || shift > 64) { 2565 return 0; 2566 } 2567 2568 d1 = extract64(v, shift - 1, 1); 2569 D1 = extract64(v, 0, shift); 2570 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */ 2571 return d1; 2572 } else if (vxrm == 1) { /* round-to-nearest-even */ 2573 if (shift > 1) { 2574 D2 = extract64(v, 0, shift - 1); 2575 return d1 & ((D2 != 0) | d); 2576 } else { 2577 return d1 & d; 2578 } 2579 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */ 2580 return !d & (D1 != 0); 2581 } 2582 return 0; /* round-down (truncate) */ 2583 } 2584 2585 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, 2586 int32_t b) 2587 { 2588 int64_t res = (int64_t)a + b; 2589 uint8_t round = get_round(vxrm, res, 1); 2590 2591 return (res >> 1) + round; 2592 } 2593 2594 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, 2595 int64_t b) 2596 { 2597 int64_t res = a + b; 2598 uint8_t round = get_round(vxrm, res, 1); 2599 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN; 2600 2601 /* With signed overflow, bit 64 is inverse of bit 63. 
*/ 2602 return ((res >> 1) ^ over) + round; 2603 } 2604 2605 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32) 2606 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) 2607 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) 2608 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) 2609 GEN_VEXT_VV_RM(vaadd_vv_b, 1) 2610 GEN_VEXT_VV_RM(vaadd_vv_h, 2) 2611 GEN_VEXT_VV_RM(vaadd_vv_w, 4) 2612 GEN_VEXT_VV_RM(vaadd_vv_d, 8) 2613 2614 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) 2615 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) 2616 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) 2617 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) 2618 GEN_VEXT_VX_RM(vaadd_vx_b, 1) 2619 GEN_VEXT_VX_RM(vaadd_vx_h, 2) 2620 GEN_VEXT_VX_RM(vaadd_vx_w, 4) 2621 GEN_VEXT_VX_RM(vaadd_vx_d, 8) 2622 2623 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm, 2624 uint32_t a, uint32_t b) 2625 { 2626 uint64_t res = (uint64_t)a + b; 2627 uint8_t round = get_round(vxrm, res, 1); 2628 2629 return (res >> 1) + round; 2630 } 2631 2632 static inline uint64_t aaddu64(CPURISCVState *env, int vxrm, 2633 uint64_t a, uint64_t b) 2634 { 2635 uint64_t res = a + b; 2636 uint8_t round = get_round(vxrm, res, 1); 2637 uint64_t over = (uint64_t)(res < a) << 63; 2638 2639 return ((res >> 1) | over) + round; 2640 } 2641 2642 RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32) 2643 RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32) 2644 RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32) 2645 RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64) 2646 GEN_VEXT_VV_RM(vaaddu_vv_b, 1) 2647 GEN_VEXT_VV_RM(vaaddu_vv_h, 2) 2648 GEN_VEXT_VV_RM(vaaddu_vv_w, 4) 2649 GEN_VEXT_VV_RM(vaaddu_vv_d, 8) 2650 2651 RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32) 2652 RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32) 2653 RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32) 2654 RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64) 2655 GEN_VEXT_VX_RM(vaaddu_vx_b, 1) 2656 GEN_VEXT_VX_RM(vaaddu_vx_h, 2) 2657 GEN_VEXT_VX_RM(vaaddu_vx_w, 4) 2658 GEN_VEXT_VX_RM(vaaddu_vx_d, 8) 2659 2660 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, 2661 int32_t b) 2662 { 2663 int64_t res = (int64_t)a - b; 2664 uint8_t round = get_round(vxrm, res, 1); 2665 2666 return (res >> 1) + round; 2667 } 2668 2669 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, 2670 int64_t b) 2671 { 2672 int64_t res = (int64_t)a - b; 2673 uint8_t round = get_round(vxrm, res, 1); 2674 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN; 2675 2676 /* With signed overflow, bit 64 is inverse of bit 63. 
*/ 2677 return ((res >> 1) ^ over) + round; 2678 } 2679 2680 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32) 2681 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32) 2682 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) 2683 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) 2684 GEN_VEXT_VV_RM(vasub_vv_b, 1) 2685 GEN_VEXT_VV_RM(vasub_vv_h, 2) 2686 GEN_VEXT_VV_RM(vasub_vv_w, 4) 2687 GEN_VEXT_VV_RM(vasub_vv_d, 8) 2688 2689 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) 2690 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) 2691 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) 2692 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) 2693 GEN_VEXT_VX_RM(vasub_vx_b, 1) 2694 GEN_VEXT_VX_RM(vasub_vx_h, 2) 2695 GEN_VEXT_VX_RM(vasub_vx_w, 4) 2696 GEN_VEXT_VX_RM(vasub_vx_d, 8) 2697 2698 static inline uint32_t asubu32(CPURISCVState *env, int vxrm, 2699 uint32_t a, uint32_t b) 2700 { 2701 int64_t res = (int64_t)a - b; 2702 uint8_t round = get_round(vxrm, res, 1); 2703 2704 return (res >> 1) + round; 2705 } 2706 2707 static inline uint64_t asubu64(CPURISCVState *env, int vxrm, 2708 uint64_t a, uint64_t b) 2709 { 2710 uint64_t res = (uint64_t)a - b; 2711 uint8_t round = get_round(vxrm, res, 1); 2712 uint64_t over = (uint64_t)(res > a) << 63; 2713 2714 return ((res >> 1) | over) + round; 2715 } 2716 2717 RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32) 2718 RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32) 2719 RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32) 2720 RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64) 2721 GEN_VEXT_VV_RM(vasubu_vv_b, 1) 2722 GEN_VEXT_VV_RM(vasubu_vv_h, 2) 2723 GEN_VEXT_VV_RM(vasubu_vv_w, 4) 2724 GEN_VEXT_VV_RM(vasubu_vv_d, 8) 2725 2726 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32) 2727 RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32) 2728 RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32) 2729 RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64) 2730 GEN_VEXT_VX_RM(vasubu_vx_b, 1) 2731 GEN_VEXT_VX_RM(vasubu_vx_h, 2) 2732 GEN_VEXT_VX_RM(vasubu_vx_w, 4) 2733 GEN_VEXT_VX_RM(vasubu_vx_d, 8) 2734 2735 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */ 2736 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2737 { 2738 uint8_t round; 2739 int16_t res; 2740 2741 res = (int16_t)a * (int16_t)b; 2742 round = get_round(vxrm, res, 7); 2743 res = (res >> 7) + round; 2744 2745 if (res > INT8_MAX) { 2746 env->vxsat = 0x1; 2747 return INT8_MAX; 2748 } else if (res < INT8_MIN) { 2749 env->vxsat = 0x1; 2750 return INT8_MIN; 2751 } else { 2752 return res; 2753 } 2754 } 2755 2756 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2757 { 2758 uint8_t round; 2759 int32_t res; 2760 2761 res = (int32_t)a * (int32_t)b; 2762 round = get_round(vxrm, res, 15); 2763 res = (res >> 15) + round; 2764 2765 if (res > INT16_MAX) { 2766 env->vxsat = 0x1; 2767 return INT16_MAX; 2768 } else if (res < INT16_MIN) { 2769 env->vxsat = 0x1; 2770 return INT16_MIN; 2771 } else { 2772 return res; 2773 } 2774 } 2775 2776 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2777 { 2778 uint8_t round; 2779 int64_t res; 2780 2781 res = (int64_t)a * (int64_t)b; 2782 round = get_round(vxrm, res, 31); 2783 res = (res >> 31) + round; 2784 2785 if (res > INT32_MAX) { 2786 env->vxsat = 0x1; 2787 return INT32_MAX; 2788 } else if (res < INT32_MIN) { 2789 env->vxsat = 0x1; 
2790 return INT32_MIN; 2791 } else { 2792 return res; 2793 } 2794 } 2795 2796 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2797 { 2798 uint8_t round; 2799 uint64_t hi_64, lo_64; 2800 int64_t res; 2801 2802 if (a == INT64_MIN && b == INT64_MIN) { 2803 env->vxsat = 1; 2804 return INT64_MAX; 2805 } 2806 2807 muls64(&lo_64, &hi_64, a, b); 2808 round = get_round(vxrm, lo_64, 63); 2809 /* 2810 * Cannot overflow, as there are always 2811 * 2 sign bits after multiply. 2812 */ 2813 res = (hi_64 << 1) | (lo_64 >> 63); 2814 if (round) { 2815 if (res == INT64_MAX) { 2816 env->vxsat = 1; 2817 } else { 2818 res += 1; 2819 } 2820 } 2821 return res; 2822 } 2823 2824 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8) 2825 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) 2826 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) 2827 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) 2828 GEN_VEXT_VV_RM(vsmul_vv_b, 1) 2829 GEN_VEXT_VV_RM(vsmul_vv_h, 2) 2830 GEN_VEXT_VV_RM(vsmul_vv_w, 4) 2831 GEN_VEXT_VV_RM(vsmul_vv_d, 8) 2832 2833 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) 2834 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) 2835 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) 2836 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) 2837 GEN_VEXT_VX_RM(vsmul_vx_b, 1) 2838 GEN_VEXT_VX_RM(vsmul_vx_h, 2) 2839 GEN_VEXT_VX_RM(vsmul_vx_w, 4) 2840 GEN_VEXT_VX_RM(vsmul_vx_d, 8) 2841 2842 /* Vector Single-Width Scaling Shift Instructions */ 2843 static inline uint8_t 2844 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2845 { 2846 uint8_t round, shift = b & 0x7; 2847 uint8_t res; 2848 2849 round = get_round(vxrm, a, shift); 2850 res = (a >> shift) + round; 2851 return res; 2852 } 2853 static inline uint16_t 2854 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b) 2855 { 2856 uint8_t round, shift = b & 0xf; 2857 2858 round = get_round(vxrm, a, shift); 2859 return (a >> shift) + round; 2860 } 2861 static inline uint32_t 2862 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) 2863 { 2864 uint8_t round, shift = b & 0x1f; 2865 2866 round = get_round(vxrm, a, shift); 2867 return (a >> shift) + round; 2868 } 2869 static inline uint64_t 2870 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b) 2871 { 2872 uint8_t round, shift = b & 0x3f; 2873 2874 round = get_round(vxrm, a, shift); 2875 return (a >> shift) + round; 2876 } 2877 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8) 2878 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) 2879 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) 2880 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) 2881 GEN_VEXT_VV_RM(vssrl_vv_b, 1) 2882 GEN_VEXT_VV_RM(vssrl_vv_h, 2) 2883 GEN_VEXT_VV_RM(vssrl_vv_w, 4) 2884 GEN_VEXT_VV_RM(vssrl_vv_d, 8) 2885 2886 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) 2887 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) 2888 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) 2889 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) 2890 GEN_VEXT_VX_RM(vssrl_vx_b, 1) 2891 GEN_VEXT_VX_RM(vssrl_vx_h, 2) 2892 GEN_VEXT_VX_RM(vssrl_vx_w, 4) 2893 GEN_VEXT_VX_RM(vssrl_vx_d, 8) 2894 2895 static inline int8_t 2896 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2897 { 2898 uint8_t round, shift = b & 0x7; 2899 2900 round = get_round(vxrm, a, shift); 2901 return (a >> shift) + round; 2902 } 2903 static inline int16_t 2904 
vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2905 { 2906 uint8_t round, shift = b & 0xf; 2907 2908 round = get_round(vxrm, a, shift); 2909 return (a >> shift) + round; 2910 } 2911 static inline int32_t 2912 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2913 { 2914 uint8_t round, shift = b & 0x1f; 2915 2916 round = get_round(vxrm, a, shift); 2917 return (a >> shift) + round; 2918 } 2919 static inline int64_t 2920 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2921 { 2922 uint8_t round, shift = b & 0x3f; 2923 2924 round = get_round(vxrm, a, shift); 2925 return (a >> shift) + round; 2926 } 2927 2928 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) 2929 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) 2930 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) 2931 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) 2932 GEN_VEXT_VV_RM(vssra_vv_b, 1) 2933 GEN_VEXT_VV_RM(vssra_vv_h, 2) 2934 GEN_VEXT_VV_RM(vssra_vv_w, 4) 2935 GEN_VEXT_VV_RM(vssra_vv_d, 8) 2936 2937 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) 2938 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) 2939 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32) 2940 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) 2941 GEN_VEXT_VX_RM(vssra_vx_b, 1) 2942 GEN_VEXT_VX_RM(vssra_vx_h, 2) 2943 GEN_VEXT_VX_RM(vssra_vx_w, 4) 2944 GEN_VEXT_VX_RM(vssra_vx_d, 8) 2945 2946 /* Vector Narrowing Fixed-Point Clip Instructions */ 2947 static inline int8_t 2948 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b) 2949 { 2950 uint8_t round, shift = b & 0xf; 2951 int16_t res; 2952 2953 round = get_round(vxrm, a, shift); 2954 res = (a >> shift) + round; 2955 if (res > INT8_MAX) { 2956 env->vxsat = 0x1; 2957 return INT8_MAX; 2958 } else if (res < INT8_MIN) { 2959 env->vxsat = 0x1; 2960 return INT8_MIN; 2961 } else { 2962 return res; 2963 } 2964 } 2965 2966 static inline int16_t 2967 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b) 2968 { 2969 uint8_t round, shift = b & 0x1f; 2970 int32_t res; 2971 2972 round = get_round(vxrm, a, shift); 2973 res = (a >> shift) + round; 2974 if (res > INT16_MAX) { 2975 env->vxsat = 0x1; 2976 return INT16_MAX; 2977 } else if (res < INT16_MIN) { 2978 env->vxsat = 0x1; 2979 return INT16_MIN; 2980 } else { 2981 return res; 2982 } 2983 } 2984 2985 static inline int32_t 2986 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) 2987 { 2988 uint8_t round, shift = b & 0x3f; 2989 int64_t res; 2990 2991 round = get_round(vxrm, a, shift); 2992 res = (a >> shift) + round; 2993 if (res > INT32_MAX) { 2994 env->vxsat = 0x1; 2995 return INT32_MAX; 2996 } else if (res < INT32_MIN) { 2997 env->vxsat = 0x1; 2998 return INT32_MIN; 2999 } else { 3000 return res; 3001 } 3002 } 3003 3004 RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8) 3005 RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16) 3006 RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32) 3007 GEN_VEXT_VV_RM(vnclip_wv_b, 1) 3008 GEN_VEXT_VV_RM(vnclip_wv_h, 2) 3009 GEN_VEXT_VV_RM(vnclip_wv_w, 4) 3010 3011 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8) 3012 RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16) 3013 RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32) 3014 GEN_VEXT_VX_RM(vnclip_wx_b, 1) 3015 GEN_VEXT_VX_RM(vnclip_wx_h, 2) 3016 GEN_VEXT_VX_RM(vnclip_wx_w, 4) 3017 3018 static inline uint8_t 3019 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b) 3020 { 3021 
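    /*
     * Unsigned narrowing clip: the source operand is 2*SEW bits wide, the
     * shift amount comes from the low log2(2*SEW) bits of b (mask 0xf
     * here), and the rounded, shifted result saturates to the narrower
     * type. E.g. a = 0x1234, b = 0: no shift and no rounding, 0x1234
     * exceeds UINT8_MAX, so the result clips to 0xFF and vxsat is set.
     */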
uint8_t round, shift = b & 0xf; 3022 uint16_t res; 3023 3024 round = get_round(vxrm, a, shift); 3025 res = (a >> shift) + round; 3026 if (res > UINT8_MAX) { 3027 env->vxsat = 0x1; 3028 return UINT8_MAX; 3029 } else { 3030 return res; 3031 } 3032 } 3033 3034 static inline uint16_t 3035 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b) 3036 { 3037 uint8_t round, shift = b & 0x1f; 3038 uint32_t res; 3039 3040 round = get_round(vxrm, a, shift); 3041 res = (a >> shift) + round; 3042 if (res > UINT16_MAX) { 3043 env->vxsat = 0x1; 3044 return UINT16_MAX; 3045 } else { 3046 return res; 3047 } 3048 } 3049 3050 static inline uint32_t 3051 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b) 3052 { 3053 uint8_t round, shift = b & 0x3f; 3054 uint64_t res; 3055 3056 round = get_round(vxrm, a, shift); 3057 res = (a >> shift) + round; 3058 if (res > UINT32_MAX) { 3059 env->vxsat = 0x1; 3060 return UINT32_MAX; 3061 } else { 3062 return res; 3063 } 3064 } 3065 3066 RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8) 3067 RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16) 3068 RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32) 3069 GEN_VEXT_VV_RM(vnclipu_wv_b, 1) 3070 GEN_VEXT_VV_RM(vnclipu_wv_h, 2) 3071 GEN_VEXT_VV_RM(vnclipu_wv_w, 4) 3072 3073 RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8) 3074 RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16) 3075 RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32) 3076 GEN_VEXT_VX_RM(vnclipu_wx_b, 1) 3077 GEN_VEXT_VX_RM(vnclipu_wx_h, 2) 3078 GEN_VEXT_VX_RM(vnclipu_wx_w, 4) 3079 3080 /* 3081 * Vector Float Point Arithmetic Instructions 3082 */ 3083 /* Vector Single-Width Floating-Point Add/Subtract Instructions */ 3084 #define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3085 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3086 CPURISCVState *env) \ 3087 { \ 3088 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3089 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3090 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \ 3091 } 3092 3093 #define GEN_VEXT_VV_ENV(NAME, ESZ) \ 3094 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 3095 void *vs2, CPURISCVState *env, \ 3096 uint32_t desc) \ 3097 { \ 3098 uint32_t vm = vext_vm(desc); \ 3099 uint32_t vl = env->vl; \ 3100 uint32_t total_elems = \ 3101 vext_get_total_elems(env, desc, ESZ); \ 3102 uint32_t vta = vext_vta(desc); \ 3103 uint32_t vma = vext_vma(desc); \ 3104 uint32_t i; \ 3105 \ 3106 VSTART_CHECK_EARLY_EXIT(env, vl); \ 3107 \ 3108 for (i = env->vstart; i < vl; i++) { \ 3109 if (!vm && !vext_elem_mask(v0, i)) { \ 3110 /* set masked-off elements to 1s */ \ 3111 vext_set_elems_1s(vd, vma, i * ESZ, \ 3112 (i + 1) * ESZ); \ 3113 continue; \ 3114 } \ 3115 do_##NAME(vd, vs1, vs2, i, env); \ 3116 } \ 3117 env->vstart = 0; \ 3118 /* set tail elements to 1s */ \ 3119 vext_set_elems_1s(vd, vta, vl * ESZ, \ 3120 total_elems * ESZ); \ 3121 } 3122 3123 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add) 3124 RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add) 3125 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add) 3126 GEN_VEXT_VV_ENV(vfadd_vv_h, 2) 3127 GEN_VEXT_VV_ENV(vfadd_vv_w, 4) 3128 GEN_VEXT_VV_ENV(vfadd_vv_d, 8) 3129 3130 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3131 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3132 CPURISCVState *env) \ 3133 { \ 3134 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3135 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, 
&env->fp_status);\ 3136 } 3137 3138 #define GEN_VEXT_VF(NAME, ESZ) \ 3139 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ 3140 void *vs2, CPURISCVState *env, \ 3141 uint32_t desc) \ 3142 { \ 3143 uint32_t vm = vext_vm(desc); \ 3144 uint32_t vl = env->vl; \ 3145 uint32_t total_elems = \ 3146 vext_get_total_elems(env, desc, ESZ); \ 3147 uint32_t vta = vext_vta(desc); \ 3148 uint32_t vma = vext_vma(desc); \ 3149 uint32_t i; \ 3150 \ 3151 VSTART_CHECK_EARLY_EXIT(env, vl); \ 3152 \ 3153 for (i = env->vstart; i < vl; i++) { \ 3154 if (!vm && !vext_elem_mask(v0, i)) { \ 3155 /* set masked-off elements to 1s */ \ 3156 vext_set_elems_1s(vd, vma, i * ESZ, \ 3157 (i + 1) * ESZ); \ 3158 continue; \ 3159 } \ 3160 do_##NAME(vd, s1, vs2, i, env); \ 3161 } \ 3162 env->vstart = 0; \ 3163 /* set tail elements to 1s */ \ 3164 vext_set_elems_1s(vd, vta, vl * ESZ, \ 3165 total_elems * ESZ); \ 3166 } 3167 3168 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) 3169 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) 3170 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) 3171 GEN_VEXT_VF(vfadd_vf_h, 2) 3172 GEN_VEXT_VF(vfadd_vf_w, 4) 3173 GEN_VEXT_VF(vfadd_vf_d, 8) 3174 3175 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) 3176 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) 3177 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) 3178 GEN_VEXT_VV_ENV(vfsub_vv_h, 2) 3179 GEN_VEXT_VV_ENV(vfsub_vv_w, 4) 3180 GEN_VEXT_VV_ENV(vfsub_vv_d, 8) 3181 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) 3182 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) 3183 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) 3184 GEN_VEXT_VF(vfsub_vf_h, 2) 3185 GEN_VEXT_VF(vfsub_vf_w, 4) 3186 GEN_VEXT_VF(vfsub_vf_d, 8) 3187 3188 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) 3189 { 3190 return float16_sub(b, a, s); 3191 } 3192 3193 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s) 3194 { 3195 return float32_sub(b, a, s); 3196 } 3197 3198 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s) 3199 { 3200 return float64_sub(b, a, s); 3201 } 3202 3203 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) 3204 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) 3205 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) 3206 GEN_VEXT_VF(vfrsub_vf_h, 2) 3207 GEN_VEXT_VF(vfrsub_vf_w, 4) 3208 GEN_VEXT_VF(vfrsub_vf_d, 8) 3209 3210 /* Vector Widening Floating-Point Add/Subtract Instructions */ 3211 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) 3212 { 3213 return float32_add(float16_to_float32(a, true, s), 3214 float16_to_float32(b, true, s), s); 3215 } 3216 3217 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) 3218 { 3219 return float64_add(float32_to_float64(a, s), 3220 float32_to_float64(b, s), s); 3221 3222 } 3223 3224 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) 3225 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) 3226 GEN_VEXT_VV_ENV(vfwadd_vv_h, 4) 3227 GEN_VEXT_VV_ENV(vfwadd_vv_w, 8) 3228 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) 3229 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) 3230 GEN_VEXT_VF(vfwadd_vf_h, 4) 3231 GEN_VEXT_VF(vfwadd_vf_w, 8) 3232 3233 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) 3234 { 3235 return float32_sub(float16_to_float32(a, true, s), 3236 float16_to_float32(b, true, s), s); 3237 } 3238 3239 static uint64_t vfwsub32(uint32_t a, 
uint32_t b, float_status *s) 3240 { 3241 return float64_sub(float32_to_float64(a, s), 3242 float32_to_float64(b, s), s); 3243 3244 } 3245 3246 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) 3247 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) 3248 GEN_VEXT_VV_ENV(vfwsub_vv_h, 4) 3249 GEN_VEXT_VV_ENV(vfwsub_vv_w, 8) 3250 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16) 3251 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) 3252 GEN_VEXT_VF(vfwsub_vf_h, 4) 3253 GEN_VEXT_VF(vfwsub_vf_w, 8) 3254 3255 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) 3256 { 3257 return float32_add(a, float16_to_float32(b, true, s), s); 3258 } 3259 3260 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s) 3261 { 3262 return float64_add(a, float32_to_float64(b, s), s); 3263 } 3264 3265 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) 3266 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) 3267 GEN_VEXT_VV_ENV(vfwadd_wv_h, 4) 3268 GEN_VEXT_VV_ENV(vfwadd_wv_w, 8) 3269 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) 3270 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) 3271 GEN_VEXT_VF(vfwadd_wf_h, 4) 3272 GEN_VEXT_VF(vfwadd_wf_w, 8) 3273 3274 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) 3275 { 3276 return float32_sub(a, float16_to_float32(b, true, s), s); 3277 } 3278 3279 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s) 3280 { 3281 return float64_sub(a, float32_to_float64(b, s), s); 3282 } 3283 3284 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) 3285 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) 3286 GEN_VEXT_VV_ENV(vfwsub_wv_h, 4) 3287 GEN_VEXT_VV_ENV(vfwsub_wv_w, 8) 3288 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) 3289 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) 3290 GEN_VEXT_VF(vfwsub_wf_h, 4) 3291 GEN_VEXT_VF(vfwsub_wf_w, 8) 3292 3293 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */ 3294 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) 3295 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) 3296 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) 3297 GEN_VEXT_VV_ENV(vfmul_vv_h, 2) 3298 GEN_VEXT_VV_ENV(vfmul_vv_w, 4) 3299 GEN_VEXT_VV_ENV(vfmul_vv_d, 8) 3300 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) 3301 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) 3302 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) 3303 GEN_VEXT_VF(vfmul_vf_h, 2) 3304 GEN_VEXT_VF(vfmul_vf_w, 4) 3305 GEN_VEXT_VF(vfmul_vf_d, 8) 3306 3307 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) 3308 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) 3309 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) 3310 GEN_VEXT_VV_ENV(vfdiv_vv_h, 2) 3311 GEN_VEXT_VV_ENV(vfdiv_vv_w, 4) 3312 GEN_VEXT_VV_ENV(vfdiv_vv_d, 8) 3313 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) 3314 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) 3315 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) 3316 GEN_VEXT_VF(vfdiv_vf_h, 2) 3317 GEN_VEXT_VF(vfdiv_vf_w, 4) 3318 GEN_VEXT_VF(vfdiv_vf_d, 8) 3319 3320 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) 3321 { 3322 return float16_div(b, a, s); 3323 } 3324 3325 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s) 3326 { 3327 return float32_div(b, a, s); 3328 } 3329 3330 static uint64_t 
float64_rdiv(uint64_t a, uint64_t b, float_status *s) 3331 { 3332 return float64_div(b, a, s); 3333 } 3334 3335 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) 3336 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) 3337 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) 3338 GEN_VEXT_VF(vfrdiv_vf_h, 2) 3339 GEN_VEXT_VF(vfrdiv_vf_w, 4) 3340 GEN_VEXT_VF(vfrdiv_vf_d, 8) 3341 3342 /* Vector Widening Floating-Point Multiply */ 3343 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) 3344 { 3345 return float32_mul(float16_to_float32(a, true, s), 3346 float16_to_float32(b, true, s), s); 3347 } 3348 3349 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) 3350 { 3351 return float64_mul(float32_to_float64(a, s), 3352 float32_to_float64(b, s), s); 3353 3354 } 3355 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) 3356 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32) 3357 GEN_VEXT_VV_ENV(vfwmul_vv_h, 4) 3358 GEN_VEXT_VV_ENV(vfwmul_vv_w, 8) 3359 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) 3360 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) 3361 GEN_VEXT_VF(vfwmul_vf_h, 4) 3362 GEN_VEXT_VF(vfwmul_vf_w, 8) 3363 3364 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ 3365 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3366 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3367 CPURISCVState *env) \ 3368 { \ 3369 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3370 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3371 TD d = *((TD *)vd + HD(i)); \ 3372 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \ 3373 } 3374 3375 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3376 { 3377 return float16_muladd(a, b, d, 0, s); 3378 } 3379 3380 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3381 { 3382 return float32_muladd(a, b, d, 0, s); 3383 } 3384 3385 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3386 { 3387 return float64_muladd(a, b, d, 0, s); 3388 } 3389 3390 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) 3391 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) 3392 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) 3393 GEN_VEXT_VV_ENV(vfmacc_vv_h, 2) 3394 GEN_VEXT_VV_ENV(vfmacc_vv_w, 4) 3395 GEN_VEXT_VV_ENV(vfmacc_vv_d, 8) 3396 3397 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3398 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3399 CPURISCVState *env) \ 3400 { \ 3401 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3402 TD d = *((TD *)vd + HD(i)); \ 3403 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\ 3404 } 3405 3406 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) 3407 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) 3408 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) 3409 GEN_VEXT_VF(vfmacc_vf_h, 2) 3410 GEN_VEXT_VF(vfmacc_vf_w, 4) 3411 GEN_VEXT_VF(vfmacc_vf_d, 8) 3412 3413 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3414 { 3415 return float16_muladd(a, b, d, float_muladd_negate_c | 3416 float_muladd_negate_product, s); 3417 } 3418 3419 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3420 { 3421 return float32_muladd(a, b, d, float_muladd_negate_c | 3422 float_muladd_negate_product, s); 3423 } 3424 3425 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3426 { 3427 return float64_muladd(a, b, d, 
float_muladd_negate_c | 3428 float_muladd_negate_product, s); 3429 } 3430 3431 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16) 3432 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) 3433 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) 3434 GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2) 3435 GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4) 3436 GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8) 3437 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) 3438 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) 3439 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) 3440 GEN_VEXT_VF(vfnmacc_vf_h, 2) 3441 GEN_VEXT_VF(vfnmacc_vf_w, 4) 3442 GEN_VEXT_VF(vfnmacc_vf_d, 8) 3443 3444 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3445 { 3446 return float16_muladd(a, b, d, float_muladd_negate_c, s); 3447 } 3448 3449 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3450 { 3451 return float32_muladd(a, b, d, float_muladd_negate_c, s); 3452 } 3453 3454 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3455 { 3456 return float64_muladd(a, b, d, float_muladd_negate_c, s); 3457 } 3458 3459 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) 3460 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) 3461 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) 3462 GEN_VEXT_VV_ENV(vfmsac_vv_h, 2) 3463 GEN_VEXT_VV_ENV(vfmsac_vv_w, 4) 3464 GEN_VEXT_VV_ENV(vfmsac_vv_d, 8) 3465 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) 3466 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) 3467 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) 3468 GEN_VEXT_VF(vfmsac_vf_h, 2) 3469 GEN_VEXT_VF(vfmsac_vf_w, 4) 3470 GEN_VEXT_VF(vfmsac_vf_d, 8) 3471 3472 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3473 { 3474 return float16_muladd(a, b, d, float_muladd_negate_product, s); 3475 } 3476 3477 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3478 { 3479 return float32_muladd(a, b, d, float_muladd_negate_product, s); 3480 } 3481 3482 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3483 { 3484 return float64_muladd(a, b, d, float_muladd_negate_product, s); 3485 } 3486 3487 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) 3488 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) 3489 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) 3490 GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2) 3491 GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4) 3492 GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8) 3493 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) 3494 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) 3495 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64) 3496 GEN_VEXT_VF(vfnmsac_vf_h, 2) 3497 GEN_VEXT_VF(vfnmsac_vf_w, 4) 3498 GEN_VEXT_VF(vfnmsac_vf_d, 8) 3499 3500 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3501 { 3502 return float16_muladd(d, b, a, 0, s); 3503 } 3504 3505 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3506 { 3507 return float32_muladd(d, b, a, 0, s); 3508 } 3509 3510 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3511 { 3512 return float64_muladd(d, b, a, 0, s); 3513 } 3514 3515 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) 3516 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) 3517 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) 3518 GEN_VEXT_VV_ENV(vfmadd_vv_h, 
2) 3519 GEN_VEXT_VV_ENV(vfmadd_vv_w, 4) 3520 GEN_VEXT_VV_ENV(vfmadd_vv_d, 8) 3521 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16) 3522 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) 3523 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) 3524 GEN_VEXT_VF(vfmadd_vf_h, 2) 3525 GEN_VEXT_VF(vfmadd_vf_w, 4) 3526 GEN_VEXT_VF(vfmadd_vf_d, 8) 3527 3528 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3529 { 3530 return float16_muladd(d, b, a, float_muladd_negate_c | 3531 float_muladd_negate_product, s); 3532 } 3533 3534 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3535 { 3536 return float32_muladd(d, b, a, float_muladd_negate_c | 3537 float_muladd_negate_product, s); 3538 } 3539 3540 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3541 { 3542 return float64_muladd(d, b, a, float_muladd_negate_c | 3543 float_muladd_negate_product, s); 3544 } 3545 3546 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) 3547 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) 3548 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) 3549 GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2) 3550 GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4) 3551 GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8) 3552 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) 3553 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) 3554 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) 3555 GEN_VEXT_VF(vfnmadd_vf_h, 2) 3556 GEN_VEXT_VF(vfnmadd_vf_w, 4) 3557 GEN_VEXT_VF(vfnmadd_vf_d, 8) 3558 3559 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3560 { 3561 return float16_muladd(d, b, a, float_muladd_negate_c, s); 3562 } 3563 3564 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3565 { 3566 return float32_muladd(d, b, a, float_muladd_negate_c, s); 3567 } 3568 3569 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3570 { 3571 return float64_muladd(d, b, a, float_muladd_negate_c, s); 3572 } 3573 3574 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) 3575 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) 3576 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) 3577 GEN_VEXT_VV_ENV(vfmsub_vv_h, 2) 3578 GEN_VEXT_VV_ENV(vfmsub_vv_w, 4) 3579 GEN_VEXT_VV_ENV(vfmsub_vv_d, 8) 3580 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) 3581 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) 3582 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) 3583 GEN_VEXT_VF(vfmsub_vf_h, 2) 3584 GEN_VEXT_VF(vfmsub_vf_w, 4) 3585 GEN_VEXT_VF(vfmsub_vf_d, 8) 3586 3587 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3588 { 3589 return float16_muladd(d, b, a, float_muladd_negate_product, s); 3590 } 3591 3592 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3593 { 3594 return float32_muladd(d, b, a, float_muladd_negate_product, s); 3595 } 3596 3597 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3598 { 3599 return float64_muladd(d, b, a, float_muladd_negate_product, s); 3600 } 3601 3602 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) 3603 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) 3604 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) 3605 GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2) 3606 GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4) 3607 GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8) 3608 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) 
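/*
 * Operand-order summary for the single-width FP fused multiply-add helpers
 * in this section (a sketch read off the float*_muladd() argument order and
 * negate flags above; the .vf forms substitute the scalar rs1 for vs1[i]):
 *
 *   vfmacc:  vd[i] =  (vs1[i] * vs2[i]) + vd[i]
 *   vfnmacc: vd[i] = -(vs1[i] * vs2[i]) - vd[i]
 *   vfmsac:  vd[i] =  (vs1[i] * vs2[i]) - vd[i]
 *   vfnmsac: vd[i] = -(vs1[i] * vs2[i]) + vd[i]
 *   vfmadd:  vd[i] =  (vs1[i] * vd[i]) + vs2[i]
 *   vfnmadd: vd[i] = -(vs1[i] * vd[i]) - vs2[i]
 *   vfmsub:  vd[i] =  (vs1[i] * vd[i]) - vs2[i]
 *   vfnmsub: vd[i] = -(vs1[i] * vd[i]) + vs2[i]
 */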
3609 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) 3610 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64) 3611 GEN_VEXT_VF(vfnmsub_vf_h, 2) 3612 GEN_VEXT_VF(vfnmsub_vf_w, 4) 3613 GEN_VEXT_VF(vfnmsub_vf_d, 8) 3614 3615 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */ 3616 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3617 { 3618 return float32_muladd(float16_to_float32(a, true, s), 3619 float16_to_float32(b, true, s), d, 0, s); 3620 } 3621 3622 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3623 { 3624 return float64_muladd(float32_to_float64(a, s), 3625 float32_to_float64(b, s), d, 0, s); 3626 } 3627 3628 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) 3629 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32) 3630 GEN_VEXT_VV_ENV(vfwmacc_vv_h, 4) 3631 GEN_VEXT_VV_ENV(vfwmacc_vv_w, 8) 3632 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16) 3633 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32) 3634 GEN_VEXT_VF(vfwmacc_vf_h, 4) 3635 GEN_VEXT_VF(vfwmacc_vf_w, 8) 3636 3637 static uint32_t fwmaccbf16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3638 { 3639 return float32_muladd(bfloat16_to_float32(a, s), 3640 bfloat16_to_float32(b, s), d, 0, s); 3641 } 3642 3643 RVVCALL(OPFVV3, vfwmaccbf16_vv, WOP_UUU_H, H4, H2, H2, fwmaccbf16) 3644 GEN_VEXT_VV_ENV(vfwmaccbf16_vv, 4) 3645 RVVCALL(OPFVF3, vfwmaccbf16_vf, WOP_UUU_H, H4, H2, fwmaccbf16) 3646 GEN_VEXT_VF(vfwmaccbf16_vf, 4) 3647 3648 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3649 { 3650 return float32_muladd(float16_to_float32(a, true, s), 3651 float16_to_float32(b, true, s), d, 3652 float_muladd_negate_c | float_muladd_negate_product, 3653 s); 3654 } 3655 3656 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3657 { 3658 return float64_muladd(float32_to_float64(a, s), float32_to_float64(b, s), 3659 d, float_muladd_negate_c | 3660 float_muladd_negate_product, s); 3661 } 3662 3663 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) 3664 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) 3665 GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 4) 3666 GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 8) 3667 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) 3668 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) 3669 GEN_VEXT_VF(vfwnmacc_vf_h, 4) 3670 GEN_VEXT_VF(vfwnmacc_vf_w, 8) 3671 3672 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3673 { 3674 return float32_muladd(float16_to_float32(a, true, s), 3675 float16_to_float32(b, true, s), d, 3676 float_muladd_negate_c, s); 3677 } 3678 3679 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3680 { 3681 return float64_muladd(float32_to_float64(a, s), 3682 float32_to_float64(b, s), d, 3683 float_muladd_negate_c, s); 3684 } 3685 3686 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) 3687 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) 3688 GEN_VEXT_VV_ENV(vfwmsac_vv_h, 4) 3689 GEN_VEXT_VV_ENV(vfwmsac_vv_w, 8) 3690 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) 3691 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) 3692 GEN_VEXT_VF(vfwmsac_vf_h, 4) 3693 GEN_VEXT_VF(vfwmsac_vf_w, 8) 3694 3695 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3696 { 3697 return float32_muladd(float16_to_float32(a, true, s), 3698 float16_to_float32(b, true, s), d, 3699 
float_muladd_negate_product, s); 3700 } 3701 3702 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3703 { 3704 return float64_muladd(float32_to_float64(a, s), 3705 float32_to_float64(b, s), d, 3706 float_muladd_negate_product, s); 3707 } 3708 3709 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) 3710 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) 3711 GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 4) 3712 GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 8) 3713 RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) 3714 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) 3715 GEN_VEXT_VF(vfwnmsac_vf_h, 4) 3716 GEN_VEXT_VF(vfwnmsac_vf_w, 8) 3717 3718 /* Vector Floating-Point Square-Root Instruction */ 3719 #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 3720 static void do_##NAME(void *vd, void *vs2, int i, \ 3721 CPURISCVState *env) \ 3722 { \ 3723 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3724 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ 3725 } 3726 3727 #define GEN_VEXT_V_ENV(NAME, ESZ) \ 3728 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 3729 CPURISCVState *env, uint32_t desc) \ 3730 { \ 3731 uint32_t vm = vext_vm(desc); \ 3732 uint32_t vl = env->vl; \ 3733 uint32_t total_elems = \ 3734 vext_get_total_elems(env, desc, ESZ); \ 3735 uint32_t vta = vext_vta(desc); \ 3736 uint32_t vma = vext_vma(desc); \ 3737 uint32_t i; \ 3738 \ 3739 VSTART_CHECK_EARLY_EXIT(env, vl); \ 3740 \ 3741 if (vl == 0) { \ 3742 return; \ 3743 } \ 3744 for (i = env->vstart; i < vl; i++) { \ 3745 if (!vm && !vext_elem_mask(v0, i)) { \ 3746 /* set masked-off elements to 1s */ \ 3747 vext_set_elems_1s(vd, vma, i * ESZ, \ 3748 (i + 1) * ESZ); \ 3749 continue; \ 3750 } \ 3751 do_##NAME(vd, vs2, i, env); \ 3752 } \ 3753 env->vstart = 0; \ 3754 vext_set_elems_1s(vd, vta, vl * ESZ, \ 3755 total_elems * ESZ); \ 3756 } 3757 3758 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) 3759 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) 3760 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) 3761 GEN_VEXT_V_ENV(vfsqrt_v_h, 2) 3762 GEN_VEXT_V_ENV(vfsqrt_v_w, 4) 3763 GEN_VEXT_V_ENV(vfsqrt_v_d, 8) 3764 3765 /* 3766 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction 3767 * 3768 * Adapted from riscv-v-spec recip.c: 3769 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3770 */ 3771 static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size) 3772 { 3773 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3774 uint64_t exp = extract64(f, frac_size, exp_size); 3775 uint64_t frac = extract64(f, 0, frac_size); 3776 3777 const uint8_t lookup_table[] = { 3778 52, 51, 50, 48, 47, 46, 44, 43, 3779 42, 41, 40, 39, 38, 36, 35, 34, 3780 33, 32, 31, 30, 30, 29, 28, 27, 3781 26, 25, 24, 23, 23, 22, 21, 20, 3782 19, 19, 18, 17, 16, 16, 15, 14, 3783 14, 13, 12, 12, 11, 10, 10, 9, 3784 9, 8, 7, 7, 6, 6, 5, 4, 3785 4, 3, 3, 2, 2, 1, 1, 0, 3786 127, 125, 123, 121, 119, 118, 116, 114, 3787 113, 111, 109, 108, 106, 105, 103, 102, 3788 100, 99, 97, 96, 95, 93, 92, 91, 3789 90, 88, 87, 86, 85, 84, 83, 82, 3790 80, 79, 78, 77, 76, 75, 74, 73, 3791 72, 71, 70, 70, 69, 68, 67, 66, 3792 65, 64, 63, 63, 62, 61, 60, 59, 3793 59, 58, 57, 56, 56, 55, 54, 53 3794 }; 3795 const int precision = 7; 3796 3797 if (exp == 0 && frac != 0) { /* subnormal */ 3798 /* Normalize the subnormal. 
*/ 3799 while (extract64(frac, frac_size - 1, 1) == 0) { 3800 exp--; 3801 frac <<= 1; 3802 } 3803 3804 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3805 } 3806 3807 int idx = ((exp & 1) << (precision - 1)) | 3808 (frac >> (frac_size - precision + 1)); 3809 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3810 (frac_size - precision); 3811 uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2; 3812 3813 uint64_t val = 0; 3814 val = deposit64(val, 0, frac_size, out_frac); 3815 val = deposit64(val, frac_size, exp_size, out_exp); 3816 val = deposit64(val, frac_size + exp_size, 1, sign); 3817 return val; 3818 } 3819 3820 static float16 frsqrt7_h(float16 f, float_status *s) 3821 { 3822 int exp_size = 5, frac_size = 10; 3823 bool sign = float16_is_neg(f); 3824 3825 /* 3826 * frsqrt7(sNaN) = canonical NaN 3827 * frsqrt7(-inf) = canonical NaN 3828 * frsqrt7(-normal) = canonical NaN 3829 * frsqrt7(-subnormal) = canonical NaN 3830 */ 3831 if (float16_is_signaling_nan(f, s) || 3832 (float16_is_infinity(f) && sign) || 3833 (float16_is_normal(f) && sign) || 3834 (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) { 3835 s->float_exception_flags |= float_flag_invalid; 3836 return float16_default_nan(s); 3837 } 3838 3839 /* frsqrt7(qNaN) = canonical NaN */ 3840 if (float16_is_quiet_nan(f, s)) { 3841 return float16_default_nan(s); 3842 } 3843 3844 /* frsqrt7(+-0) = +-inf */ 3845 if (float16_is_zero(f)) { 3846 s->float_exception_flags |= float_flag_divbyzero; 3847 return float16_set_sign(float16_infinity, sign); 3848 } 3849 3850 /* frsqrt7(+inf) = +0 */ 3851 if (float16_is_infinity(f) && !sign) { 3852 return float16_set_sign(float16_zero, sign); 3853 } 3854 3855 /* +normal, +subnormal */ 3856 uint64_t val = frsqrt7(f, exp_size, frac_size); 3857 return make_float16(val); 3858 } 3859 3860 static float32 frsqrt7_s(float32 f, float_status *s) 3861 { 3862 int exp_size = 8, frac_size = 23; 3863 bool sign = float32_is_neg(f); 3864 3865 /* 3866 * frsqrt7(sNaN) = canonical NaN 3867 * frsqrt7(-inf) = canonical NaN 3868 * frsqrt7(-normal) = canonical NaN 3869 * frsqrt7(-subnormal) = canonical NaN 3870 */ 3871 if (float32_is_signaling_nan(f, s) || 3872 (float32_is_infinity(f) && sign) || 3873 (float32_is_normal(f) && sign) || 3874 (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) { 3875 s->float_exception_flags |= float_flag_invalid; 3876 return float32_default_nan(s); 3877 } 3878 3879 /* frsqrt7(qNaN) = canonical NaN */ 3880 if (float32_is_quiet_nan(f, s)) { 3881 return float32_default_nan(s); 3882 } 3883 3884 /* frsqrt7(+-0) = +-inf */ 3885 if (float32_is_zero(f)) { 3886 s->float_exception_flags |= float_flag_divbyzero; 3887 return float32_set_sign(float32_infinity, sign); 3888 } 3889 3890 /* frsqrt7(+inf) = +0 */ 3891 if (float32_is_infinity(f) && !sign) { 3892 return float32_set_sign(float32_zero, sign); 3893 } 3894 3895 /* +normal, +subnormal */ 3896 uint64_t val = frsqrt7(f, exp_size, frac_size); 3897 return make_float32(val); 3898 } 3899 3900 static float64 frsqrt7_d(float64 f, float_status *s) 3901 { 3902 int exp_size = 11, frac_size = 52; 3903 bool sign = float64_is_neg(f); 3904 3905 /* 3906 * frsqrt7(sNaN) = canonical NaN 3907 * frsqrt7(-inf) = canonical NaN 3908 * frsqrt7(-normal) = canonical NaN 3909 * frsqrt7(-subnormal) = canonical NaN 3910 */ 3911 if (float64_is_signaling_nan(f, s) || 3912 (float64_is_infinity(f) && sign) || 3913 (float64_is_normal(f) && sign) || 3914 (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) { 3915 
s->float_exception_flags |= float_flag_invalid; 3916 return float64_default_nan(s); 3917 } 3918 3919 /* frsqrt7(qNaN) = canonical NaN */ 3920 if (float64_is_quiet_nan(f, s)) { 3921 return float64_default_nan(s); 3922 } 3923 3924 /* frsqrt7(+-0) = +-inf */ 3925 if (float64_is_zero(f)) { 3926 s->float_exception_flags |= float_flag_divbyzero; 3927 return float64_set_sign(float64_infinity, sign); 3928 } 3929 3930 /* frsqrt7(+inf) = +0 */ 3931 if (float64_is_infinity(f) && !sign) { 3932 return float64_set_sign(float64_zero, sign); 3933 } 3934 3935 /* +normal, +subnormal */ 3936 uint64_t val = frsqrt7(f, exp_size, frac_size); 3937 return make_float64(val); 3938 } 3939 3940 RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h) 3941 RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s) 3942 RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d) 3943 GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2) 3944 GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4) 3945 GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8) 3946 3947 /* 3948 * Vector Floating-Point Reciprocal Estimate Instruction 3949 * 3950 * Adapted from riscv-v-spec recip.c: 3951 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3952 */ 3953 static uint64_t frec7(uint64_t f, int exp_size, int frac_size, 3954 float_status *s) 3955 { 3956 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3957 uint64_t exp = extract64(f, frac_size, exp_size); 3958 uint64_t frac = extract64(f, 0, frac_size); 3959 3960 const uint8_t lookup_table[] = { 3961 127, 125, 123, 121, 119, 117, 116, 114, 3962 112, 110, 109, 107, 105, 104, 102, 100, 3963 99, 97, 96, 94, 93, 91, 90, 88, 3964 87, 85, 84, 83, 81, 80, 79, 77, 3965 76, 75, 74, 72, 71, 70, 69, 68, 3966 66, 65, 64, 63, 62, 61, 60, 59, 3967 58, 57, 56, 55, 54, 53, 52, 51, 3968 50, 49, 48, 47, 46, 45, 44, 43, 3969 42, 41, 40, 40, 39, 38, 37, 36, 3970 35, 35, 34, 33, 32, 31, 31, 30, 3971 29, 28, 28, 27, 26, 25, 25, 24, 3972 23, 23, 22, 21, 21, 20, 19, 19, 3973 18, 17, 17, 16, 15, 15, 14, 14, 3974 13, 12, 12, 11, 11, 10, 9, 9, 3975 8, 8, 7, 7, 6, 5, 5, 4, 3976 4, 3, 3, 2, 2, 1, 1, 0 3977 }; 3978 const int precision = 7; 3979 3980 if (exp == 0 && frac != 0) { /* subnormal */ 3981 /* Normalize the subnormal. */ 3982 while (extract64(frac, frac_size - 1, 1) == 0) { 3983 exp--; 3984 frac <<= 1; 3985 } 3986 3987 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3988 3989 if (exp != 0 && exp != UINT64_MAX) { 3990 /* 3991 * Overflow to inf or max value of same sign, 3992 * depending on sign and rounding mode. 3993 */ 3994 s->float_exception_flags |= (float_flag_inexact | 3995 float_flag_overflow); 3996 3997 if ((s->float_rounding_mode == float_round_to_zero) || 3998 ((s->float_rounding_mode == float_round_down) && !sign) || 3999 ((s->float_rounding_mode == float_round_up) && sign)) { 4000 /* Return greatest/negative finite value. */ 4001 return (sign << (exp_size + frac_size)) | 4002 (MAKE_64BIT_MASK(frac_size, exp_size) - 1); 4003 } else { 4004 /* Return +-inf. */ 4005 return (sign << (exp_size + frac_size)) | 4006 MAKE_64BIT_MASK(frac_size, exp_size); 4007 } 4008 } 4009 } 4010 4011 int idx = frac >> (frac_size - precision); 4012 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 4013 (frac_size - precision); 4014 uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp; 4015 4016 if (out_exp == 0 || out_exp == UINT64_MAX) { 4017 /* 4018 * The result is subnormal, but don't raise the underflow exception, 4019 * because there's no additional loss of precision. 
4020 */ 4021 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1); 4022 if (out_exp == UINT64_MAX) { 4023 out_frac >>= 1; 4024 out_exp = 0; 4025 } 4026 } 4027 4028 uint64_t val = 0; 4029 val = deposit64(val, 0, frac_size, out_frac); 4030 val = deposit64(val, frac_size, exp_size, out_exp); 4031 val = deposit64(val, frac_size + exp_size, 1, sign); 4032 return val; 4033 } 4034 4035 static float16 frec7_h(float16 f, float_status *s) 4036 { 4037 int exp_size = 5, frac_size = 10; 4038 bool sign = float16_is_neg(f); 4039 4040 /* frec7(+-inf) = +-0 */ 4041 if (float16_is_infinity(f)) { 4042 return float16_set_sign(float16_zero, sign); 4043 } 4044 4045 /* frec7(+-0) = +-inf */ 4046 if (float16_is_zero(f)) { 4047 s->float_exception_flags |= float_flag_divbyzero; 4048 return float16_set_sign(float16_infinity, sign); 4049 } 4050 4051 /* frec7(sNaN) = canonical NaN */ 4052 if (float16_is_signaling_nan(f, s)) { 4053 s->float_exception_flags |= float_flag_invalid; 4054 return float16_default_nan(s); 4055 } 4056 4057 /* frec7(qNaN) = canonical NaN */ 4058 if (float16_is_quiet_nan(f, s)) { 4059 return float16_default_nan(s); 4060 } 4061 4062 /* +-normal, +-subnormal */ 4063 uint64_t val = frec7(f, exp_size, frac_size, s); 4064 return make_float16(val); 4065 } 4066 4067 static float32 frec7_s(float32 f, float_status *s) 4068 { 4069 int exp_size = 8, frac_size = 23; 4070 bool sign = float32_is_neg(f); 4071 4072 /* frec7(+-inf) = +-0 */ 4073 if (float32_is_infinity(f)) { 4074 return float32_set_sign(float32_zero, sign); 4075 } 4076 4077 /* frec7(+-0) = +-inf */ 4078 if (float32_is_zero(f)) { 4079 s->float_exception_flags |= float_flag_divbyzero; 4080 return float32_set_sign(float32_infinity, sign); 4081 } 4082 4083 /* frec7(sNaN) = canonical NaN */ 4084 if (float32_is_signaling_nan(f, s)) { 4085 s->float_exception_flags |= float_flag_invalid; 4086 return float32_default_nan(s); 4087 } 4088 4089 /* frec7(qNaN) = canonical NaN */ 4090 if (float32_is_quiet_nan(f, s)) { 4091 return float32_default_nan(s); 4092 } 4093 4094 /* +-normal, +-subnormal */ 4095 uint64_t val = frec7(f, exp_size, frac_size, s); 4096 return make_float32(val); 4097 } 4098 4099 static float64 frec7_d(float64 f, float_status *s) 4100 { 4101 int exp_size = 11, frac_size = 52; 4102 bool sign = float64_is_neg(f); 4103 4104 /* frec7(+-inf) = +-0 */ 4105 if (float64_is_infinity(f)) { 4106 return float64_set_sign(float64_zero, sign); 4107 } 4108 4109 /* frec7(+-0) = +-inf */ 4110 if (float64_is_zero(f)) { 4111 s->float_exception_flags |= float_flag_divbyzero; 4112 return float64_set_sign(float64_infinity, sign); 4113 } 4114 4115 /* frec7(sNaN) = canonical NaN */ 4116 if (float64_is_signaling_nan(f, s)) { 4117 s->float_exception_flags |= float_flag_invalid; 4118 return float64_default_nan(s); 4119 } 4120 4121 /* frec7(qNaN) = canonical NaN */ 4122 if (float64_is_quiet_nan(f, s)) { 4123 return float64_default_nan(s); 4124 } 4125 4126 /* +-normal, +-subnormal */ 4127 uint64_t val = frec7(f, exp_size, frac_size, s); 4128 return make_float64(val); 4129 } 4130 4131 RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h) 4132 RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s) 4133 RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d) 4134 GEN_VEXT_V_ENV(vfrec7_v_h, 2) 4135 GEN_VEXT_V_ENV(vfrec7_v_w, 4) 4136 GEN_VEXT_V_ENV(vfrec7_v_d, 8) 4137 4138 /* Vector Floating-Point MIN/MAX Instructions */ 4139 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number) 4140 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, 
float32_minimum_number) 4141 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number) 4142 GEN_VEXT_VV_ENV(vfmin_vv_h, 2) 4143 GEN_VEXT_VV_ENV(vfmin_vv_w, 4) 4144 GEN_VEXT_VV_ENV(vfmin_vv_d, 8) 4145 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number) 4146 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number) 4147 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number) 4148 GEN_VEXT_VF(vfmin_vf_h, 2) 4149 GEN_VEXT_VF(vfmin_vf_w, 4) 4150 GEN_VEXT_VF(vfmin_vf_d, 8) 4151 4152 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number) 4153 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number) 4154 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number) 4155 GEN_VEXT_VV_ENV(vfmax_vv_h, 2) 4156 GEN_VEXT_VV_ENV(vfmax_vv_w, 4) 4157 GEN_VEXT_VV_ENV(vfmax_vv_d, 8) 4158 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number) 4159 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number) 4160 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number) 4161 GEN_VEXT_VF(vfmax_vf_h, 2) 4162 GEN_VEXT_VF(vfmax_vf_w, 4) 4163 GEN_VEXT_VF(vfmax_vf_d, 8) 4164 4165 /* Vector Floating-Point Sign-Injection Instructions */ 4166 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) 4167 { 4168 return deposit64(b, 0, 15, a); 4169 } 4170 4171 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s) 4172 { 4173 return deposit64(b, 0, 31, a); 4174 } 4175 4176 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) 4177 { 4178 return deposit64(b, 0, 63, a); 4179 } 4180 4181 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) 4182 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) 4183 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) 4184 GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2) 4185 GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4) 4186 GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8) 4187 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) 4188 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) 4189 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64) 4190 GEN_VEXT_VF(vfsgnj_vf_h, 2) 4191 GEN_VEXT_VF(vfsgnj_vf_w, 4) 4192 GEN_VEXT_VF(vfsgnj_vf_d, 8) 4193 4194 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) 4195 { 4196 return deposit64(~b, 0, 15, a); 4197 } 4198 4199 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s) 4200 { 4201 return deposit64(~b, 0, 31, a); 4202 } 4203 4204 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s) 4205 { 4206 return deposit64(~b, 0, 63, a); 4207 } 4208 4209 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) 4210 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) 4211 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) 4212 GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2) 4213 GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4) 4214 GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8) 4215 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) 4216 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) 4217 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) 4218 GEN_VEXT_VF(vfsgnjn_vf_h, 2) 4219 GEN_VEXT_VF(vfsgnjn_vf_w, 4) 4220 GEN_VEXT_VF(vfsgnjn_vf_d, 8) 4221 4222 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) 4223 { 4224 return deposit64(b ^ a, 0, 15, a); 4225 } 4226 4227 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s) 4228 { 4229 return deposit64(b ^ a, 0, 31, a); 4230 } 4231 4232 static uint64_t fsgnjx64(uint64_t a, uint64_t b, 
float_status *s) 4233 { 4234 return deposit64(b ^ a, 0, 63, a); 4235 } 4236 4237 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) 4238 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) 4239 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) 4240 GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2) 4241 GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4) 4242 GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8) 4243 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) 4244 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) 4245 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) 4246 GEN_VEXT_VF(vfsgnjx_vf_h, 2) 4247 GEN_VEXT_VF(vfsgnjx_vf_w, 4) 4248 GEN_VEXT_VF(vfsgnjx_vf_d, 8) 4249 4250 /* Vector Floating-Point Compare Instructions */ 4251 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ 4252 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4253 CPURISCVState *env, uint32_t desc) \ 4254 { \ 4255 uint32_t vm = vext_vm(desc); \ 4256 uint32_t vl = env->vl; \ 4257 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \ 4258 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 4259 uint32_t vma = vext_vma(desc); \ 4260 uint32_t i; \ 4261 \ 4262 VSTART_CHECK_EARLY_EXIT(env, vl); \ 4263 \ 4264 for (i = env->vstart; i < vl; i++) { \ 4265 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 4266 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4267 if (!vm && !vext_elem_mask(v0, i)) { \ 4268 /* set masked-off elements to 1s */ \ 4269 if (vma) { \ 4270 vext_set_elem_mask(vd, i, 1); \ 4271 } \ 4272 continue; \ 4273 } \ 4274 vext_set_elem_mask(vd, i, \ 4275 DO_OP(s2, s1, &env->fp_status)); \ 4276 } \ 4277 env->vstart = 0; \ 4278 /* 4279 * mask destination register are always tail-agnostic 4280 * set tail elements to 1s 4281 */ \ 4282 if (vta_all_1s) { \ 4283 for (; i < total_elems; i++) { \ 4284 vext_set_elem_mask(vd, i, 1); \ 4285 } \ 4286 } \ 4287 } 4288 4289 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet) 4290 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet) 4291 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet) 4292 4293 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \ 4294 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4295 CPURISCVState *env, uint32_t desc) \ 4296 { \ 4297 uint32_t vm = vext_vm(desc); \ 4298 uint32_t vl = env->vl; \ 4299 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \ 4300 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 4301 uint32_t vma = vext_vma(desc); \ 4302 uint32_t i; \ 4303 \ 4304 VSTART_CHECK_EARLY_EXIT(env, vl); \ 4305 \ 4306 for (i = env->vstart; i < vl; i++) { \ 4307 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4308 if (!vm && !vext_elem_mask(v0, i)) { \ 4309 /* set masked-off elements to 1s */ \ 4310 if (vma) { \ 4311 vext_set_elem_mask(vd, i, 1); \ 4312 } \ 4313 continue; \ 4314 } \ 4315 vext_set_elem_mask(vd, i, \ 4316 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \ 4317 } \ 4318 env->vstart = 0; \ 4319 /* 4320 * mask destination register are always tail-agnostic 4321 * set tail elements to 1s 4322 */ \ 4323 if (vta_all_1s) { \ 4324 for (; i < total_elems; i++) { \ 4325 vext_set_elem_mask(vd, i, 1); \ 4326 } \ 4327 } \ 4328 } 4329 4330 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet) 4331 GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet) 4332 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet) 4333 4334 static bool vmfne16(uint16_t a, uint16_t b, float_status *s) 4335 { 4336 FloatRelation compare = float16_compare_quiet(a, b, s); 4337 return compare != float_relation_equal; 4338 } 4339 4340 static 
bool vmfne32(uint32_t a, uint32_t b, float_status *s) 4341 { 4342 FloatRelation compare = float32_compare_quiet(a, b, s); 4343 return compare != float_relation_equal; 4344 } 4345 4346 static bool vmfne64(uint64_t a, uint64_t b, float_status *s) 4347 { 4348 FloatRelation compare = float64_compare_quiet(a, b, s); 4349 return compare != float_relation_equal; 4350 } 4351 4352 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16) 4353 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32) 4354 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64) 4355 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16) 4356 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32) 4357 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64) 4358 4359 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt) 4360 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt) 4361 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt) 4362 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt) 4363 GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt) 4364 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt) 4365 4366 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le) 4367 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le) 4368 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le) 4369 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le) 4370 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le) 4371 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le) 4372 4373 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s) 4374 { 4375 FloatRelation compare = float16_compare(a, b, s); 4376 return compare == float_relation_greater; 4377 } 4378 4379 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s) 4380 { 4381 FloatRelation compare = float32_compare(a, b, s); 4382 return compare == float_relation_greater; 4383 } 4384 4385 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s) 4386 { 4387 FloatRelation compare = float64_compare(a, b, s); 4388 return compare == float_relation_greater; 4389 } 4390 4391 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16) 4392 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32) 4393 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64) 4394 4395 static bool vmfge16(uint16_t a, uint16_t b, float_status *s) 4396 { 4397 FloatRelation compare = float16_compare(a, b, s); 4398 return compare == float_relation_greater || 4399 compare == float_relation_equal; 4400 } 4401 4402 static bool vmfge32(uint32_t a, uint32_t b, float_status *s) 4403 { 4404 FloatRelation compare = float32_compare(a, b, s); 4405 return compare == float_relation_greater || 4406 compare == float_relation_equal; 4407 } 4408 4409 static bool vmfge64(uint64_t a, uint64_t b, float_status *s) 4410 { 4411 FloatRelation compare = float64_compare(a, b, s); 4412 return compare == float_relation_greater || 4413 compare == float_relation_equal; 4414 } 4415 4416 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16) 4417 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32) 4418 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64) 4419 4420 /* Vector Floating-Point Classify Instruction */ 4421 target_ulong fclass_h(uint64_t frs1) 4422 { 4423 float16 f = frs1; 4424 bool sign = float16_is_neg(f); 4425 4426 if (float16_is_infinity(f)) { 4427 return sign ? 1 << 0 : 1 << 7; 4428 } else if (float16_is_zero(f)) { 4429 return sign ? 1 << 3 : 1 << 4; 4430 } else if (float16_is_zero_or_denormal(f)) { 4431 return sign ? 
1 << 2 : 1 << 5; 4432 } else if (float16_is_any_nan(f)) { 4433 float_status s = { }; /* for snan_bit_is_one */ 4434 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4435 } else { 4436 return sign ? 1 << 1 : 1 << 6; 4437 } 4438 } 4439 4440 target_ulong fclass_s(uint64_t frs1) 4441 { 4442 float32 f = frs1; 4443 bool sign = float32_is_neg(f); 4444 4445 if (float32_is_infinity(f)) { 4446 return sign ? 1 << 0 : 1 << 7; 4447 } else if (float32_is_zero(f)) { 4448 return sign ? 1 << 3 : 1 << 4; 4449 } else if (float32_is_zero_or_denormal(f)) { 4450 return sign ? 1 << 2 : 1 << 5; 4451 } else if (float32_is_any_nan(f)) { 4452 float_status s = { }; /* for snan_bit_is_one */ 4453 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4454 } else { 4455 return sign ? 1 << 1 : 1 << 6; 4456 } 4457 } 4458 4459 target_ulong fclass_d(uint64_t frs1) 4460 { 4461 float64 f = frs1; 4462 bool sign = float64_is_neg(f); 4463 4464 if (float64_is_infinity(f)) { 4465 return sign ? 1 << 0 : 1 << 7; 4466 } else if (float64_is_zero(f)) { 4467 return sign ? 1 << 3 : 1 << 4; 4468 } else if (float64_is_zero_or_denormal(f)) { 4469 return sign ? 1 << 2 : 1 << 5; 4470 } else if (float64_is_any_nan(f)) { 4471 float_status s = { }; /* for snan_bit_is_one */ 4472 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4473 } else { 4474 return sign ? 1 << 1 : 1 << 6; 4475 } 4476 } 4477 4478 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) 4479 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) 4480 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) 4481 GEN_VEXT_V(vfclass_v_h, 2) 4482 GEN_VEXT_V(vfclass_v_w, 4) 4483 GEN_VEXT_V(vfclass_v_d, 8) 4484 4485 /* Vector Floating-Point Merge Instruction */ 4486 4487 #define GEN_VFMERGE_VF(NAME, ETYPE, H) \ 4488 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4489 CPURISCVState *env, uint32_t desc) \ 4490 { \ 4491 uint32_t vm = vext_vm(desc); \ 4492 uint32_t vl = env->vl; \ 4493 uint32_t esz = sizeof(ETYPE); \ 4494 uint32_t total_elems = \ 4495 vext_get_total_elems(env, desc, esz); \ 4496 uint32_t vta = vext_vta(desc); \ 4497 uint32_t i; \ 4498 \ 4499 VSTART_CHECK_EARLY_EXIT(env, vl); \ 4500 \ 4501 for (i = env->vstart; i < vl; i++) { \ 4502 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4503 *((ETYPE *)vd + H(i)) = \ 4504 (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \ 4505 } \ 4506 env->vstart = 0; \ 4507 /* set tail elements to 1s */ \ 4508 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4509 } 4510 4511 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2) 4512 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4) 4513 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8) 4514 4515 /* Single-Width Floating-Point/Integer Type-Convert Instructions */ 4516 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ 4517 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) 4518 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) 4519 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) 4520 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2) 4521 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4) 4522 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8) 4523 4524 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. 
*/ 4525 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) 4526 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) 4527 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) 4528 GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2) 4529 GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4) 4530 GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8) 4531 4532 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ 4533 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) 4534 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) 4535 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) 4536 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2) 4537 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4) 4538 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8) 4539 4540 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */ 4541 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) 4542 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) 4543 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) 4544 GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2) 4545 GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4) 4546 GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8) 4547 4548 /* Widening Floating-Point/Integer Type-Convert Instructions */ 4549 /* (TD, T2, TX2) */ 4550 #define WOP_UU_B uint16_t, uint8_t, uint8_t 4551 #define WOP_UU_H uint32_t, uint16_t, uint16_t 4552 #define WOP_UU_W uint64_t, uint32_t, uint32_t 4553 /* 4554 * vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer. 4555 */ 4556 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) 4557 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) 4558 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 4) 4559 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 8) 4560 4561 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */ 4562 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) 4563 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) 4564 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 4) 4565 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 8) 4566 4567 /* 4568 * vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float. 4569 */ 4570 RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16) 4571 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) 4572 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) 4573 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 2) 4574 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 4) 4575 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 8) 4576 4577 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ 4578 RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16) 4579 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) 4580 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) 4581 GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 2) 4582 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 4) 4583 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 8) 4584 4585 /* 4586 * vfwcvt.f.f.v vd, vs2, vm # Convert single-width float to double-width float. 
4587 */ 4588 static uint32_t vfwcvtffv16(uint16_t a, float_status *s) 4589 { 4590 return float16_to_float32(a, true, s); 4591 } 4592 4593 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16) 4594 RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64) 4595 GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 4) 4596 GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 8) 4597 4598 RVVCALL(OPFVV1, vfwcvtbf16_f_f_v, WOP_UU_H, H4, H2, bfloat16_to_float32) 4599 GEN_VEXT_V_ENV(vfwcvtbf16_f_f_v, 4) 4600 4601 /* Narrowing Floating-Point/Integer Type-Convert Instructions */ 4602 /* (TD, T2, TX2) */ 4603 #define NOP_UU_B uint8_t, uint16_t, uint32_t 4604 #define NOP_UU_H uint16_t, uint32_t, uint32_t 4605 #define NOP_UU_W uint32_t, uint64_t, uint64_t 4606 /* vfncvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ 4607 RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8) 4608 RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16) 4609 RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32) 4610 GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1) 4611 GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2) 4612 GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4) 4613 4614 /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */ 4615 RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8) 4616 RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16) 4617 RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32) 4618 GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1) 4619 GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2) 4620 GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4) 4621 4622 /* 4623 * vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float. 4624 */ 4625 RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16) 4626 RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32) 4627 GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2) 4628 GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4) 4629 4630 /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */ 4631 RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16) 4632 RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32) 4633 GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2) 4634 GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4) 4635 4636 /* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. 
*/ 4637 static uint16_t vfncvtffv16(uint32_t a, float_status *s) 4638 { 4639 return float32_to_float16(a, true, s); 4640 } 4641 4642 RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16) 4643 RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32) 4644 GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2) 4645 GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4) 4646 4647 RVVCALL(OPFVV1, vfncvtbf16_f_f_w, NOP_UU_H, H2, H4, float32_to_bfloat16) 4648 GEN_VEXT_V_ENV(vfncvtbf16_f_f_w, 2) 4649 4650 /* 4651 * Vector Reduction Operations 4652 */ 4653 /* Vector Single-Width Integer Reduction Instructions */ 4654 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \ 4655 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4656 void *vs2, CPURISCVState *env, \ 4657 uint32_t desc) \ 4658 { \ 4659 uint32_t vm = vext_vm(desc); \ 4660 uint32_t vl = env->vl; \ 4661 uint32_t esz = sizeof(TD); \ 4662 uint32_t vlenb = simd_maxsz(desc); \ 4663 uint32_t vta = vext_vta(desc); \ 4664 uint32_t i; \ 4665 TD s1 = *((TD *)vs1 + HD(0)); \ 4666 \ 4667 VSTART_CHECK_EARLY_EXIT(env, vl); \ 4668 \ 4669 for (i = env->vstart; i < vl; i++) { \ 4670 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4671 if (!vm && !vext_elem_mask(v0, i)) { \ 4672 continue; \ 4673 } \ 4674 s1 = OP(s1, (TD)s2); \ 4675 } \ 4676 if (vl > 0) { \ 4677 *((TD *)vd + HD(0)) = s1; \ 4678 } \ 4679 env->vstart = 0; \ 4680 /* set tail elements to 1s */ \ 4681 vext_set_elems_1s(vd, vta, esz, vlenb); \ 4682 } 4683 4684 /* vd[0] = sum(vs1[0], vs2[*]) */ 4685 GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD) 4686 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD) 4687 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD) 4688 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD) 4689 4690 /* vd[0] = maxu(vs1[0], vs2[*]) */ 4691 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX) 4692 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX) 4693 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX) 4694 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX) 4695 4696 /* vd[0] = max(vs1[0], vs2[*]) */ 4697 GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX) 4698 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX) 4699 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX) 4700 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX) 4701 4702 /* vd[0] = minu(vs1[0], vs2[*]) */ 4703 GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN) 4704 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN) 4705 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN) 4706 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN) 4707 4708 /* vd[0] = min(vs1[0], vs2[*]) */ 4709 GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN) 4710 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN) 4711 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN) 4712 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN) 4713 4714 /* vd[0] = and(vs1[0], vs2[*]) */ 4715 GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND) 4716 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND) 4717 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND) 4718 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND) 4719 4720 /* vd[0] = or(vs1[0], vs2[*]) */ 4721 GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR) 4722 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR) 4723 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR) 4724 
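/*
 * Worked example of the GEN_VEXT_RED template above (illustrative values,
 * not generated code): for vredsum.vs with vl = 4 and all elements active,
 * vs1[0] = 10 and vs2[0..3] = {1, 2, 3, 4}, the accumulator starts at
 * s1 = 10 and the loop yields 10 + 1 + 2 + 3 + 4 = 20, which is written
 * to vd[0]. Everything from element 1 up to vlenb bytes is tail and is
 * filled with 1s when vta is set.
 */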
GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR) 4725 4726 /* vd[0] = xor(vs1[0], vs2[*]) */ 4727 GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR) 4728 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR) 4729 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR) 4730 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR) 4731 4732 /* Vector Widening Integer Reduction Instructions */ 4733 /* signed sum reduction into double-width accumulator */ 4734 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD) 4735 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD) 4736 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD) 4737 4738 /* Unsigned sum reduction into double-width accumulator */ 4739 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD) 4740 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD) 4741 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD) 4742 4743 /* Vector Single-Width Floating-Point Reduction Instructions */ 4744 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \ 4745 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4746 void *vs2, CPURISCVState *env, \ 4747 uint32_t desc) \ 4748 { \ 4749 uint32_t vm = vext_vm(desc); \ 4750 uint32_t vl = env->vl; \ 4751 uint32_t esz = sizeof(TD); \ 4752 uint32_t vlenb = simd_maxsz(desc); \ 4753 uint32_t vta = vext_vta(desc); \ 4754 uint32_t i; \ 4755 TD s1 = *((TD *)vs1 + HD(0)); \ 4756 \ 4757 VSTART_CHECK_EARLY_EXIT(env, vl); \ 4758 \ 4759 for (i = env->vstart; i < vl; i++) { \ 4760 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4761 if (!vm && !vext_elem_mask(v0, i)) { \ 4762 continue; \ 4763 } \ 4764 s1 = OP(s1, (TD)s2, &env->fp_status); \ 4765 } \ 4766 if (vl > 0) { \ 4767 *((TD *)vd + HD(0)) = s1; \ 4768 } \ 4769 env->vstart = 0; \ 4770 /* set tail elements to 1s */ \ 4771 vext_set_elems_1s(vd, vta, esz, vlenb); \ 4772 } 4773 4774 /* Unordered sum */ 4775 GEN_VEXT_FRED(vfredusum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 4776 GEN_VEXT_FRED(vfredusum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4777 GEN_VEXT_FRED(vfredusum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4778 4779 /* Ordered sum */ 4780 GEN_VEXT_FRED(vfredosum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 4781 GEN_VEXT_FRED(vfredosum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4782 GEN_VEXT_FRED(vfredosum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4783 4784 /* Maximum value */ 4785 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, 4786 float16_maximum_number) 4787 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, 4788 float32_maximum_number) 4789 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, 4790 float64_maximum_number) 4791 4792 /* Minimum value */ 4793 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, 4794 float16_minimum_number) 4795 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, 4796 float32_minimum_number) 4797 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, 4798 float64_minimum_number) 4799 4800 /* Vector Widening Floating-Point Add Instructions */ 4801 static uint32_t fwadd16(uint32_t a, uint16_t b, float_status *s) 4802 { 4803 return float32_add(a, float16_to_float32(b, true, s), s); 4804 } 4805 4806 static uint64_t fwadd32(uint64_t a, uint32_t b, float_status *s) 4807 { 4808 return float64_add(a, float32_to_float64(b, s), s); 4809 } 4810 4811 /* Vector Widening Floating-Point Reduction Instructions */ 4812 /* Ordered/unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */ 4813 GEN_VEXT_FRED(vfwredusum_vs_h, 
uint32_t, uint16_t, H4, H2, fwadd16) 4814 GEN_VEXT_FRED(vfwredusum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32) 4815 GEN_VEXT_FRED(vfwredosum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16) 4816 GEN_VEXT_FRED(vfwredosum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32) 4817 4818 /* 4819 * Vector Mask Operations 4820 */ 4821 /* Vector Mask-Register Logical Instructions */ 4822 #define GEN_VEXT_MASK_VV(NAME, OP) \ 4823 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4824 void *vs2, CPURISCVState *env, \ 4825 uint32_t desc) \ 4826 { \ 4827 uint32_t vl = env->vl; \ 4828 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;\ 4829 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 4830 uint32_t i; \ 4831 int a, b; \ 4832 \ 4833 VSTART_CHECK_EARLY_EXIT(env, vl); \ 4834 \ 4835 for (i = env->vstart; i < vl; i++) { \ 4836 a = vext_elem_mask(vs1, i); \ 4837 b = vext_elem_mask(vs2, i); \ 4838 vext_set_elem_mask(vd, i, OP(b, a)); \ 4839 } \ 4840 env->vstart = 0; \ 4841 /* 4842 * mask destination register are always tail-agnostic 4843 * set tail elements to 1s 4844 */ \ 4845 if (vta_all_1s) { \ 4846 for (; i < total_elems; i++) { \ 4847 vext_set_elem_mask(vd, i, 1); \ 4848 } \ 4849 } \ 4850 } 4851 4852 #define DO_NAND(N, M) (!(N & M)) 4853 #define DO_ANDNOT(N, M) (N & !M) 4854 #define DO_NOR(N, M) (!(N | M)) 4855 #define DO_ORNOT(N, M) (N | !M) 4856 #define DO_XNOR(N, M) (!(N ^ M)) 4857 4858 GEN_VEXT_MASK_VV(vmand_mm, DO_AND) 4859 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND) 4860 GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT) 4861 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR) 4862 GEN_VEXT_MASK_VV(vmor_mm, DO_OR) 4863 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR) 4864 GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT) 4865 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR) 4866 4867 /* Vector count population in mask vcpop */ 4868 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env, 4869 uint32_t desc) 4870 { 4871 target_ulong cnt = 0; 4872 uint32_t vm = vext_vm(desc); 4873 uint32_t vl = env->vl; 4874 int i; 4875 4876 for (i = env->vstart; i < vl; i++) { 4877 if (vm || vext_elem_mask(v0, i)) { 4878 if (vext_elem_mask(vs2, i)) { 4879 cnt++; 4880 } 4881 } 4882 } 4883 env->vstart = 0; 4884 return cnt; 4885 } 4886 4887 /* vfirst find-first-set mask bit */ 4888 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env, 4889 uint32_t desc) 4890 { 4891 uint32_t vm = vext_vm(desc); 4892 uint32_t vl = env->vl; 4893 int i; 4894 4895 for (i = env->vstart; i < vl; i++) { 4896 if (vm || vext_elem_mask(v0, i)) { 4897 if (vext_elem_mask(vs2, i)) { 4898 return i; 4899 } 4900 } 4901 } 4902 env->vstart = 0; 4903 return -1LL; 4904 } 4905 4906 enum set_mask_type { 4907 ONLY_FIRST = 1, 4908 INCLUDE_FIRST, 4909 BEFORE_FIRST, 4910 }; 4911 4912 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, 4913 uint32_t desc, enum set_mask_type type) 4914 { 4915 uint32_t vm = vext_vm(desc); 4916 uint32_t vl = env->vl; 4917 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; 4918 uint32_t vta_all_1s = vext_vta_all_1s(desc); 4919 uint32_t vma = vext_vma(desc); 4920 int i; 4921 bool first_mask_bit = false; 4922 4923 VSTART_CHECK_EARLY_EXIT(env, vl); 4924 4925 for (i = env->vstart; i < vl; i++) { 4926 if (!vm && !vext_elem_mask(v0, i)) { 4927 /* set masked-off elements to 1s */ 4928 if (vma) { 4929 vext_set_elem_mask(vd, i, 1); 4930 } 4931 continue; 4932 } 4933 /* write a zero to all following active elements */ 4934 if (first_mask_bit) { 4935 vext_set_elem_mask(vd, i, 0); 4936 continue; 4937 } 4938 if (vext_elem_mask(vs2, i)) { 4939 first_mask_bit = true; 4940 if (type 
static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
                   uint32_t desc, enum set_mask_type type)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;
    uint32_t vta_all_1s = vext_vta_all_1s(desc);
    uint32_t vma = vext_vma(desc);
    int i;
    bool first_mask_bit = false;

    VSTART_CHECK_EARLY_EXIT(env, vl);

    for (i = env->vstart; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            /* set masked-off elements to 1s */
            if (vma) {
                vext_set_elem_mask(vd, i, 1);
            }
            continue;
        }
        /* write a zero to all following active elements */
        if (first_mask_bit) {
            vext_set_elem_mask(vd, i, 0);
            continue;
        }
        if (vext_elem_mask(vs2, i)) {
            first_mask_bit = true;
            if (type == BEFORE_FIRST) {
                vext_set_elem_mask(vd, i, 0);
            } else {
                vext_set_elem_mask(vd, i, 1);
            }
        } else {
            if (type == ONLY_FIRST) {
                vext_set_elem_mask(vd, i, 0);
            } else {
                vext_set_elem_mask(vd, i, 1);
            }
        }
    }
    env->vstart = 0;
    /*
     * The mask destination register is always tail-agnostic;
     * set tail elements to 1s
     */
    if (vta_all_1s) {
        for (; i < total_elems; i++) {
            vext_set_elem_mask(vd, i, 1);
        }
    }
}

void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
}

void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
}

void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
}

/* Vector Iota Instruction */
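/*
 * viota.m writes to each active destination element the number of mask
 * bits set in vs2 at lower element indices (a running prefix count).
 * Illustrative example with all elements active and vs2 = 1 0 0 1 0 1:
 * vd = 0 1 1 1 2 2.  vid.v further below simply writes each element's
 * own index.
 */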
#define GEN_VEXT_VIOTA_M(NAME, ETYPE, H)                                  \
void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env,      \
                  uint32_t desc)                                          \
{                                                                         \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint32_t sum = 0;                                                     \
    int i;                                                                \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = sum;                                      \
        if (vext_elem_mask(vs2, i)) {                                     \
            sum++;                                                        \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1)
GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)

/* Vector Element Index Instruction */
#define GEN_VEXT_VID_V(NAME, ETYPE, H)                                    \
void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc)  \
{                                                                         \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    int i;                                                                \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = i;                                        \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_VID_V(vid_v_b, uint8_t, H1)
GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)

/*
 * Vector Permutation Instructions
 */

/* Vector Slide Instructions */
#define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H)                              \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    target_ulong offset = s1, i_min, i;                                   \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    i_min = MAX(env->vstart, offset);                                     \
    for (i = i_min; i < vl; i++) {                                        \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset));          \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)

#define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H)                            \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    target_ulong i_max, i_min, i;                                         \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    i_min = MIN(s1 < vlmax ? vlmax - s1 : 0, vl);                         \
    i_max = MAX(i_min, env->vstart);                                      \
    for (i = env->vstart; i < i_max; ++i) {                               \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1));              \
    }                                                                     \
                                                                          \
    for (i = i_max; i < vl; ++i) {                                        \
        if (vm || vext_elem_mask(v0, i)) {                                \
            *((ETYPE *)vd + H(i)) = 0;                                    \
        }                                                                 \
    }                                                                     \
                                                                          \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)
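
/*
 * vslide1up/vslide1down shift vs2 by one element and insert the scalar
 * operand at the vacated end.  Illustrative example with vl = 4,
 * vs2 = {a, b, c, d} and scalar s1 = x:
 *   vslide1up:   vd = {x, a, b, c}
 *   vslide1down: vd = {b, c, d, x}
 * The same helpers also back the vfslide1up.vf/vfslide1down.vf variants
 * further below.
 */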
#define GEN_VEXT_VSLIE1UP(BITWIDTH, H)                                    \
static void vslide1up_##BITWIDTH(void *vd, void *v0, uint64_t s1,         \
                                 void *vs2, CPURISCVState *env,           \
                                 uint32_t desc)                           \
{                                                                         \
    typedef uint##BITWIDTH##_t ETYPE;                                     \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint32_t i;                                                           \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        if (i == 0) {                                                     \
            *((ETYPE *)vd + H(i)) = s1;                                   \
        } else {                                                          \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_VSLIE1UP(8, H1)
GEN_VEXT_VSLIE1UP(16, H2)
GEN_VEXT_VSLIE1UP(32, H4)
GEN_VEXT_VSLIE1UP(64, H8)

#define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH)                             \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc);                     \
}

/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)

#define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H)                                 \
static void vslide1down_##BITWIDTH(void *vd, void *v0, uint64_t s1,       \
                                   void *vs2, CPURISCVState *env,         \
                                   uint32_t desc)                         \
{                                                                         \
    typedef uint##BITWIDTH##_t ETYPE;                                     \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint32_t i;                                                           \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        if (i == vl - 1) {                                                \
            *((ETYPE *)vd + H(i)) = s1;                                   \
        } else {                                                          \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_VSLIDE1DOWN(8, H1)
GEN_VEXT_VSLIDE1DOWN(16, H2)
GEN_VEXT_VSLIDE1DOWN(32, H4)
GEN_VEXT_VSLIDE1DOWN(64, H8)

#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH)                           \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc);                   \
}

/* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)

/* Vector Floating-Point Slide Instructions */
#define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH)                            \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,             \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc);                     \
}

/* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)

#define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH)                          \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,             \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc);                   \
}

/* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)

/* Vector Register Gather Instruction */
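/*
 * Illustrative example for vrgather.vv, assuming VLMAX = vl = 4, with
 * vs2 = {a, b, c, d} and indices vs1 = {3, 0, 9, 1}: vd = {d, a, 0, b},
 * since any index >= VLMAX reads as 0.  vrgatherei16.vv uses 16-bit
 * indices regardless of SEW, which is why the index type (TS1) and the
 * data type (TS2) may differ below.
 */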
#define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2)                    \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2)));             \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(TS2);                                           \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint64_t index;                                                       \
    uint32_t i;                                                           \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        index = *((TS1 *)vs1 + HS1(i));                                   \
        if (index >= vlmax) {                                             \
            *((TS2 *)vd + HS2(i)) = 0;                                    \
        } else {                                                          \
            *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1)
GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)

GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)

#define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H)                              \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint64_t index = s1;                                                  \
    uint32_t i;                                                           \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        if (index >= vlmax) {                                             \
            *((ETYPE *)vd + H(i)) = 0;                                    \
        } else {                                                          \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[x[rs1]] */
GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1)
GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)

/* Vector Compress Instruction */
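/*
 * vcompress.vm packs the elements of vs2 whose mask bit in vs1 is set
 * into consecutive elements at the start of vd.  Illustrative example
 * with vl = 5, vs2 = {a, b, c, d, e} and vs1 = 1 0 1 1 0:
 * vd = {a, c, d, ...}; the tail-agnostic handling below starts at
 * element num, the number of elements written.
 */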
#define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H)                             \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t num = 0, i;                                                  \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vext_elem_mask(vs1, i)) {                                    \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i));                 \
        num++;                                                            \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, num * esz, total_elems * esz);             \
}

/* Compress into vd elements of vs2 where vs1 is enabled */
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)

/* Vector Whole Register Move */
void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
{
    /* EEW = SEW */
    uint32_t maxsz = simd_maxsz(desc);
    uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
    uint32_t startb = env->vstart * sewb;
    uint32_t i = startb;

    if (startb >= maxsz) {
        env->vstart = 0;
        return;
    }

    if (HOST_BIG_ENDIAN && i % 8 != 0) {
        uint32_t j = ROUND_UP(i, 8);
        memcpy((uint8_t *)vd + H1(j - 1),
               (uint8_t *)vs2 + H1(j - 1),
               j - i);
        i = j;
    }

    memcpy((uint8_t *)vd + H1(i),
           (uint8_t *)vs2 + H1(i),
           maxsz - i);

    env->vstart = 0;
}

/* Vector Integer Extension */
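/*
 * vzext.vf<N>/vsext.vf<N> widen each source element to SEW from a source
 * EEW of SEW/N (N = 2, 4 or 8).  For example, vsext.vf4 with SEW = 32
 * reads int8_t elements from vs2 and sign-extends each to int32_t in vd;
 * vzext.vf4 zero-extends instead.
 */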
#define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1)                     \
void HELPER(NAME)(void *vd, void *v0, void *vs2,                          \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vl = env->vl;                                                \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint32_t i;                                                           \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i));                \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1)
GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1)
GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1)

GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1)
GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1)
GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1)