/*
 * RISC-V Vector Extension Helpers for QEMU.
 *
 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "qemu/bitops.h"
#include "cpu.h"
#include "exec/memop.h"
#include "exec/exec-all.h"
#include "accel/tcg/cpu-ldst.h"
#include "exec/page-protection.h"
#include "exec/helper-proto.h"
#include "exec/tlb-flags.h"
#include "exec/target_page.h"
#include "exec/tswap.h"
#include "fpu/softfloat.h"
#include "tcg/tcg-gvec-desc.h"
#include "internals.h"
#include "vector_internals.h"
#include <math.h>

target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
                            target_ulong s2)
{
    int vlmax, vl;
    RISCVCPU *cpu = env_archcpu(env);
    uint64_t vlmul = FIELD_EX64(s2, VTYPE, VLMUL);
    uint8_t vsew = FIELD_EX64(s2, VTYPE, VSEW);
    uint16_t sew = 8 << vsew;
    uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
    int xlen = riscv_cpu_xlen(env);
    bool vill = (s2 >> (xlen - 1)) & 0x1;
    target_ulong reserved = s2 &
                            MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
                                            xlen - 1 - R_VTYPE_RESERVED_SHIFT);
    uint16_t vlen = cpu->cfg.vlenb << 3;
    int8_t lmul;

    if (vlmul & 4) {
        /*
         * Fractional LMUL, check:
         *
         * VLEN * LMUL >= SEW
         * VLEN >> (8 - lmul) >= sew
         * (vlenb << 3) >> (8 - lmul) >= sew
         */
        if (vlmul == 4 || (vlen >> (8 - vlmul)) < sew) {
            vill = true;
        }
    }

    if ((sew > cpu->cfg.elen) || vill || (ediv != 0) || (reserved != 0)) {
        /* only set vill bit. */
        env->vill = 1;
        env->vtype = 0;
        env->vl = 0;
        env->vstart = 0;
        return 0;
    }

    /* lmul encoded as in DisasContext::lmul */
    lmul = sextract32(FIELD_EX64(s2, VTYPE, VLMUL), 0, 3);
    vlmax = vext_get_vlmax(cpu->cfg.vlenb, vsew, lmul);
    if (s1 <= vlmax) {
        vl = s1;
    } else if (s1 < 2 * vlmax && cpu->cfg.rvv_vl_half_avl) {
        vl = (s1 + 1) >> 1;
    } else {
        vl = vlmax;
    }
    env->vl = vl;
    env->vtype = s2;
    env->vstart = 0;
    env->vill = 0;
    return vl;
}

/*
 * Get the maximum number of elements that can be operated on.
 *
 * log2_esz: log2 of element size in bytes.
 */
static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz)
{
    /*
     * As simd_desc supports at most 2048 bytes, the max vlen is 1024 bits,
     * so vlen in bytes (vlenb) is encoded as maxsz.
     */
    uint32_t vlenb = simd_maxsz(desc);

    /* Return VLMAX */
    int scale = vext_lmul(desc) - log2_esz;
    return scale < 0 ? vlenb >> -scale : vlenb << scale;
}
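/*
 * Worked example (illustrative): with VLEN = 128 bits (vlenb = 16),
 * SEW = 32 (log2_esz = 2) and LMUL = 2 (vext_lmul(desc) = 1), the scale
 * is 1 - 2 = -1, so VLMAX = vlenb >> 1 = 8 elements, which matches
 * VLMAX = VLEN * LMUL / SEW = 128 * 2 / 32.
 */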
/*
 * This function checks watchpoints before the real load operation.
 *
 * In system mode, the TLB API probe_access is enough for the watchpoint check.
 * In user mode, there is no watchpoint support now.
 *
 * It will trigger an exception if there is no mapping in the TLB
 * and the page table walk can't fill the TLB entry. Then the guest
 * software can return here after processing the exception, or never return.
 */
static void probe_pages(CPURISCVState *env, target_ulong addr,
                        target_ulong len, uintptr_t ra,
                        MMUAccessType access_type)
{
    target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
    target_ulong curlen = MIN(pagelen, len);
    int mmu_index = riscv_env_mmu_index(env, false);

    probe_access(env, adjust_addr(env, addr), curlen, access_type,
                 mmu_index, ra);
    if (len > curlen) {
        addr += curlen;
        curlen = len - curlen;
        probe_access(env, adjust_addr(env, addr), curlen, access_type,
                     mmu_index, ra);
    }
}

static inline void vext_set_elem_mask(void *v0, int index,
                                      uint8_t value)
{
    int idx = index / 64;
    int pos = index % 64;
    uint64_t old = ((uint64_t *)v0)[idx];
    ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
}

/* elements operations for load and store */
typedef void vext_ldst_elem_fn_tlb(CPURISCVState *env, abi_ptr addr,
                                   uint32_t idx, void *vd, uintptr_t retaddr);
typedef void vext_ldst_elem_fn_host(void *vd, uint32_t idx, void *host);

#define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF)             \
static inline QEMU_ALWAYS_INLINE                            \
void NAME##_tlb(CPURISCVState *env, abi_ptr addr,           \
                uint32_t idx, void *vd, uintptr_t retaddr)  \
{                                                           \
    ETYPE *cur = ((ETYPE *)vd + H(idx));                    \
    *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr);       \
}                                                           \
                                                            \
static inline QEMU_ALWAYS_INLINE                            \
void NAME##_host(void *vd, uint32_t idx, void *host)        \
{                                                           \
    ETYPE *cur = ((ETYPE *)vd + H(idx));                    \
    *cur = (ETYPE)LDSUF##_p(host);                          \
}

GEN_VEXT_LD_ELEM(lde_b, uint8_t,  H1, ldub)
GEN_VEXT_LD_ELEM(lde_h, uint16_t, H2, lduw)
GEN_VEXT_LD_ELEM(lde_w, uint32_t, H4, ldl)
GEN_VEXT_LD_ELEM(lde_d, uint64_t, H8, ldq)

#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF)             \
static inline QEMU_ALWAYS_INLINE                            \
void NAME##_tlb(CPURISCVState *env, abi_ptr addr,           \
                uint32_t idx, void *vd, uintptr_t retaddr)  \
{                                                           \
    ETYPE data = *((ETYPE *)vd + H(idx));                   \
    cpu_##STSUF##_data_ra(env, addr, data, retaddr);        \
}                                                           \
                                                            \
static inline QEMU_ALWAYS_INLINE                            \
void NAME##_host(void *vd, uint32_t idx, void *host)        \
{                                                           \
    ETYPE data = *((ETYPE *)vd + H(idx));                   \
    STSUF##_p(host, data);                                  \
}

GEN_VEXT_ST_ELEM(ste_b, uint8_t,  H1, stb)
GEN_VEXT_ST_ELEM(ste_h, uint16_t, H2, stw)
GEN_VEXT_ST_ELEM(ste_w, uint32_t, H4, stl)
GEN_VEXT_ST_ELEM(ste_d, uint64_t, H8, stq)

static inline QEMU_ALWAYS_INLINE void
vext_continuous_ldst_tlb(CPURISCVState *env, vext_ldst_elem_fn_tlb *ldst_tlb,
                         void *vd, uint32_t evl, target_ulong addr,
                         uint32_t reg_start, uintptr_t ra, uint32_t esz,
                         bool is_load)
{
    uint32_t i;
    for (i = env->vstart; i < evl; env->vstart = ++i, addr += esz) {
        ldst_tlb(env, adjust_addr(env, addr), i, vd, ra);
    }
}
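/*
 * The host-pointer variant below copies byte elements with a single memcpy
 * on little-endian hosts: with esz == 1 there is no element swapping to do,
 * so a bulk copy of (evl - reg_start) bytes is equivalent to the
 * per-element loop. Wider elements, and big-endian hosts, still go through
 * ldst_host() element by element.
 */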
static inline QEMU_ALWAYS_INLINE void
vext_continuous_ldst_host(CPURISCVState *env, vext_ldst_elem_fn_host *ldst_host,
                          void *vd, uint32_t evl, uint32_t reg_start, void *host,
                          uint32_t esz, bool is_load)
{
#if HOST_BIG_ENDIAN
    for (; reg_start < evl; reg_start++, host += esz) {
        ldst_host(vd, reg_start, host);
    }
#else
    if (esz == 1) {
        uint32_t byte_offset = reg_start * esz;
        uint32_t size = (evl - reg_start) * esz;

        if (is_load) {
            memcpy(vd + byte_offset, host, size);
        } else {
            memcpy(host, vd + byte_offset, size);
        }
    } else {
        for (; reg_start < evl; reg_start++, host += esz) {
            ldst_host(vd, reg_start, host);
        }
    }
#endif
}

static void vext_set_tail_elems_1s(target_ulong vl, void *vd,
                                   uint32_t desc, uint32_t nf,
                                   uint32_t esz, uint32_t max_elems)
{
    uint32_t vta = vext_vta(desc);
    int k;

    if (vta == 0) {
        return;
    }

    for (k = 0; k < nf; ++k) {
        vext_set_elems_1s(vd, vta, (k * max_elems + vl) * esz,
                          (k * max_elems + max_elems) * esz);
    }
}

/*
 * stride: access vector element from strided memory
 */
static void
vext_ldst_stride(void *vd, void *v0, target_ulong base, target_ulong stride,
                 CPURISCVState *env, uint32_t desc, uint32_t vm,
                 vext_ldst_elem_fn_tlb *ldst_elem, uint32_t log2_esz,
                 uintptr_t ra)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t vma = vext_vma(desc);

    VSTART_CHECK_EARLY_EXIT(env, env->vl);

    for (i = env->vstart; i < env->vl; env->vstart = ++i) {
        k = 0;
        while (k < nf) {
            if (!vm && !vext_elem_mask(v0, i)) {
                /* set masked-off elements to 1s */
                vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
                                  (i + k * max_elems + 1) * esz);
                k++;
                continue;
            }
            target_ulong addr = base + stride * i + (k << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;

    vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
}

#define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN)                    \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,            \
                  target_ulong stride, CPURISCVState *env,          \
                  uint32_t desc)                                     \
{                                                                    \
    uint32_t vm = vext_vm(desc);                                     \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN,   \
                     ctzl(sizeof(ETYPE)), GETPC());                  \
}

GEN_VEXT_LD_STRIDE(vlse8_v,  int8_t,  lde_b_tlb)
GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h_tlb)
GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w_tlb)
GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d_tlb)

#define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN)                    \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,             \
                  target_ulong stride, CPURISCVState *env,           \
                  uint32_t desc)                                      \
{                                                                     \
    uint32_t vm = vext_vm(desc);                                      \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN,   \
                     ctzl(sizeof(ETYPE)), GETPC());                   \
}

GEN_VEXT_ST_STRIDE(vsse8_v,  int8_t,  ste_b_tlb)
GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h_tlb)
GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w_tlb)
GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d_tlb)
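/*
 * Example: vlse32.v (NF = 1) with stride 12 loads element i from
 * base + 12 * i; for a segment access with NF > 1, field k of element i
 * comes from base + stride * i + k * esz, as computed above.
 */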
/*
 * unit-stride: access elements stored contiguously in memory
 */

/* unmasked unit-stride load and store operation */
static inline QEMU_ALWAYS_INLINE void
vext_page_ldst_us(CPURISCVState *env, void *vd, target_ulong addr,
                  uint32_t elems, uint32_t nf, uint32_t max_elems,
                  uint32_t log2_esz, bool is_load, int mmu_index,
                  vext_ldst_elem_fn_tlb *ldst_tlb,
                  vext_ldst_elem_fn_host *ldst_host, uintptr_t ra)
{
    void *host;
    int i, k, flags;
    uint32_t esz = 1 << log2_esz;
    uint32_t size = (elems * nf) << log2_esz;
    uint32_t evl = env->vstart + elems;
    MMUAccessType access_type = is_load ? MMU_DATA_LOAD : MMU_DATA_STORE;

    /* Check page permission/pmp/watchpoint/etc. */
    flags = probe_access_flags(env, adjust_addr(env, addr), size, access_type,
                               mmu_index, true, &host, ra);

    if (flags == 0) {
        if (nf == 1) {
            vext_continuous_ldst_host(env, ldst_host, vd, evl, env->vstart,
                                      host, esz, is_load);
        } else {
            for (i = env->vstart; i < evl; ++i) {
                k = 0;
                while (k < nf) {
                    ldst_host(vd, i + k * max_elems, host);
                    host += esz;
                    k++;
                }
            }
        }
        env->vstart += elems;
    } else {
        if (nf == 1) {
            vext_continuous_ldst_tlb(env, ldst_tlb, vd, evl, addr, env->vstart,
                                     ra, esz, is_load);
        } else {
            /* load bytes from guest memory */
            for (i = env->vstart; i < evl; env->vstart = ++i) {
                k = 0;
                while (k < nf) {
                    ldst_tlb(env, adjust_addr(env, addr), i + k * max_elems,
                             vd, ra);
                    addr += esz;
                    k++;
                }
            }
        }
    }
}

static inline QEMU_ALWAYS_INLINE void
vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
             vext_ldst_elem_fn_tlb *ldst_tlb,
             vext_ldst_elem_fn_host *ldst_host, uint32_t log2_esz,
             uint32_t evl, uintptr_t ra, bool is_load)
{
    uint32_t k;
    target_ulong page_split, elems, addr;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t msize = nf * esz;
    int mmu_index = riscv_env_mmu_index(env, false);

    VSTART_CHECK_EARLY_EXIT(env, evl);

#if defined(CONFIG_USER_ONLY)
    /*
     * For data sizes <= 6 bytes we get better performance by simply calling
     * vext_continuous_ldst_tlb
     */
    if (nf == 1 && (evl << log2_esz) <= 6) {
        addr = base + (env->vstart << log2_esz);
        vext_continuous_ldst_tlb(env, ldst_tlb, vd, evl, addr, env->vstart, ra,
                                 esz, is_load);

        env->vstart = 0;
        vext_set_tail_elems_1s(evl, vd, desc, nf, esz, max_elems);
        return;
    }
#endif

    /* Calculate the page range of first page */
    addr = base + ((env->vstart * nf) << log2_esz);
    page_split = -(addr | TARGET_PAGE_MASK);
    /* Get number of elements */
    elems = page_split / msize;
    if (unlikely(env->vstart + elems >= evl)) {
        elems = evl - env->vstart;
    }

    /* Load/store elements in the first page */
    if (likely(elems)) {
        vext_page_ldst_us(env, vd, addr, elems, nf, max_elems, log2_esz,
                          is_load, mmu_index, ldst_tlb, ldst_host, ra);
    }

    /* Load/store elements in the second page */
    if (unlikely(env->vstart < evl)) {
        /* Cross page element */
        if (unlikely(page_split % msize)) {
            for (k = 0; k < nf; k++) {
                addr = base + ((env->vstart * nf + k) << log2_esz);
                ldst_tlb(env, adjust_addr(env, addr),
                         env->vstart + k * max_elems, vd, ra);
            }
            env->vstart++;
        }

        addr = base + ((env->vstart * nf) << log2_esz);
        /* Get number of elements of second page */
        elems = evl - env->vstart;

        /* Load/store elements in the second page */
        vext_page_ldst_us(env, vd, addr, elems, nf, max_elems, log2_esz,
                          is_load, mmu_index, ldst_tlb, ldst_host, ra);
    }

    env->vstart = 0;
    vext_set_tail_elems_1s(evl, vd, desc, nf, esz, max_elems);
}
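/*
 * In other words, an unmasked unit-stride access is handled in at most three
 * steps: the elements that fit entirely in the first page, then (only when
 * page_split is not a multiple of the segment size) the single segment that
 * straddles the page boundary via the slow per-element TLB path, and finally
 * the remaining elements in the second page.
 */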
/*
 * masked unit-stride load and store operations are handled as a special
 * case of stride, with stride = NF * sizeof(ETYPE)
 */

#define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN_TLB, LOAD_FN_HOST)      \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,     \
                         CPURISCVState *env, uint32_t desc)         \
{                                                                   \
    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));         \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false,        \
                     LOAD_FN_TLB, ctzl(sizeof(ETYPE)), GETPC());    \
}                                                                   \
                                                                    \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,            \
                  CPURISCVState *env, uint32_t desc)                \
{                                                                   \
    vext_ldst_us(vd, base, env, desc, LOAD_FN_TLB, LOAD_FN_HOST,    \
                 ctzl(sizeof(ETYPE)), env->vl, GETPC(), true);      \
}

GEN_VEXT_LD_US(vle8_v,  int8_t,  lde_b_tlb, lde_b_host)
GEN_VEXT_LD_US(vle16_v, int16_t, lde_h_tlb, lde_h_host)
GEN_VEXT_LD_US(vle32_v, int32_t, lde_w_tlb, lde_w_host)
GEN_VEXT_LD_US(vle64_v, int64_t, lde_d_tlb, lde_d_host)

#define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN_TLB, STORE_FN_HOST)    \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,     \
                         CPURISCVState *env, uint32_t desc)         \
{                                                                   \
    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));         \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false,        \
                     STORE_FN_TLB, ctzl(sizeof(ETYPE)), GETPC());   \
}                                                                   \
                                                                    \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,            \
                  CPURISCVState *env, uint32_t desc)                \
{                                                                   \
    vext_ldst_us(vd, base, env, desc, STORE_FN_TLB, STORE_FN_HOST,  \
                 ctzl(sizeof(ETYPE)), env->vl, GETPC(), false);     \
}

GEN_VEXT_ST_US(vse8_v,  int8_t,  ste_b_tlb, ste_b_host)
GEN_VEXT_ST_US(vse16_v, int16_t, ste_h_tlb, ste_h_host)
GEN_VEXT_ST_US(vse32_v, int32_t, ste_w_tlb, ste_w_host)
GEN_VEXT_ST_US(vse64_v, int64_t, ste_d_tlb, ste_d_host)

/*
 * unit stride mask load and store, EEW = 1
 */
void HELPER(vlm_v)(void *vd, void *v0, target_ulong base,
                   CPURISCVState *env, uint32_t desc)
{
    /* evl = ceil(vl/8) */
    uint8_t evl = (env->vl + 7) >> 3;
    vext_ldst_us(vd, base, env, desc, lde_b_tlb, lde_b_host,
                 0, evl, GETPC(), true);
}

void HELPER(vsm_v)(void *vd, void *v0, target_ulong base,
                   CPURISCVState *env, uint32_t desc)
{
    /* evl = ceil(vl/8) */
    uint8_t evl = (env->vl + 7) >> 3;
    vext_ldst_us(vd, base, env, desc, ste_b_tlb, ste_b_host,
                 0, evl, GETPC(), false);
}

/*
 * index: access vector element from indexed memory
 */
typedef target_ulong vext_get_index_addr(target_ulong base,
                                         uint32_t idx, void *vs2);

#define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H)        \
static target_ulong NAME(target_ulong base,            \
                         uint32_t idx, void *vs2)      \
{                                                      \
    return (base + *((ETYPE *)vs2 + H(idx)));          \
}

GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t,  H1)
GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)
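/*
 * The index element type is always unsigned here, so indices are
 * zero-extended before being added to the base address: e.g. vlxei16_32_v
 * with a 16-bit index of 0xFFFF accesses base + 65535 for its 32-bit data
 * element, regardless of the data EEW.
 */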
static inline void
vext_ldst_index(void *vd, void *v0, target_ulong base,
                void *vs2, CPURISCVState *env, uint32_t desc,
                vext_get_index_addr get_index_addr,
                vext_ldst_elem_fn_tlb *ldst_elem,
                uint32_t log2_esz, uintptr_t ra)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t vma = vext_vma(desc);

    VSTART_CHECK_EARLY_EXIT(env, env->vl);

    /* load bytes from guest memory */
    for (i = env->vstart; i < env->vl; env->vstart = ++i) {
        k = 0;
        while (k < nf) {
            if (!vm && !vext_elem_mask(v0, i)) {
                /* set masked-off elements to 1s */
                vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
                                  (i + k * max_elems + 1) * esz);
                k++;
                continue;
            }
            abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;

    vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
}

#define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN)           \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,            \
                  void *vs2, CPURISCVState *env, uint32_t desc)     \
{                                                                   \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,         \
                    LOAD_FN, ctzl(sizeof(ETYPE)), GETPC());         \
}

GEN_VEXT_LD_INDEX(vlxei8_8_v,   int8_t,  idx_b, lde_b_tlb)
GEN_VEXT_LD_INDEX(vlxei8_16_v,  int16_t, idx_b, lde_h_tlb)
GEN_VEXT_LD_INDEX(vlxei8_32_v,  int32_t, idx_b, lde_w_tlb)
GEN_VEXT_LD_INDEX(vlxei8_64_v,  int64_t, idx_b, lde_d_tlb)
GEN_VEXT_LD_INDEX(vlxei16_8_v,  int8_t,  idx_h, lde_b_tlb)
GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h_tlb)
GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w_tlb)
GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d_tlb)
GEN_VEXT_LD_INDEX(vlxei32_8_v,  int8_t,  idx_w, lde_b_tlb)
GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h_tlb)
GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w_tlb)
GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d_tlb)
GEN_VEXT_LD_INDEX(vlxei64_8_v,  int8_t,  idx_d, lde_b_tlb)
GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h_tlb)
GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w_tlb)
GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d_tlb)

#define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN)       \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,         \
                  void *vs2, CPURISCVState *env, uint32_t desc)  \
{                                                                \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,      \
                    STORE_FN, ctzl(sizeof(ETYPE)),               \
                    GETPC());                                    \
}

GEN_VEXT_ST_INDEX(vsxei8_8_v,   int8_t,  idx_b, ste_b_tlb)
GEN_VEXT_ST_INDEX(vsxei8_16_v,  int16_t, idx_b, ste_h_tlb)
GEN_VEXT_ST_INDEX(vsxei8_32_v,  int32_t, idx_b, ste_w_tlb)
GEN_VEXT_ST_INDEX(vsxei8_64_v,  int64_t, idx_b, ste_d_tlb)
GEN_VEXT_ST_INDEX(vsxei16_8_v,  int8_t,  idx_h, ste_b_tlb)
GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h_tlb)
GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w_tlb)
GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d_tlb)
GEN_VEXT_ST_INDEX(vsxei32_8_v,  int8_t,  idx_w, ste_b_tlb)
GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h_tlb)
GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w_tlb)
GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d_tlb)
GEN_VEXT_ST_INDEX(vsxei64_8_v,  int8_t,  idx_d, ste_b_tlb)
GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h_tlb)
GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w_tlb)
GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d_tlb)

/*
 * unit-stride fault-only-first load instructions
 */
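/*
 * Fault-only-first semantics: only the access to element 0 may raise an
 * exception. If an element past index 0 would fault (or would hit MMIO,
 * where the transaction might fail), the load instead truncates vl to the
 * number of elements completed, as implemented below.
 */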
static inline void
vext_ldff(void *vd, void *v0, target_ulong base, CPURISCVState *env,
          uint32_t desc, vext_ldst_elem_fn_tlb *ldst_tlb,
          vext_ldst_elem_fn_host *ldst_host, uint32_t log2_esz, uintptr_t ra)
{
    uint32_t i, k, vl = 0;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t msize = nf * esz;
    uint32_t vma = vext_vma(desc);
    target_ulong addr, addr_probe, addr_i, offset, remain, page_split, elems;
    int mmu_index = riscv_env_mmu_index(env, false);
    int flags;
    void *host;

    VSTART_CHECK_EARLY_EXIT(env, env->vl);

    addr = base + ((env->vstart * nf) << log2_esz);
    page_split = -(addr | TARGET_PAGE_MASK);
    /* Get number of elements */
    elems = page_split / msize;
    if (unlikely(env->vstart + elems >= env->vl)) {
        elems = env->vl - env->vstart;
    }

    /* Check page permission/pmp/watchpoint/etc. */
    flags = probe_access_flags(env, adjust_addr(env, addr), elems * msize,
                               MMU_DATA_LOAD, mmu_index, true, &host, ra);

    /* If we are crossing a page check also the second page. */
    if (env->vl > elems) {
        addr_probe = addr + (elems << log2_esz);
        flags |= probe_access_flags(env, adjust_addr(env, addr_probe),
                                    elems * msize, MMU_DATA_LOAD, mmu_index,
                                    true, &host, ra);
    }

    if (flags & ~TLB_WATCHPOINT) {
        /* probe every access */
        for (i = env->vstart; i < env->vl; i++) {
            if (!vm && !vext_elem_mask(v0, i)) {
                continue;
            }
            addr_i = adjust_addr(env, base + i * (nf << log2_esz));
            if (i == 0) {
                /* Allow fault on first element. */
                probe_pages(env, addr_i, nf << log2_esz, ra, MMU_DATA_LOAD);
            } else {
                remain = nf << log2_esz;
                while (remain > 0) {
                    offset = -(addr_i | TARGET_PAGE_MASK);

                    /* Probe nonfault on subsequent elements. */
                    flags = probe_access_flags(env, addr_i, offset,
                                               MMU_DATA_LOAD, mmu_index, true,
                                               &host, 0);

                    /*
                     * Stop if invalid (unmapped) or mmio (transaction may
                     * fail). Do not stop if watchpoint, as the spec says that
                     * first-fault should continue to access the same
                     * elements regardless of any watchpoint.
                     */
                    if (flags & ~TLB_WATCHPOINT) {
                        vl = i;
                        goto ProbeSuccess;
                    }
                    if (remain <= offset) {
                        break;
                    }
                    remain -= offset;
                    addr_i = adjust_addr(env, addr_i + offset);
                }
            }
        }
    }
ProbeSuccess:
    /* load bytes from guest memory */
    if (vl != 0) {
        env->vl = vl;
    }

    if (env->vstart < env->vl) {
        if (vm) {
            /* Load/store elements in the first page */
            if (likely(elems)) {
                vext_page_ldst_us(env, vd, addr, elems, nf, max_elems,
                                  log2_esz, true, mmu_index, ldst_tlb,
                                  ldst_host, ra);
            }

            /* Load/store elements in the second page */
            if (unlikely(env->vstart < env->vl)) {
                /* Cross page element */
                if (unlikely(page_split % msize)) {
                    for (k = 0; k < nf; k++) {
                        addr = base + ((env->vstart * nf + k) << log2_esz);
                        ldst_tlb(env, adjust_addr(env, addr),
                                 env->vstart + k * max_elems, vd, ra);
                    }
                    env->vstart++;
                }

                addr = base + ((env->vstart * nf) << log2_esz);
                /* Get number of elements of second page */
                elems = env->vl - env->vstart;

                /* Load/store elements in the second page */
                vext_page_ldst_us(env, vd, addr, elems, nf, max_elems,
                                  log2_esz, true, mmu_index, ldst_tlb,
                                  ldst_host, ra);
            }
        } else {
            for (i = env->vstart; i < env->vl; i++) {
                k = 0;
                while (k < nf) {
                    if (!vext_elem_mask(v0, i)) {
                        /* set masked-off elements to 1s */
                        vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
                                          (i + k * max_elems + 1) * esz);
                        k++;
                        continue;
                    }
                    addr = base + ((i * nf + k) << log2_esz);
                    ldst_tlb(env, adjust_addr(env, addr), i + k * max_elems,
                             vd, ra);
                    k++;
                }
            }
        }
    }
    env->vstart = 0;

    vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
}

#define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN_TLB, LOAD_FN_HOST)   \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,        \
                  CPURISCVState *env, uint32_t desc)            \
{                                                               \
    vext_ldff(vd, v0, base, env, desc, LOAD_FN_TLB,             \
              LOAD_FN_HOST, ctzl(sizeof(ETYPE)), GETPC());      \
}

GEN_VEXT_LDFF(vle8ff_v,  int8_t,  lde_b_tlb, lde_b_host)
GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h_tlb, lde_h_host)
GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w_tlb, lde_w_host)
GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d_tlb, lde_d_host)
#define DO_SWAP(N, M) (M)
#define DO_AND(N, M)  (N & M)
#define DO_XOR(N, M)  (N ^ M)
#define DO_OR(N, M)   (N | M)
#define DO_ADD(N, M)  (N + M)

/* Signed min/max */
#define DO_MAX(N, M)  ((N) >= (M) ? (N) : (M))
#define DO_MIN(N, M)  ((N) >= (M) ? (M) : (N))

/*
 * load and store whole register instructions
 */
static inline QEMU_ALWAYS_INLINE void
vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
                vext_ldst_elem_fn_tlb *ldst_tlb,
                vext_ldst_elem_fn_host *ldst_host, uint32_t log2_esz,
                uintptr_t ra, bool is_load)
{
    target_ulong page_split, elems, addr;
    uint32_t nf = vext_nf(desc);
    uint32_t vlenb = riscv_cpu_cfg(env)->vlenb;
    uint32_t max_elems = vlenb >> log2_esz;
    uint32_t evl = nf * max_elems;
    uint32_t esz = 1 << log2_esz;
    int mmu_index = riscv_env_mmu_index(env, false);

    /* Calculate the page range of first page */
    addr = base + (env->vstart << log2_esz);
    page_split = -(addr | TARGET_PAGE_MASK);
    /* Get number of elements */
    elems = page_split / esz;
    if (unlikely(env->vstart + elems >= evl)) {
        elems = evl - env->vstart;
    }

    /* Load/store elements in the first page */
    if (likely(elems)) {
        vext_page_ldst_us(env, vd, addr, elems, 1, max_elems, log2_esz,
                          is_load, mmu_index, ldst_tlb, ldst_host, ra);
    }

    /* Load/store elements in the second page */
    if (unlikely(env->vstart < evl)) {
        /* Cross page element */
        if (unlikely(page_split % esz)) {
            addr = base + (env->vstart << log2_esz);
            ldst_tlb(env, adjust_addr(env, addr), env->vstart, vd, ra);
            env->vstart++;
        }

        addr = base + (env->vstart << log2_esz);
        /* Get number of elements of second page */
        elems = evl - env->vstart;

        /* Load/store elements in the second page */
        vext_page_ldst_us(env, vd, addr, elems, 1, max_elems, log2_esz,
                          is_load, mmu_index, ldst_tlb, ldst_host, ra);
    }

    env->vstart = 0;
}
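/*
 * Whole-register accesses do not depend on vl: the effective element count
 * is nf * (vlenb >> log2_esz) as computed above, so these helpers always
 * transfer exactly nf full vector registers worth of bytes (subject to a
 * nonzero vstart resuming part-way through).
 */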
#define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN_TLB, LOAD_FN_HOST)       \
void HELPER(NAME)(void *vd, target_ulong base, CPURISCVState *env,      \
                  uint32_t desc)                                        \
{                                                                       \
    vext_ldst_whole(vd, base, env, desc, LOAD_FN_TLB, LOAD_FN_HOST,     \
                    ctzl(sizeof(ETYPE)), GETPC(), true);                \
}

GEN_VEXT_LD_WHOLE(vl1re8_v,  int8_t,  lde_b_tlb, lde_b_host)
GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h_tlb, lde_h_host)
GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w_tlb, lde_w_host)
GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d_tlb, lde_d_host)
GEN_VEXT_LD_WHOLE(vl2re8_v,  int8_t,  lde_b_tlb, lde_b_host)
GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h_tlb, lde_h_host)
GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w_tlb, lde_w_host)
GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d_tlb, lde_d_host)
GEN_VEXT_LD_WHOLE(vl4re8_v,  int8_t,  lde_b_tlb, lde_b_host)
GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h_tlb, lde_h_host)
GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w_tlb, lde_w_host)
GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d_tlb, lde_d_host)
GEN_VEXT_LD_WHOLE(vl8re8_v,  int8_t,  lde_b_tlb, lde_b_host)
GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h_tlb, lde_h_host)
GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w_tlb, lde_w_host)
GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d_tlb, lde_d_host)

#define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN_TLB, STORE_FN_HOST)     \
void HELPER(NAME)(void *vd, target_ulong base, CPURISCVState *env,      \
                  uint32_t desc)                                        \
{                                                                       \
    vext_ldst_whole(vd, base, env, desc, STORE_FN_TLB, STORE_FN_HOST,   \
                    ctzl(sizeof(ETYPE)), GETPC(), false);               \
}

GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b_tlb, ste_b_host)
GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b_tlb, ste_b_host)
GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b_tlb, ste_b_host)
GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b_tlb, ste_b_host)

/*
 * Vector Integer Arithmetic Instructions
 */

/* (TD, T1, T2, TX1, TX2) */
#define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
#define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
#define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
#define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
#define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
#define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
#define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
#define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
#define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
#define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
#define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
#define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
#define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
#define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
#define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
#define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
#define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
#define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
#define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
#define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t

#define DO_SUB(N, M) (N - M)
#define DO_RSUB(N, M) (M - N)

RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)

GEN_VEXT_VV(vadd_vv_b, 1)
GEN_VEXT_VV(vadd_vv_h, 2)
GEN_VEXT_VV(vadd_vv_w, 4)
GEN_VEXT_VV(vadd_vv_d, 8)
GEN_VEXT_VV(vsub_vv_b, 1)
GEN_VEXT_VV(vsub_vv_h, 2)
GEN_VEXT_VV(vsub_vv_w, 4)
GEN_VEXT_VV(vsub_vv_d, 8)
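/*
 * In these helpers the first macro operand is the vs2 element and the second
 * is the vs1 element or the scalar rs1 (see the DO_OP(s2, s1, ...) calls in
 * the expanded helpers below), so DO_RSUB gives vrsub its reversed
 * semantics: vd[i] = rs1 - vs2[i].
 */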
RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)

GEN_VEXT_VX(vadd_vx_b, 1)
GEN_VEXT_VX(vadd_vx_h, 2)
GEN_VEXT_VX(vadd_vx_w, 4)
GEN_VEXT_VX(vadd_vx_d, 8)
GEN_VEXT_VX(vsub_vx_b, 1)
GEN_VEXT_VX(vsub_vx_h, 2)
GEN_VEXT_VX(vsub_vx_w, 4)
GEN_VEXT_VX(vsub_vx_d, 8)
GEN_VEXT_VX(vrsub_vx_b, 1)
GEN_VEXT_VX(vrsub_vx_h, 2)
GEN_VEXT_VX(vrsub_vx_w, 4)
GEN_VEXT_VX(vrsub_vx_d, 8)

void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
        *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
    }
}

void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
        *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
    }
}

void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
        *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
    }
}

void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
        *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
    }
}

/* Vector Widening Integer Add/Subtract */
#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
#define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
#define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
#define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
#define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t
#define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t
#define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t
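/*
 * In these WOP_* tuples the destination type TD is twice the source width.
 * The WOP_W*_* variants additionally use an already-widened vs2 element
 * type, which is what the .wv/.wx forms (vwaddu.wv, vwadd.wx, ...) operate
 * on.
 */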
RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
GEN_VEXT_VV(vwaddu_vv_b, 2)
GEN_VEXT_VV(vwaddu_vv_h, 4)
GEN_VEXT_VV(vwaddu_vv_w, 8)
GEN_VEXT_VV(vwsubu_vv_b, 2)
GEN_VEXT_VV(vwsubu_vv_h, 4)
GEN_VEXT_VV(vwsubu_vv_w, 8)
GEN_VEXT_VV(vwadd_vv_b, 2)
GEN_VEXT_VV(vwadd_vv_h, 4)
GEN_VEXT_VV(vwadd_vv_w, 8)
GEN_VEXT_VV(vwsub_vv_b, 2)
GEN_VEXT_VV(vwsub_vv_h, 4)
GEN_VEXT_VV(vwsub_vv_w, 8)
GEN_VEXT_VV(vwaddu_wv_b, 2)
GEN_VEXT_VV(vwaddu_wv_h, 4)
GEN_VEXT_VV(vwaddu_wv_w, 8)
GEN_VEXT_VV(vwsubu_wv_b, 2)
GEN_VEXT_VV(vwsubu_wv_h, 4)
GEN_VEXT_VV(vwsubu_wv_w, 8)
GEN_VEXT_VV(vwadd_wv_b, 2)
GEN_VEXT_VV(vwadd_wv_h, 4)
GEN_VEXT_VV(vwadd_wv_w, 8)
GEN_VEXT_VV(vwsub_wv_b, 2)
GEN_VEXT_VV(vwsub_wv_h, 4)
GEN_VEXT_VV(vwsub_wv_w, 8)

RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
GEN_VEXT_VX(vwaddu_vx_b, 2)
GEN_VEXT_VX(vwaddu_vx_h, 4)
GEN_VEXT_VX(vwaddu_vx_w, 8)
GEN_VEXT_VX(vwsubu_vx_b, 2)
GEN_VEXT_VX(vwsubu_vx_h, 4)
GEN_VEXT_VX(vwsubu_vx_w, 8)
GEN_VEXT_VX(vwadd_vx_b, 2)
GEN_VEXT_VX(vwadd_vx_h, 4)
GEN_VEXT_VX(vwadd_vx_w, 8)
GEN_VEXT_VX(vwsub_vx_b, 2)
GEN_VEXT_VX(vwsub_vx_h, 4)
GEN_VEXT_VX(vwsub_vx_w, 8)
GEN_VEXT_VX(vwaddu_wx_b, 2)
GEN_VEXT_VX(vwaddu_wx_h, 4)
GEN_VEXT_VX(vwaddu_wx_w, 8)
GEN_VEXT_VX(vwsubu_wx_b, 2)
GEN_VEXT_VX(vwsubu_wx_h, 4)
GEN_VEXT_VX(vwsubu_wx_w, 8)
GEN_VEXT_VX(vwadd_wx_b, 2)
GEN_VEXT_VX(vwadd_wx_h, 4)
GEN_VEXT_VX(vwadd_wx_w, 8)
GEN_VEXT_VX(vwsub_wx_b, 2)
GEN_VEXT_VX(vwsub_wx_h, 4)
GEN_VEXT_VX(vwsub_wx_w, 8)
/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
#define DO_VADC(N, M, C) (N + M + C)
#define DO_VSBC(N, M, C) (N - M - C)

#define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP)              \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vl = env->vl;                                    \
    uint32_t esz = sizeof(ETYPE);                             \
    uint32_t total_elems =                                    \
        vext_get_total_elems(env, desc, esz);                 \
    uint32_t vta = vext_vta(desc);                            \
    uint32_t i;                                               \
                                                              \
    VSTART_CHECK_EARLY_EXIT(env, vl);                         \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        ETYPE carry = vext_elem_mask(v0, i);                  \
                                                              \
        *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry);         \
    }                                                         \
    env->vstart = 0;                                          \
    /* set tail elements to 1s */                             \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);  \
}

GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t,  H1, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)

GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t,  H1, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)

#define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP)                         \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,        \
                  CPURISCVState *env, uint32_t desc)                     \
{                                                                        \
    uint32_t vl = env->vl;                                               \
    uint32_t esz = sizeof(ETYPE);                                        \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);         \
    uint32_t vta = vext_vta(desc);                                       \
    uint32_t i;                                                          \
                                                                         \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                    \
                                                                         \
    for (i = env->vstart; i < vl; i++) {                                 \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                               \
        ETYPE carry = vext_elem_mask(v0, i);                             \
                                                                         \
        *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
    }                                                                    \
    env->vstart = 0;                                                     \
    /* set tail elements to 1s */                                        \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);             \
}

GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t,  H1, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)

GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t,  H1, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)

#define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \
                              (__typeof(N))(N + M) < N)
#define DO_MSBC(N, M, C) (C ? N <= M : N < M)
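/*
 * DO_MADC detects the carry-out of N + M + C through unsigned wrap-around:
 * e.g. for uint8_t, 200 + 100 wraps to 44, and 44 < 200 signals a carry.
 * With C set, the sum N + M + 1 can wrap exactly onto N (M == 0xFF), hence
 * the <= comparison. DO_MSBC is the analogous borrow-out test for N - M - C.
 */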
#define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP)             \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vl = env->vl;                                    \
    uint32_t vm = vext_vm(desc);                              \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;    \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);              \
    uint32_t i;                                               \
                                                              \
    VSTART_CHECK_EARLY_EXIT(env, vl);                         \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        ETYPE carry = !vm && vext_elem_mask(v0, i);           \
        vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry));      \
    }                                                         \
    env->vstart = 0;                                          \
    /*
     * mask destination register is always tail-agnostic
     * set tail elements to 1s
     */                                                       \
    if (vta_all_1s) {                                         \
        for (; i < total_elems; i++) {                        \
            vext_set_elem_mask(vd, i, 1);                     \
        }                                                     \
    }                                                         \
}

GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t,  H1, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)

GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t,  H1, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)

#define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP)               \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,          \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{                                                               \
    uint32_t vl = env->vl;                                      \
    uint32_t vm = vext_vm(desc);                                \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;      \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);                \
    uint32_t i;                                                 \
                                                                \
    VSTART_CHECK_EARLY_EXIT(env, vl);                           \
                                                                \
    for (i = env->vstart; i < vl; i++) {                        \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                      \
        ETYPE carry = !vm && vext_elem_mask(v0, i);             \
        vext_set_elem_mask(vd, i,                               \
                DO_OP(s2, (ETYPE)(target_long)s1, carry));      \
    }                                                           \
    env->vstart = 0;                                            \
    /*
     * mask destination register is always tail-agnostic
     * set tail elements to 1s
     */                                                         \
    if (vta_all_1s) {                                           \
        for (; i < total_elems; i++) {                          \
            vext_set_elem_mask(vd, i, 1);                       \
        }                                                       \
    }                                                           \
}

GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t,  H1, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)

GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t,  H1, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)
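/*
 * Note that vmadc/vmsbc produce a mask result: the carry/borrow input from
 * v0 is only consumed when vm == 0 (the .vvm/.vxm forms), while the
 * unmasked encodings compute a plain carry/borrow-out of the addition or
 * subtraction.
 */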
/* Vector Bitwise Logical Instructions */
RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
GEN_VEXT_VV(vand_vv_b, 1)
GEN_VEXT_VV(vand_vv_h, 2)
GEN_VEXT_VV(vand_vv_w, 4)
GEN_VEXT_VV(vand_vv_d, 8)
GEN_VEXT_VV(vor_vv_b, 1)
GEN_VEXT_VV(vor_vv_h, 2)
GEN_VEXT_VV(vor_vv_w, 4)
GEN_VEXT_VV(vor_vv_d, 8)
GEN_VEXT_VV(vxor_vv_b, 1)
GEN_VEXT_VV(vxor_vv_h, 2)
GEN_VEXT_VV(vxor_vv_w, 4)
GEN_VEXT_VV(vxor_vv_d, 8)

RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
GEN_VEXT_VX(vand_vx_b, 1)
GEN_VEXT_VX(vand_vx_h, 2)
GEN_VEXT_VX(vand_vx_w, 4)
GEN_VEXT_VX(vand_vx_d, 8)
GEN_VEXT_VX(vor_vx_b, 1)
GEN_VEXT_VX(vor_vx_h, 2)
GEN_VEXT_VX(vor_vx_w, 4)
GEN_VEXT_VX(vor_vx_d, 8)
GEN_VEXT_VX(vxor_vx_b, 1)
GEN_VEXT_VX(vxor_vx_h, 2)
GEN_VEXT_VX(vxor_vx_w, 4)
GEN_VEXT_VX(vxor_vx_d, 8)

/* Vector Single-Width Bit Shift Instructions */
#define DO_SLL(N, M)  (N << (M))
#define DO_SRL(N, M)  (N >> (M))

/* generate the helpers for shift instructions with two vector operands */
#define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK)       \
void HELPER(NAME)(void *vd, void *v0, void *vs1,                    \
                  void *vs2, CPURISCVState *env, uint32_t desc)     \
{                                                                   \
    uint32_t vm = vext_vm(desc);                                    \
    uint32_t vl = env->vl;                                          \
    uint32_t esz = sizeof(TS1);                                     \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);    \
    uint32_t vta = vext_vta(desc);                                  \
    uint32_t vma = vext_vma(desc);                                  \
    uint32_t i;                                                     \
                                                                    \
    VSTART_CHECK_EARLY_EXIT(env, vl);                               \
                                                                    \
    for (i = env->vstart; i < vl; i++) {                            \
        if (!vm && !vext_elem_mask(v0, i)) {                        \
            /* set masked-off elements to 1s */                     \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);     \
            continue;                                               \
        }                                                           \
        TS1 s1 = *((TS1 *)vs1 + HS1(i));                            \
        TS2 s2 = *((TS2 *)vs2 + HS2(i));                            \
        *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK);                  \
    }                                                               \
    env->vstart = 0;                                                \
    /* set tail elements to 1s */                                   \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);        \
}
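/*
 * The shift amount is masked to log2(SEW) bits, matching the architectural
 * behaviour: e.g. for SEW=32 the mask is 0x1f, so a shift count of 33
 * shifts by 1. Arithmetic right shifts reuse DO_SRL and get their sign
 * extension from the signed TS2 element type.
 */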
GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t,  uint8_t, H1, H1, DO_SLL, 0x7)
GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)

GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)

GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t,  int8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)

/*
 * generate the helpers for shift instructions with one vector and one scalar
 */
#define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK)  \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,       \
                  void *vs2, CPURISCVState *env,             \
                  uint32_t desc)                             \
{                                                            \
    uint32_t vm = vext_vm(desc);                             \
    uint32_t vl = env->vl;                                   \
    uint32_t esz = sizeof(TD);                               \
    uint32_t total_elems =                                   \
        vext_get_total_elems(env, desc, esz);                \
    uint32_t vta = vext_vta(desc);                           \
    uint32_t vma = vext_vma(desc);                           \
    uint32_t i;                                              \
                                                             \
    VSTART_CHECK_EARLY_EXIT(env, vl);                        \
                                                             \
    for (i = env->vstart; i < vl; i++) {                     \
        if (!vm && !vext_elem_mask(v0, i)) {                 \
            /* set masked-off elements to 1s */              \
            vext_set_elems_1s(vd, vma, i * esz,              \
                              (i + 1) * esz);                \
            continue;                                        \
        }                                                    \
        TS2 s2 = *((TS2 *)vs2 + HS2(i));                     \
        *((TD *)vd + HD(i)) = OP(s2, s1 & MASK);             \
    }                                                        \
    env->vstart = 0;                                         \
    /* set tail elements to 1s */                            \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}

GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)

GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)

GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)

/* Vector Narrowing Integer Right Shift Instructions */
GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t,  uint16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t,  int16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)

/* Vector Integer Comparison Instructions */
#define DO_MSEQ(N, M) (N == M)
#define DO_MSNE(N, M) (N != M)
#define DO_MSLT(N, M) (N < M)
#define DO_MSLE(N, M) (N <= M)
#define DO_MSGT(N, M) (N > M)
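/*
 * The comparison helpers below write one result bit per element into a mask
 * register. Masked-off bits are set to 1 only when the mask policy is
 * agnostic (vma), and the tail bits are likewise filled with 1s only when
 * vta_all_1s is set.
 */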
#define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP)                \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vm = vext_vm(desc);                              \
    uint32_t vl = env->vl;                                    \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;    \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);              \
    uint32_t vma = vext_vma(desc);                            \
    uint32_t i;                                               \
                                                              \
    VSTART_CHECK_EARLY_EXIT(env, vl);                         \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        if (!vm && !vext_elem_mask(v0, i)) {                  \
            /* set masked-off elements to 1s */               \
            if (vma) {                                        \
                vext_set_elem_mask(vd, i, 1);                 \
            }                                                 \
            continue;                                         \
        }                                                     \
        vext_set_elem_mask(vd, i, DO_OP(s2, s1));             \
    }                                                         \
    env->vstart = 0;                                          \
    /*
     * mask destination register is always tail-agnostic
     * set tail elements to 1s
     */                                                       \
    if (vta_all_1s) {                                         \
        for (; i < total_elems; i++) {                        \
            vext_set_elem_mask(vd, i, 1);                     \
        }                                                     \
    }                                                         \
}

GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t,  H1, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)

GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t,  H1, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)

GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)

#define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP)                      \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)                \
{                                                                   \
    uint32_t vm = vext_vm(desc);                                    \
    uint32_t vl = env->vl;                                          \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;          \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);                    \
    uint32_t vma = vext_vma(desc);                                  \
    uint32_t i;                                                     \
                                                                    \
    VSTART_CHECK_EARLY_EXIT(env, vl);                               \
                                                                    \
    for (i = env->vstart; i < vl; i++) {                            \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                          \
        if (!vm && !vext_elem_mask(v0, i)) {                        \
            /* set masked-off elements to 1s */                     \
            if (vma) {                                              \
                vext_set_elem_mask(vd, i, 1);                       \
            }                                                       \
            continue;                                               \
        }                                                           \
        vext_set_elem_mask(vd, i,                                   \
                DO_OP(s2, (ETYPE)(target_long)s1));                 \
    }                                                               \
    env->vstart = 0;                                                \
    /*
     * mask destination register is always tail-agnostic
     * set tail elements to 1s
     */                                                             \
    if (vta_all_1s) {                                               \
        for (; i < total_elems; i++) {                              \
            vext_set_elem_mask(vd, i, 1);                           \
        }                                                           \
    }                                                               \
}

GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t,  H1, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)

GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t,  H1, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)

GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t,  H1, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)

GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t,  H1, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)

/* Vector Integer Min/Max Instructions */
RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
GEN_VEXT_VV(vminu_vv_b, 1)
GEN_VEXT_VV(vminu_vv_h, 2)
GEN_VEXT_VV(vminu_vv_w, 4)
GEN_VEXT_VV(vminu_vv_d, 8)
GEN_VEXT_VV(vmin_vv_b, 1)
GEN_VEXT_VV(vmin_vv_h, 2)
GEN_VEXT_VV(vmin_vv_w, 4)
GEN_VEXT_VV(vmin_vv_d, 8)
GEN_VEXT_VV(vmaxu_vv_b, 1)
GEN_VEXT_VV(vmaxu_vv_h, 2)
GEN_VEXT_VV(vmaxu_vv_w, 4)
GEN_VEXT_VV(vmaxu_vv_d, 8)
GEN_VEXT_VV(vmax_vv_b, 1)
GEN_VEXT_VV(vmax_vv_h, 2)
GEN_VEXT_VV(vmax_vv_w, 4)
GEN_VEXT_VV(vmax_vv_d, 8)
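/*
 * DO_MIN/DO_MAX are plain C comparisons, so the signedness of vminu/vmaxu
 * versus vmin/vmax comes entirely from the operand type tuples
 * (OP_UUU_* versus OP_SSS_*) used in the RVVCALL expansions.
 */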
H8, DO_MIN) 1616 RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX) 1617 RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX) 1618 RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX) 1619 RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX) 1620 RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX) 1621 RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX) 1622 RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX) 1623 RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX) 1624 GEN_VEXT_VX(vminu_vx_b, 1) 1625 GEN_VEXT_VX(vminu_vx_h, 2) 1626 GEN_VEXT_VX(vminu_vx_w, 4) 1627 GEN_VEXT_VX(vminu_vx_d, 8) 1628 GEN_VEXT_VX(vmin_vx_b, 1) 1629 GEN_VEXT_VX(vmin_vx_h, 2) 1630 GEN_VEXT_VX(vmin_vx_w, 4) 1631 GEN_VEXT_VX(vmin_vx_d, 8) 1632 GEN_VEXT_VX(vmaxu_vx_b, 1) 1633 GEN_VEXT_VX(vmaxu_vx_h, 2) 1634 GEN_VEXT_VX(vmaxu_vx_w, 4) 1635 GEN_VEXT_VX(vmaxu_vx_d, 8) 1636 GEN_VEXT_VX(vmax_vx_b, 1) 1637 GEN_VEXT_VX(vmax_vx_h, 2) 1638 GEN_VEXT_VX(vmax_vx_w, 4) 1639 GEN_VEXT_VX(vmax_vx_d, 8) 1640 1641 /* Vector Single-Width Integer Multiply Instructions */ 1642 #define DO_MUL(N, M) (N * M) 1643 RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL) 1644 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL) 1645 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL) 1646 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL) 1647 GEN_VEXT_VV(vmul_vv_b, 1) 1648 GEN_VEXT_VV(vmul_vv_h, 2) 1649 GEN_VEXT_VV(vmul_vv_w, 4) 1650 GEN_VEXT_VV(vmul_vv_d, 8) 1651 1652 static int8_t do_mulh_b(int8_t s2, int8_t s1) 1653 { 1654 return (int16_t)s2 * (int16_t)s1 >> 8; 1655 } 1656 1657 static int16_t do_mulh_h(int16_t s2, int16_t s1) 1658 { 1659 return (int32_t)s2 * (int32_t)s1 >> 16; 1660 } 1661 1662 static int32_t do_mulh_w(int32_t s2, int32_t s1) 1663 { 1664 return (int64_t)s2 * (int64_t)s1 >> 32; 1665 } 1666 1667 static int64_t do_mulh_d(int64_t s2, int64_t s1) 1668 { 1669 uint64_t hi_64, lo_64; 1670 1671 muls64(&lo_64, &hi_64, s1, s2); 1672 return hi_64; 1673 } 1674 1675 static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1) 1676 { 1677 return (uint16_t)s2 * (uint16_t)s1 >> 8; 1678 } 1679 1680 static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1) 1681 { 1682 return (uint32_t)s2 * (uint32_t)s1 >> 16; 1683 } 1684 1685 static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1) 1686 { 1687 return (uint64_t)s2 * (uint64_t)s1 >> 32; 1688 } 1689 1690 static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1) 1691 { 1692 uint64_t hi_64, lo_64; 1693 1694 mulu64(&lo_64, &hi_64, s2, s1); 1695 return hi_64; 1696 } 1697 1698 static int8_t do_mulhsu_b(int8_t s2, uint8_t s1) 1699 { 1700 return (int16_t)s2 * (uint16_t)s1 >> 8; 1701 } 1702 1703 static int16_t do_mulhsu_h(int16_t s2, uint16_t s1) 1704 { 1705 return (int32_t)s2 * (uint32_t)s1 >> 16; 1706 } 1707 1708 static int32_t do_mulhsu_w(int32_t s2, uint32_t s1) 1709 { 1710 return (int64_t)s2 * (uint64_t)s1 >> 32; 1711 } 1712 1713 /* 1714 * Let A = signed operand, 1715 * B = unsigned operand 1716 * P = mulu64(A, B), unsigned product 1717 * 1718 * LET X = 2 ** 64 - A, 2's complement of A 1719 * SP = signed product 1720 * THEN 1721 * IF A < 0 1722 * SP = -X * B 1723 * = -(2 ** 64 - A) * B 1724 * = A * B - 2 ** 64 * B 1725 * = P - 2 ** 64 * B 1726 * ELSE 1727 * SP = P 1728 * THEN 1729 * HI_P -= (A < 0 ? B : 0) 1730 */ 1731 1732 static int64_t do_mulhsu_d(int64_t s2, uint64_t s1) 1733 { 1734 uint64_t hi_64, lo_64; 1735 1736 mulu64(&lo_64, &hi_64, s2, s1); 1737 1738 hi_64 -= s2 < 0 ? 
s1 : 0; 1739 return hi_64; 1740 } 1741 1742 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b) 1743 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h) 1744 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w) 1745 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d) 1746 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b) 1747 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h) 1748 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w) 1749 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d) 1750 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b) 1751 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h) 1752 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w) 1753 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d) 1754 GEN_VEXT_VV(vmulh_vv_b, 1) 1755 GEN_VEXT_VV(vmulh_vv_h, 2) 1756 GEN_VEXT_VV(vmulh_vv_w, 4) 1757 GEN_VEXT_VV(vmulh_vv_d, 8) 1758 GEN_VEXT_VV(vmulhu_vv_b, 1) 1759 GEN_VEXT_VV(vmulhu_vv_h, 2) 1760 GEN_VEXT_VV(vmulhu_vv_w, 4) 1761 GEN_VEXT_VV(vmulhu_vv_d, 8) 1762 GEN_VEXT_VV(vmulhsu_vv_b, 1) 1763 GEN_VEXT_VV(vmulhsu_vv_h, 2) 1764 GEN_VEXT_VV(vmulhsu_vv_w, 4) 1765 GEN_VEXT_VV(vmulhsu_vv_d, 8) 1766 1767 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL) 1768 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL) 1769 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL) 1770 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL) 1771 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b) 1772 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h) 1773 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w) 1774 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d) 1775 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b) 1776 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h) 1777 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w) 1778 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d) 1779 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b) 1780 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h) 1781 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w) 1782 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d) 1783 GEN_VEXT_VX(vmul_vx_b, 1) 1784 GEN_VEXT_VX(vmul_vx_h, 2) 1785 GEN_VEXT_VX(vmul_vx_w, 4) 1786 GEN_VEXT_VX(vmul_vx_d, 8) 1787 GEN_VEXT_VX(vmulh_vx_b, 1) 1788 GEN_VEXT_VX(vmulh_vx_h, 2) 1789 GEN_VEXT_VX(vmulh_vx_w, 4) 1790 GEN_VEXT_VX(vmulh_vx_d, 8) 1791 GEN_VEXT_VX(vmulhu_vx_b, 1) 1792 GEN_VEXT_VX(vmulhu_vx_h, 2) 1793 GEN_VEXT_VX(vmulhu_vx_w, 4) 1794 GEN_VEXT_VX(vmulhu_vx_d, 8) 1795 GEN_VEXT_VX(vmulhsu_vx_b, 1) 1796 GEN_VEXT_VX(vmulhsu_vx_h, 2) 1797 GEN_VEXT_VX(vmulhsu_vx_w, 4) 1798 GEN_VEXT_VX(vmulhsu_vx_d, 8) 1799 1800 /* Vector Integer Divide Instructions */ 1801 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M) 1802 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M) 1803 #define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : \ 1804 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M) 1805 #define DO_REM(N, M) (unlikely(M == 0) ? N : \ 1806 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 
0 : N % M) 1807 1808 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU) 1809 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU) 1810 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU) 1811 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU) 1812 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV) 1813 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV) 1814 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV) 1815 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV) 1816 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU) 1817 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU) 1818 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU) 1819 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU) 1820 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM) 1821 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM) 1822 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM) 1823 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM) 1824 GEN_VEXT_VV(vdivu_vv_b, 1) 1825 GEN_VEXT_VV(vdivu_vv_h, 2) 1826 GEN_VEXT_VV(vdivu_vv_w, 4) 1827 GEN_VEXT_VV(vdivu_vv_d, 8) 1828 GEN_VEXT_VV(vdiv_vv_b, 1) 1829 GEN_VEXT_VV(vdiv_vv_h, 2) 1830 GEN_VEXT_VV(vdiv_vv_w, 4) 1831 GEN_VEXT_VV(vdiv_vv_d, 8) 1832 GEN_VEXT_VV(vremu_vv_b, 1) 1833 GEN_VEXT_VV(vremu_vv_h, 2) 1834 GEN_VEXT_VV(vremu_vv_w, 4) 1835 GEN_VEXT_VV(vremu_vv_d, 8) 1836 GEN_VEXT_VV(vrem_vv_b, 1) 1837 GEN_VEXT_VV(vrem_vv_h, 2) 1838 GEN_VEXT_VV(vrem_vv_w, 4) 1839 GEN_VEXT_VV(vrem_vv_d, 8) 1840 1841 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU) 1842 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU) 1843 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU) 1844 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU) 1845 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV) 1846 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV) 1847 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV) 1848 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV) 1849 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU) 1850 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU) 1851 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU) 1852 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU) 1853 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) 1854 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM) 1855 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM) 1856 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM) 1857 GEN_VEXT_VX(vdivu_vx_b, 1) 1858 GEN_VEXT_VX(vdivu_vx_h, 2) 1859 GEN_VEXT_VX(vdivu_vx_w, 4) 1860 GEN_VEXT_VX(vdivu_vx_d, 8) 1861 GEN_VEXT_VX(vdiv_vx_b, 1) 1862 GEN_VEXT_VX(vdiv_vx_h, 2) 1863 GEN_VEXT_VX(vdiv_vx_w, 4) 1864 GEN_VEXT_VX(vdiv_vx_d, 8) 1865 GEN_VEXT_VX(vremu_vx_b, 1) 1866 GEN_VEXT_VX(vremu_vx_h, 2) 1867 GEN_VEXT_VX(vremu_vx_w, 4) 1868 GEN_VEXT_VX(vremu_vx_d, 8) 1869 GEN_VEXT_VX(vrem_vx_b, 1) 1870 GEN_VEXT_VX(vrem_vx_h, 2) 1871 GEN_VEXT_VX(vrem_vx_w, 4) 1872 GEN_VEXT_VX(vrem_vx_d, 8) 1873 1874 /* Vector Widening Integer Multiply Instructions */ 1875 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) 1876 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL) 1877 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL) 1878 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL) 1879 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL) 1880 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL) 1881 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) 1882 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, 
DO_MUL) 1883 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) 1884 GEN_VEXT_VV(vwmul_vv_b, 2) 1885 GEN_VEXT_VV(vwmul_vv_h, 4) 1886 GEN_VEXT_VV(vwmul_vv_w, 8) 1887 GEN_VEXT_VV(vwmulu_vv_b, 2) 1888 GEN_VEXT_VV(vwmulu_vv_h, 4) 1889 GEN_VEXT_VV(vwmulu_vv_w, 8) 1890 GEN_VEXT_VV(vwmulsu_vv_b, 2) 1891 GEN_VEXT_VV(vwmulsu_vv_h, 4) 1892 GEN_VEXT_VV(vwmulsu_vv_w, 8) 1893 1894 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) 1895 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) 1896 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL) 1897 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL) 1898 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL) 1899 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL) 1900 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) 1901 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) 1902 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) 1903 GEN_VEXT_VX(vwmul_vx_b, 2) 1904 GEN_VEXT_VX(vwmul_vx_h, 4) 1905 GEN_VEXT_VX(vwmul_vx_w, 8) 1906 GEN_VEXT_VX(vwmulu_vx_b, 2) 1907 GEN_VEXT_VX(vwmulu_vx_h, 4) 1908 GEN_VEXT_VX(vwmulu_vx_w, 8) 1909 GEN_VEXT_VX(vwmulsu_vx_b, 2) 1910 GEN_VEXT_VX(vwmulsu_vx_h, 4) 1911 GEN_VEXT_VX(vwmulsu_vx_w, 8) 1912 1913 /* Vector Single-Width Integer Multiply-Add Instructions */ 1914 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1915 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 1916 { \ 1917 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1918 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1919 TD d = *((TD *)vd + HD(i)); \ 1920 *((TD *)vd + HD(i)) = OP(s2, s1, d); \ 1921 } 1922 1923 #define DO_MACC(N, M, D) (M * N + D) 1924 #define DO_NMSAC(N, M, D) (-(M * N) + D) 1925 #define DO_MADD(N, M, D) (M * D + N) 1926 #define DO_NMSUB(N, M, D) (-(M * D) + N) 1927 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC) 1928 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC) 1929 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC) 1930 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC) 1931 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC) 1932 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC) 1933 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC) 1934 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC) 1935 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD) 1936 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD) 1937 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD) 1938 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD) 1939 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB) 1940 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) 1941 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) 1942 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) 1943 GEN_VEXT_VV(vmacc_vv_b, 1) 1944 GEN_VEXT_VV(vmacc_vv_h, 2) 1945 GEN_VEXT_VV(vmacc_vv_w, 4) 1946 GEN_VEXT_VV(vmacc_vv_d, 8) 1947 GEN_VEXT_VV(vnmsac_vv_b, 1) 1948 GEN_VEXT_VV(vnmsac_vv_h, 2) 1949 GEN_VEXT_VV(vnmsac_vv_w, 4) 1950 GEN_VEXT_VV(vnmsac_vv_d, 8) 1951 GEN_VEXT_VV(vmadd_vv_b, 1) 1952 GEN_VEXT_VV(vmadd_vv_h, 2) 1953 GEN_VEXT_VV(vmadd_vv_w, 4) 1954 GEN_VEXT_VV(vmadd_vv_d, 8) 1955 GEN_VEXT_VV(vnmsub_vv_b, 1) 1956 GEN_VEXT_VV(vnmsub_vv_h, 2) 1957 GEN_VEXT_VV(vnmsub_vv_w, 4) 1958 GEN_VEXT_VV(vnmsub_vv_d, 8) 1959 1960 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 1961 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 1962 { \ 1963 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1964 TD d = *((TD *)vd 
+ HD(i)); \ 1965 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \ 1966 } 1967 1968 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC) 1969 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC) 1970 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC) 1971 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC) 1972 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC) 1973 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC) 1974 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC) 1975 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC) 1976 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD) 1977 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD) 1978 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD) 1979 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD) 1980 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB) 1981 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) 1982 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) 1983 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) 1984 GEN_VEXT_VX(vmacc_vx_b, 1) 1985 GEN_VEXT_VX(vmacc_vx_h, 2) 1986 GEN_VEXT_VX(vmacc_vx_w, 4) 1987 GEN_VEXT_VX(vmacc_vx_d, 8) 1988 GEN_VEXT_VX(vnmsac_vx_b, 1) 1989 GEN_VEXT_VX(vnmsac_vx_h, 2) 1990 GEN_VEXT_VX(vnmsac_vx_w, 4) 1991 GEN_VEXT_VX(vnmsac_vx_d, 8) 1992 GEN_VEXT_VX(vmadd_vx_b, 1) 1993 GEN_VEXT_VX(vmadd_vx_h, 2) 1994 GEN_VEXT_VX(vmadd_vx_w, 4) 1995 GEN_VEXT_VX(vmadd_vx_d, 8) 1996 GEN_VEXT_VX(vnmsub_vx_b, 1) 1997 GEN_VEXT_VX(vnmsub_vx_h, 2) 1998 GEN_VEXT_VX(vnmsub_vx_w, 4) 1999 GEN_VEXT_VX(vnmsub_vx_d, 8) 2000 2001 /* Vector Widening Integer Multiply-Add Instructions */ 2002 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) 2003 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC) 2004 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC) 2005 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC) 2006 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC) 2007 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC) 2008 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) 2009 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) 2010 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) 2011 GEN_VEXT_VV(vwmaccu_vv_b, 2) 2012 GEN_VEXT_VV(vwmaccu_vv_h, 4) 2013 GEN_VEXT_VV(vwmaccu_vv_w, 8) 2014 GEN_VEXT_VV(vwmacc_vv_b, 2) 2015 GEN_VEXT_VV(vwmacc_vv_h, 4) 2016 GEN_VEXT_VV(vwmacc_vv_w, 8) 2017 GEN_VEXT_VV(vwmaccsu_vv_b, 2) 2018 GEN_VEXT_VV(vwmaccsu_vv_h, 4) 2019 GEN_VEXT_VV(vwmaccsu_vv_w, 8) 2020 2021 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) 2022 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) 2023 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC) 2024 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC) 2025 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC) 2026 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC) 2027 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC) 2028 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC) 2029 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC) 2030 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) 2031 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) 2032 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) 2033 GEN_VEXT_VX(vwmaccu_vx_b, 2) 2034 GEN_VEXT_VX(vwmaccu_vx_h, 4) 2035 GEN_VEXT_VX(vwmaccu_vx_w, 8) 2036 GEN_VEXT_VX(vwmacc_vx_b, 2) 2037 GEN_VEXT_VX(vwmacc_vx_h, 4) 2038 GEN_VEXT_VX(vwmacc_vx_w, 8) 2039 GEN_VEXT_VX(vwmaccsu_vx_b, 2) 2040 
GEN_VEXT_VX(vwmaccsu_vx_h, 4) 2041 GEN_VEXT_VX(vwmaccsu_vx_w, 8) 2042 GEN_VEXT_VX(vwmaccus_vx_b, 2) 2043 GEN_VEXT_VX(vwmaccus_vx_h, 4) 2044 GEN_VEXT_VX(vwmaccus_vx_w, 8) 2045 2046 /* Vector Integer Merge and Move Instructions */ 2047 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \ 2048 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \ 2049 uint32_t desc) \ 2050 { \ 2051 uint32_t vl = env->vl; \ 2052 uint32_t esz = sizeof(ETYPE); \ 2053 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2054 uint32_t vta = vext_vta(desc); \ 2055 uint32_t i; \ 2056 \ 2057 VSTART_CHECK_EARLY_EXIT(env, vl); \ 2058 \ 2059 for (i = env->vstart; i < vl; i++) { \ 2060 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 2061 *((ETYPE *)vd + H(i)) = s1; \ 2062 } \ 2063 env->vstart = 0; \ 2064 /* set tail elements to 1s */ \ 2065 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2066 } 2067 2068 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1) 2069 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2) 2070 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4) 2071 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8) 2072 2073 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \ 2074 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \ 2075 uint32_t desc) \ 2076 { \ 2077 uint32_t vl = env->vl; \ 2078 uint32_t esz = sizeof(ETYPE); \ 2079 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2080 uint32_t vta = vext_vta(desc); \ 2081 uint32_t i; \ 2082 \ 2083 VSTART_CHECK_EARLY_EXIT(env, vl); \ 2084 \ 2085 for (i = env->vstart; i < vl; i++) { \ 2086 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \ 2087 } \ 2088 env->vstart = 0; \ 2089 /* set tail elements to 1s */ \ 2090 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2091 } 2092 2093 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1) 2094 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2) 2095 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4) 2096 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8) 2097 2098 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \ 2099 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 2100 CPURISCVState *env, uint32_t desc) \ 2101 { \ 2102 uint32_t vl = env->vl; \ 2103 uint32_t esz = sizeof(ETYPE); \ 2104 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2105 uint32_t vta = vext_vta(desc); \ 2106 uint32_t i; \ 2107 \ 2108 VSTART_CHECK_EARLY_EXIT(env, vl); \ 2109 \ 2110 for (i = env->vstart; i < vl; i++) { \ 2111 ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1); \ 2112 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \ 2113 } \ 2114 env->vstart = 0; \ 2115 /* set tail elements to 1s */ \ 2116 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2117 } 2118 2119 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1) 2120 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2) 2121 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4) 2122 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8) 2123 2124 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \ 2125 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2126 void *vs2, CPURISCVState *env, uint32_t desc) \ 2127 { \ 2128 uint32_t vl = env->vl; \ 2129 uint32_t esz = sizeof(ETYPE); \ 2130 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2131 uint32_t vta = vext_vta(desc); \ 2132 uint32_t i; \ 2133 \ 2134 VSTART_CHECK_EARLY_EXIT(env, vl); \ 2135 \ 2136 for (i = env->vstart; i < vl; i++) { \ 2137 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 2138 ETYPE d = (!vext_elem_mask(v0, i) ? 
s2 : \ 2139 (ETYPE)(target_long)s1); \ 2140 *((ETYPE *)vd + H(i)) = d; \ 2141 } \ 2142 env->vstart = 0; \ 2143 /* set tail elements to 1s */ \ 2144 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2145 } 2146 2147 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1) 2148 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2) 2149 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4) 2150 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8) 2151 2152 /* 2153 * Vector Fixed-Point Arithmetic Instructions 2154 */ 2155 2156 /* Vector Single-Width Saturating Add and Subtract */ 2157 2158 /* 2159 * As fixed point instructions probably have round mode and saturation, 2160 * define common macros for fixed point here. 2161 */ 2162 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i, 2163 CPURISCVState *env, int vxrm); 2164 2165 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 2166 static inline void \ 2167 do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 2168 CPURISCVState *env, int vxrm) \ 2169 { \ 2170 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 2171 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2172 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \ 2173 } 2174 2175 static inline void 2176 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2, 2177 CPURISCVState *env, 2178 uint32_t vl, uint32_t vm, int vxrm, 2179 opivv2_rm_fn *fn, uint32_t vma, uint32_t esz) 2180 { 2181 for (uint32_t i = env->vstart; i < vl; i++) { 2182 if (!vm && !vext_elem_mask(v0, i)) { 2183 /* set masked-off elements to 1s */ 2184 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); 2185 continue; 2186 } 2187 fn(vd, vs1, vs2, i, env, vxrm); 2188 } 2189 env->vstart = 0; 2190 } 2191 2192 static inline void 2193 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, 2194 CPURISCVState *env, 2195 uint32_t desc, 2196 opivv2_rm_fn *fn, uint32_t esz) 2197 { 2198 uint32_t vm = vext_vm(desc); 2199 uint32_t vl = env->vl; 2200 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 2201 uint32_t vta = vext_vta(desc); 2202 uint32_t vma = vext_vma(desc); 2203 2204 VSTART_CHECK_EARLY_EXIT(env, vl); 2205 2206 switch (env->vxrm) { 2207 case 0: /* rnu */ 2208 vext_vv_rm_1(vd, v0, vs1, vs2, 2209 env, vl, vm, 0, fn, vma, esz); 2210 break; 2211 case 1: /* rne */ 2212 vext_vv_rm_1(vd, v0, vs1, vs2, 2213 env, vl, vm, 1, fn, vma, esz); 2214 break; 2215 case 2: /* rdn */ 2216 vext_vv_rm_1(vd, v0, vs1, vs2, 2217 env, vl, vm, 2, fn, vma, esz); 2218 break; 2219 default: /* rod */ 2220 vext_vv_rm_1(vd, v0, vs1, vs2, 2221 env, vl, vm, 3, fn, vma, esz); 2222 break; 2223 } 2224 /* set tail elements to 1s */ 2225 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 2226 } 2227 2228 /* generate helpers for fixed point instructions with OPIVV format */ 2229 #define GEN_VEXT_VV_RM(NAME, ESZ) \ 2230 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 2231 CPURISCVState *env, uint32_t desc) \ 2232 { \ 2233 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \ 2234 do_##NAME, ESZ); \ 2235 } 2236 2237 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, 2238 uint8_t b) 2239 { 2240 uint8_t res = a + b; 2241 if (res < a) { 2242 res = UINT8_MAX; 2243 env->vxsat = 0x1; 2244 } 2245 return res; 2246 } 2247 2248 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a, 2249 uint16_t b) 2250 { 2251 uint16_t res = a + b; 2252 if (res < a) { 2253 res = UINT16_MAX; 2254 env->vxsat = 0x1; 2255 } 2256 return res; 2257 } 2258 2259 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a, 2260 uint32_t b) 2261 { 2262 uint32_t res = a + b; 
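    /*
     * Unsigned overflow check: if the 32-bit sum wrapped around, it is
     * smaller than either operand, e.g. 0xffffffffu + 2 yields 1, which
     * is less than a; in that case saturate to UINT32_MAX and set vxsat.
     */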
2263 if (res < a) { 2264 res = UINT32_MAX; 2265 env->vxsat = 0x1; 2266 } 2267 return res; 2268 } 2269 2270 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a, 2271 uint64_t b) 2272 { 2273 uint64_t res = a + b; 2274 if (res < a) { 2275 res = UINT64_MAX; 2276 env->vxsat = 0x1; 2277 } 2278 return res; 2279 } 2280 2281 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8) 2282 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) 2283 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) 2284 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64) 2285 GEN_VEXT_VV_RM(vsaddu_vv_b, 1) 2286 GEN_VEXT_VV_RM(vsaddu_vv_h, 2) 2287 GEN_VEXT_VV_RM(vsaddu_vv_w, 4) 2288 GEN_VEXT_VV_RM(vsaddu_vv_d, 8) 2289 2290 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, 2291 CPURISCVState *env, int vxrm); 2292 2293 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2294 static inline void \ 2295 do_##NAME(void *vd, target_long s1, void *vs2, int i, \ 2296 CPURISCVState *env, int vxrm) \ 2297 { \ 2298 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2299 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \ 2300 } 2301 2302 static inline void 2303 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2, 2304 CPURISCVState *env, 2305 uint32_t vl, uint32_t vm, int vxrm, 2306 opivx2_rm_fn *fn, uint32_t vma, uint32_t esz) 2307 { 2308 for (uint32_t i = env->vstart; i < vl; i++) { 2309 if (!vm && !vext_elem_mask(v0, i)) { 2310 /* set masked-off elements to 1s */ 2311 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); 2312 continue; 2313 } 2314 fn(vd, s1, vs2, i, env, vxrm); 2315 } 2316 env->vstart = 0; 2317 } 2318 2319 static inline void 2320 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, 2321 CPURISCVState *env, 2322 uint32_t desc, 2323 opivx2_rm_fn *fn, uint32_t esz) 2324 { 2325 uint32_t vm = vext_vm(desc); 2326 uint32_t vl = env->vl; 2327 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 2328 uint32_t vta = vext_vta(desc); 2329 uint32_t vma = vext_vma(desc); 2330 2331 VSTART_CHECK_EARLY_EXIT(env, vl); 2332 2333 switch (env->vxrm) { 2334 case 0: /* rnu */ 2335 vext_vx_rm_1(vd, v0, s1, vs2, 2336 env, vl, vm, 0, fn, vma, esz); 2337 break; 2338 case 1: /* rne */ 2339 vext_vx_rm_1(vd, v0, s1, vs2, 2340 env, vl, vm, 1, fn, vma, esz); 2341 break; 2342 case 2: /* rdn */ 2343 vext_vx_rm_1(vd, v0, s1, vs2, 2344 env, vl, vm, 2, fn, vma, esz); 2345 break; 2346 default: /* rod */ 2347 vext_vx_rm_1(vd, v0, s1, vs2, 2348 env, vl, vm, 3, fn, vma, esz); 2349 break; 2350 } 2351 /* set tail elements to 1s */ 2352 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 2353 } 2354 2355 /* generate helpers for fixed point instructions with OPIVX format */ 2356 #define GEN_VEXT_VX_RM(NAME, ESZ) \ 2357 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2358 void *vs2, CPURISCVState *env, \ 2359 uint32_t desc) \ 2360 { \ 2361 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \ 2362 do_##NAME, ESZ); \ 2363 } 2364 2365 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) 2366 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) 2367 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) 2368 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) 2369 GEN_VEXT_VX_RM(vsaddu_vx_b, 1) 2370 GEN_VEXT_VX_RM(vsaddu_vx_h, 2) 2371 GEN_VEXT_VX_RM(vsaddu_vx_w, 4) 2372 GEN_VEXT_VX_RM(vsaddu_vx_d, 8) 2373 2374 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2375 { 2376 int8_t res = a + b; 2377 if ((res ^ a) & (res ^ 
b) & INT8_MIN) { 2378 res = a > 0 ? INT8_MAX : INT8_MIN; 2379 env->vxsat = 0x1; 2380 } 2381 return res; 2382 } 2383 2384 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, 2385 int16_t b) 2386 { 2387 int16_t res = a + b; 2388 if ((res ^ a) & (res ^ b) & INT16_MIN) { 2389 res = a > 0 ? INT16_MAX : INT16_MIN; 2390 env->vxsat = 0x1; 2391 } 2392 return res; 2393 } 2394 2395 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, 2396 int32_t b) 2397 { 2398 int32_t res = a + b; 2399 if ((res ^ a) & (res ^ b) & INT32_MIN) { 2400 res = a > 0 ? INT32_MAX : INT32_MIN; 2401 env->vxsat = 0x1; 2402 } 2403 return res; 2404 } 2405 2406 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, 2407 int64_t b) 2408 { 2409 int64_t res = a + b; 2410 if ((res ^ a) & (res ^ b) & INT64_MIN) { 2411 res = a > 0 ? INT64_MAX : INT64_MIN; 2412 env->vxsat = 0x1; 2413 } 2414 return res; 2415 } 2416 2417 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8) 2418 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) 2419 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) 2420 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) 2421 GEN_VEXT_VV_RM(vsadd_vv_b, 1) 2422 GEN_VEXT_VV_RM(vsadd_vv_h, 2) 2423 GEN_VEXT_VV_RM(vsadd_vv_w, 4) 2424 GEN_VEXT_VV_RM(vsadd_vv_d, 8) 2425 2426 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) 2427 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) 2428 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) 2429 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) 2430 GEN_VEXT_VX_RM(vsadd_vx_b, 1) 2431 GEN_VEXT_VX_RM(vsadd_vx_h, 2) 2432 GEN_VEXT_VX_RM(vsadd_vx_w, 4) 2433 GEN_VEXT_VX_RM(vsadd_vx_d, 8) 2434 2435 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, 2436 uint8_t b) 2437 { 2438 uint8_t res = a - b; 2439 if (res > a) { 2440 res = 0; 2441 env->vxsat = 0x1; 2442 } 2443 return res; 2444 } 2445 2446 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a, 2447 uint16_t b) 2448 { 2449 uint16_t res = a - b; 2450 if (res > a) { 2451 res = 0; 2452 env->vxsat = 0x1; 2453 } 2454 return res; 2455 } 2456 2457 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a, 2458 uint32_t b) 2459 { 2460 uint32_t res = a - b; 2461 if (res > a) { 2462 res = 0; 2463 env->vxsat = 0x1; 2464 } 2465 return res; 2466 } 2467 2468 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a, 2469 uint64_t b) 2470 { 2471 uint64_t res = a - b; 2472 if (res > a) { 2473 res = 0; 2474 env->vxsat = 0x1; 2475 } 2476 return res; 2477 } 2478 2479 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8) 2480 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16) 2481 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) 2482 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64) 2483 GEN_VEXT_VV_RM(vssubu_vv_b, 1) 2484 GEN_VEXT_VV_RM(vssubu_vv_h, 2) 2485 GEN_VEXT_VV_RM(vssubu_vv_w, 4) 2486 GEN_VEXT_VV_RM(vssubu_vv_d, 8) 2487 2488 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) 2489 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) 2490 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) 2491 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) 2492 GEN_VEXT_VX_RM(vssubu_vx_b, 1) 2493 GEN_VEXT_VX_RM(vssubu_vx_h, 2) 2494 GEN_VEXT_VX_RM(vssubu_vx_w, 4) 2495 GEN_VEXT_VX_RM(vssubu_vx_d, 8) 2496 2497 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2498 { 2499 int8_t res = a - b; 2500 
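    /*
     * Signed subtraction overflows only when a and b have opposite signs
     * and the truncated result takes the sign of b; both conditions are
     * captured by (res ^ a) & (a ^ b) & INT8_MIN.  For example a = INT8_MIN,
     * b = 1: the true difference -129 truncates to 127, the test below
     * fires, and the value saturates to INT8_MIN.
     */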
if ((res ^ a) & (a ^ b) & INT8_MIN) { 2501 res = a >= 0 ? INT8_MAX : INT8_MIN; 2502 env->vxsat = 0x1; 2503 } 2504 return res; 2505 } 2506 2507 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, 2508 int16_t b) 2509 { 2510 int16_t res = a - b; 2511 if ((res ^ a) & (a ^ b) & INT16_MIN) { 2512 res = a >= 0 ? INT16_MAX : INT16_MIN; 2513 env->vxsat = 0x1; 2514 } 2515 return res; 2516 } 2517 2518 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, 2519 int32_t b) 2520 { 2521 int32_t res = a - b; 2522 if ((res ^ a) & (a ^ b) & INT32_MIN) { 2523 res = a >= 0 ? INT32_MAX : INT32_MIN; 2524 env->vxsat = 0x1; 2525 } 2526 return res; 2527 } 2528 2529 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, 2530 int64_t b) 2531 { 2532 int64_t res = a - b; 2533 if ((res ^ a) & (a ^ b) & INT64_MIN) { 2534 res = a >= 0 ? INT64_MAX : INT64_MIN; 2535 env->vxsat = 0x1; 2536 } 2537 return res; 2538 } 2539 2540 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8) 2541 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16) 2542 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32) 2543 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64) 2544 GEN_VEXT_VV_RM(vssub_vv_b, 1) 2545 GEN_VEXT_VV_RM(vssub_vv_h, 2) 2546 GEN_VEXT_VV_RM(vssub_vv_w, 4) 2547 GEN_VEXT_VV_RM(vssub_vv_d, 8) 2548 2549 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8) 2550 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16) 2551 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32) 2552 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64) 2553 GEN_VEXT_VX_RM(vssub_vx_b, 1) 2554 GEN_VEXT_VX_RM(vssub_vx_h, 2) 2555 GEN_VEXT_VX_RM(vssub_vx_w, 4) 2556 GEN_VEXT_VX_RM(vssub_vx_d, 8) 2557 2558 /* Vector Single-Width Averaging Add and Subtract */ 2559 static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift) 2560 { 2561 uint8_t d = extract64(v, shift, 1); 2562 uint8_t d1; 2563 uint64_t D1, D2; 2564 2565 if (shift == 0 || shift > 64) { 2566 return 0; 2567 } 2568 2569 d1 = extract64(v, shift - 1, 1); 2570 D1 = extract64(v, 0, shift); 2571 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */ 2572 return d1; 2573 } else if (vxrm == 1) { /* round-to-nearest-even */ 2574 if (shift > 1) { 2575 D2 = extract64(v, 0, shift - 1); 2576 return d1 & ((D2 != 0) | d); 2577 } else { 2578 return d1 & d; 2579 } 2580 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */ 2581 return !d & (D1 != 0); 2582 } 2583 return 0; /* round-down (truncate) */ 2584 } 2585 2586 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, 2587 int32_t b) 2588 { 2589 int64_t res = (int64_t)a + b; 2590 uint8_t round = get_round(vxrm, res, 1); 2591 2592 return (res >> 1) + round; 2593 } 2594 2595 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, 2596 int64_t b) 2597 { 2598 int64_t res = a + b; 2599 uint8_t round = get_round(vxrm, res, 1); 2600 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN; 2601 2602 /* With signed overflow, bit 64 is inverse of bit 63. 
*/ 2603 return ((res >> 1) ^ over) + round; 2604 } 2605 2606 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32) 2607 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) 2608 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) 2609 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) 2610 GEN_VEXT_VV_RM(vaadd_vv_b, 1) 2611 GEN_VEXT_VV_RM(vaadd_vv_h, 2) 2612 GEN_VEXT_VV_RM(vaadd_vv_w, 4) 2613 GEN_VEXT_VV_RM(vaadd_vv_d, 8) 2614 2615 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) 2616 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) 2617 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) 2618 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) 2619 GEN_VEXT_VX_RM(vaadd_vx_b, 1) 2620 GEN_VEXT_VX_RM(vaadd_vx_h, 2) 2621 GEN_VEXT_VX_RM(vaadd_vx_w, 4) 2622 GEN_VEXT_VX_RM(vaadd_vx_d, 8) 2623 2624 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm, 2625 uint32_t a, uint32_t b) 2626 { 2627 uint64_t res = (uint64_t)a + b; 2628 uint8_t round = get_round(vxrm, res, 1); 2629 2630 return (res >> 1) + round; 2631 } 2632 2633 static inline uint64_t aaddu64(CPURISCVState *env, int vxrm, 2634 uint64_t a, uint64_t b) 2635 { 2636 uint64_t res = a + b; 2637 uint8_t round = get_round(vxrm, res, 1); 2638 uint64_t over = (uint64_t)(res < a) << 63; 2639 2640 return ((res >> 1) | over) + round; 2641 } 2642 2643 RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32) 2644 RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32) 2645 RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32) 2646 RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64) 2647 GEN_VEXT_VV_RM(vaaddu_vv_b, 1) 2648 GEN_VEXT_VV_RM(vaaddu_vv_h, 2) 2649 GEN_VEXT_VV_RM(vaaddu_vv_w, 4) 2650 GEN_VEXT_VV_RM(vaaddu_vv_d, 8) 2651 2652 RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32) 2653 RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32) 2654 RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32) 2655 RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64) 2656 GEN_VEXT_VX_RM(vaaddu_vx_b, 1) 2657 GEN_VEXT_VX_RM(vaaddu_vx_h, 2) 2658 GEN_VEXT_VX_RM(vaaddu_vx_w, 4) 2659 GEN_VEXT_VX_RM(vaaddu_vx_d, 8) 2660 2661 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, 2662 int32_t b) 2663 { 2664 int64_t res = (int64_t)a - b; 2665 uint8_t round = get_round(vxrm, res, 1); 2666 2667 return (res >> 1) + round; 2668 } 2669 2670 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, 2671 int64_t b) 2672 { 2673 int64_t res = (int64_t)a - b; 2674 uint8_t round = get_round(vxrm, res, 1); 2675 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN; 2676 2677 /* With signed overflow, bit 64 is inverse of bit 63. 
*/ 2678 return ((res >> 1) ^ over) + round; 2679 } 2680 2681 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32) 2682 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32) 2683 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) 2684 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) 2685 GEN_VEXT_VV_RM(vasub_vv_b, 1) 2686 GEN_VEXT_VV_RM(vasub_vv_h, 2) 2687 GEN_VEXT_VV_RM(vasub_vv_w, 4) 2688 GEN_VEXT_VV_RM(vasub_vv_d, 8) 2689 2690 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) 2691 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) 2692 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) 2693 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) 2694 GEN_VEXT_VX_RM(vasub_vx_b, 1) 2695 GEN_VEXT_VX_RM(vasub_vx_h, 2) 2696 GEN_VEXT_VX_RM(vasub_vx_w, 4) 2697 GEN_VEXT_VX_RM(vasub_vx_d, 8) 2698 2699 static inline uint32_t asubu32(CPURISCVState *env, int vxrm, 2700 uint32_t a, uint32_t b) 2701 { 2702 int64_t res = (int64_t)a - b; 2703 uint8_t round = get_round(vxrm, res, 1); 2704 2705 return (res >> 1) + round; 2706 } 2707 2708 static inline uint64_t asubu64(CPURISCVState *env, int vxrm, 2709 uint64_t a, uint64_t b) 2710 { 2711 uint64_t res = (uint64_t)a - b; 2712 uint8_t round = get_round(vxrm, res, 1); 2713 uint64_t over = (uint64_t)(res > a) << 63; 2714 2715 return ((res >> 1) | over) + round; 2716 } 2717 2718 RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32) 2719 RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32) 2720 RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32) 2721 RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64) 2722 GEN_VEXT_VV_RM(vasubu_vv_b, 1) 2723 GEN_VEXT_VV_RM(vasubu_vv_h, 2) 2724 GEN_VEXT_VV_RM(vasubu_vv_w, 4) 2725 GEN_VEXT_VV_RM(vasubu_vv_d, 8) 2726 2727 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32) 2728 RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32) 2729 RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32) 2730 RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64) 2731 GEN_VEXT_VX_RM(vasubu_vx_b, 1) 2732 GEN_VEXT_VX_RM(vasubu_vx_h, 2) 2733 GEN_VEXT_VX_RM(vasubu_vx_w, 4) 2734 GEN_VEXT_VX_RM(vasubu_vx_d, 8) 2735 2736 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */ 2737 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2738 { 2739 uint8_t round; 2740 int16_t res; 2741 2742 res = (int16_t)a * (int16_t)b; 2743 round = get_round(vxrm, res, 7); 2744 res = (res >> 7) + round; 2745 2746 if (res > INT8_MAX) { 2747 env->vxsat = 0x1; 2748 return INT8_MAX; 2749 } else if (res < INT8_MIN) { 2750 env->vxsat = 0x1; 2751 return INT8_MIN; 2752 } else { 2753 return res; 2754 } 2755 } 2756 2757 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2758 { 2759 uint8_t round; 2760 int32_t res; 2761 2762 res = (int32_t)a * (int32_t)b; 2763 round = get_round(vxrm, res, 15); 2764 res = (res >> 15) + round; 2765 2766 if (res > INT16_MAX) { 2767 env->vxsat = 0x1; 2768 return INT16_MAX; 2769 } else if (res < INT16_MIN) { 2770 env->vxsat = 0x1; 2771 return INT16_MIN; 2772 } else { 2773 return res; 2774 } 2775 } 2776 2777 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2778 { 2779 uint8_t round; 2780 int64_t res; 2781 2782 res = (int64_t)a * (int64_t)b; 2783 round = get_round(vxrm, res, 31); 2784 res = (res >> 31) + round; 2785 2786 if (res > INT32_MAX) { 2787 env->vxsat = 0x1; 2788 return INT32_MAX; 2789 } else if (res < INT32_MIN) { 2790 env->vxsat = 0x1; 
2791 return INT32_MIN; 2792 } else { 2793 return res; 2794 } 2795 } 2796 2797 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2798 { 2799 uint8_t round; 2800 uint64_t hi_64, lo_64; 2801 int64_t res; 2802 2803 if (a == INT64_MIN && b == INT64_MIN) { 2804 env->vxsat = 1; 2805 return INT64_MAX; 2806 } 2807 2808 muls64(&lo_64, &hi_64, a, b); 2809 round = get_round(vxrm, lo_64, 63); 2810 /* 2811 * Cannot overflow, as there are always 2812 * 2 sign bits after multiply. 2813 */ 2814 res = (hi_64 << 1) | (lo_64 >> 63); 2815 if (round) { 2816 if (res == INT64_MAX) { 2817 env->vxsat = 1; 2818 } else { 2819 res += 1; 2820 } 2821 } 2822 return res; 2823 } 2824 2825 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8) 2826 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) 2827 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) 2828 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) 2829 GEN_VEXT_VV_RM(vsmul_vv_b, 1) 2830 GEN_VEXT_VV_RM(vsmul_vv_h, 2) 2831 GEN_VEXT_VV_RM(vsmul_vv_w, 4) 2832 GEN_VEXT_VV_RM(vsmul_vv_d, 8) 2833 2834 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) 2835 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) 2836 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) 2837 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) 2838 GEN_VEXT_VX_RM(vsmul_vx_b, 1) 2839 GEN_VEXT_VX_RM(vsmul_vx_h, 2) 2840 GEN_VEXT_VX_RM(vsmul_vx_w, 4) 2841 GEN_VEXT_VX_RM(vsmul_vx_d, 8) 2842 2843 /* Vector Single-Width Scaling Shift Instructions */ 2844 static inline uint8_t 2845 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2846 { 2847 uint8_t round, shift = b & 0x7; 2848 uint8_t res; 2849 2850 round = get_round(vxrm, a, shift); 2851 res = (a >> shift) + round; 2852 return res; 2853 } 2854 static inline uint16_t 2855 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b) 2856 { 2857 uint8_t round, shift = b & 0xf; 2858 2859 round = get_round(vxrm, a, shift); 2860 return (a >> shift) + round; 2861 } 2862 static inline uint32_t 2863 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) 2864 { 2865 uint8_t round, shift = b & 0x1f; 2866 2867 round = get_round(vxrm, a, shift); 2868 return (a >> shift) + round; 2869 } 2870 static inline uint64_t 2871 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b) 2872 { 2873 uint8_t round, shift = b & 0x3f; 2874 2875 round = get_round(vxrm, a, shift); 2876 return (a >> shift) + round; 2877 } 2878 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8) 2879 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) 2880 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) 2881 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) 2882 GEN_VEXT_VV_RM(vssrl_vv_b, 1) 2883 GEN_VEXT_VV_RM(vssrl_vv_h, 2) 2884 GEN_VEXT_VV_RM(vssrl_vv_w, 4) 2885 GEN_VEXT_VV_RM(vssrl_vv_d, 8) 2886 2887 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) 2888 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) 2889 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) 2890 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) 2891 GEN_VEXT_VX_RM(vssrl_vx_b, 1) 2892 GEN_VEXT_VX_RM(vssrl_vx_h, 2) 2893 GEN_VEXT_VX_RM(vssrl_vx_w, 4) 2894 GEN_VEXT_VX_RM(vssrl_vx_d, 8) 2895 2896 static inline int8_t 2897 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2898 { 2899 uint8_t round, shift = b & 0x7; 2900 2901 round = get_round(vxrm, a, shift); 2902 return (a >> shift) + round; 2903 } 2904 static inline int16_t 2905 
vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2906 { 2907 uint8_t round, shift = b & 0xf; 2908 2909 round = get_round(vxrm, a, shift); 2910 return (a >> shift) + round; 2911 } 2912 static inline int32_t 2913 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2914 { 2915 uint8_t round, shift = b & 0x1f; 2916 2917 round = get_round(vxrm, a, shift); 2918 return (a >> shift) + round; 2919 } 2920 static inline int64_t 2921 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2922 { 2923 uint8_t round, shift = b & 0x3f; 2924 2925 round = get_round(vxrm, a, shift); 2926 return (a >> shift) + round; 2927 } 2928 2929 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) 2930 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) 2931 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) 2932 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) 2933 GEN_VEXT_VV_RM(vssra_vv_b, 1) 2934 GEN_VEXT_VV_RM(vssra_vv_h, 2) 2935 GEN_VEXT_VV_RM(vssra_vv_w, 4) 2936 GEN_VEXT_VV_RM(vssra_vv_d, 8) 2937 2938 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) 2939 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) 2940 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32) 2941 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) 2942 GEN_VEXT_VX_RM(vssra_vx_b, 1) 2943 GEN_VEXT_VX_RM(vssra_vx_h, 2) 2944 GEN_VEXT_VX_RM(vssra_vx_w, 4) 2945 GEN_VEXT_VX_RM(vssra_vx_d, 8) 2946 2947 /* Vector Narrowing Fixed-Point Clip Instructions */ 2948 static inline int8_t 2949 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b) 2950 { 2951 uint8_t round, shift = b & 0xf; 2952 int16_t res; 2953 2954 round = get_round(vxrm, a, shift); 2955 res = (a >> shift) + round; 2956 if (res > INT8_MAX) { 2957 env->vxsat = 0x1; 2958 return INT8_MAX; 2959 } else if (res < INT8_MIN) { 2960 env->vxsat = 0x1; 2961 return INT8_MIN; 2962 } else { 2963 return res; 2964 } 2965 } 2966 2967 static inline int16_t 2968 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b) 2969 { 2970 uint8_t round, shift = b & 0x1f; 2971 int32_t res; 2972 2973 round = get_round(vxrm, a, shift); 2974 res = (a >> shift) + round; 2975 if (res > INT16_MAX) { 2976 env->vxsat = 0x1; 2977 return INT16_MAX; 2978 } else if (res < INT16_MIN) { 2979 env->vxsat = 0x1; 2980 return INT16_MIN; 2981 } else { 2982 return res; 2983 } 2984 } 2985 2986 static inline int32_t 2987 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) 2988 { 2989 uint8_t round, shift = b & 0x3f; 2990 int64_t res; 2991 2992 round = get_round(vxrm, a, shift); 2993 res = (a >> shift) + round; 2994 if (res > INT32_MAX) { 2995 env->vxsat = 0x1; 2996 return INT32_MAX; 2997 } else if (res < INT32_MIN) { 2998 env->vxsat = 0x1; 2999 return INT32_MIN; 3000 } else { 3001 return res; 3002 } 3003 } 3004 3005 RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8) 3006 RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16) 3007 RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32) 3008 GEN_VEXT_VV_RM(vnclip_wv_b, 1) 3009 GEN_VEXT_VV_RM(vnclip_wv_h, 2) 3010 GEN_VEXT_VV_RM(vnclip_wv_w, 4) 3011 3012 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8) 3013 RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16) 3014 RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32) 3015 GEN_VEXT_VX_RM(vnclip_wx_b, 1) 3016 GEN_VEXT_VX_RM(vnclip_wx_h, 2) 3017 GEN_VEXT_VX_RM(vnclip_wx_w, 4) 3018 3019 static inline uint8_t 3020 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b) 3021 { 3022 
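    /*
     * Unsigned narrowing clip: the source is 2*SEW (16 bits) wide, the
     * shift amount comes from the low 4 bits of b, rounding follows vxrm
     * via get_round(), and any result above UINT8_MAX saturates with
     * vxsat set.  E.g. with vxrm = 0 (rnu), a = 0x1ff, shift = 1:
     * (0x1ff >> 1) + 1 = 0x100, which clips to 0xff.
     */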
uint8_t round, shift = b & 0xf; 3023 uint16_t res; 3024 3025 round = get_round(vxrm, a, shift); 3026 res = (a >> shift) + round; 3027 if (res > UINT8_MAX) { 3028 env->vxsat = 0x1; 3029 return UINT8_MAX; 3030 } else { 3031 return res; 3032 } 3033 } 3034 3035 static inline uint16_t 3036 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b) 3037 { 3038 uint8_t round, shift = b & 0x1f; 3039 uint32_t res; 3040 3041 round = get_round(vxrm, a, shift); 3042 res = (a >> shift) + round; 3043 if (res > UINT16_MAX) { 3044 env->vxsat = 0x1; 3045 return UINT16_MAX; 3046 } else { 3047 return res; 3048 } 3049 } 3050 3051 static inline uint32_t 3052 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b) 3053 { 3054 uint8_t round, shift = b & 0x3f; 3055 uint64_t res; 3056 3057 round = get_round(vxrm, a, shift); 3058 res = (a >> shift) + round; 3059 if (res > UINT32_MAX) { 3060 env->vxsat = 0x1; 3061 return UINT32_MAX; 3062 } else { 3063 return res; 3064 } 3065 } 3066 3067 RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8) 3068 RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16) 3069 RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32) 3070 GEN_VEXT_VV_RM(vnclipu_wv_b, 1) 3071 GEN_VEXT_VV_RM(vnclipu_wv_h, 2) 3072 GEN_VEXT_VV_RM(vnclipu_wv_w, 4) 3073 3074 RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8) 3075 RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16) 3076 RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32) 3077 GEN_VEXT_VX_RM(vnclipu_wx_b, 1) 3078 GEN_VEXT_VX_RM(vnclipu_wx_h, 2) 3079 GEN_VEXT_VX_RM(vnclipu_wx_w, 4) 3080 3081 /* 3082 * Vector Float Point Arithmetic Instructions 3083 */ 3084 /* Vector Single-Width Floating-Point Add/Subtract Instructions */ 3085 #define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3086 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3087 CPURISCVState *env) \ 3088 { \ 3089 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3090 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3091 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \ 3092 } 3093 3094 #define GEN_VEXT_VV_ENV(NAME, ESZ) \ 3095 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 3096 void *vs2, CPURISCVState *env, \ 3097 uint32_t desc) \ 3098 { \ 3099 uint32_t vm = vext_vm(desc); \ 3100 uint32_t vl = env->vl; \ 3101 uint32_t total_elems = \ 3102 vext_get_total_elems(env, desc, ESZ); \ 3103 uint32_t vta = vext_vta(desc); \ 3104 uint32_t vma = vext_vma(desc); \ 3105 uint32_t i; \ 3106 \ 3107 VSTART_CHECK_EARLY_EXIT(env, vl); \ 3108 \ 3109 for (i = env->vstart; i < vl; i++) { \ 3110 if (!vm && !vext_elem_mask(v0, i)) { \ 3111 /* set masked-off elements to 1s */ \ 3112 vext_set_elems_1s(vd, vma, i * ESZ, \ 3113 (i + 1) * ESZ); \ 3114 continue; \ 3115 } \ 3116 do_##NAME(vd, vs1, vs2, i, env); \ 3117 } \ 3118 env->vstart = 0; \ 3119 /* set tail elements to 1s */ \ 3120 vext_set_elems_1s(vd, vta, vl * ESZ, \ 3121 total_elems * ESZ); \ 3122 } 3123 3124 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add) 3125 RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add) 3126 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add) 3127 GEN_VEXT_VV_ENV(vfadd_vv_h, 2) 3128 GEN_VEXT_VV_ENV(vfadd_vv_w, 4) 3129 GEN_VEXT_VV_ENV(vfadd_vv_d, 8) 3130 3131 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3132 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3133 CPURISCVState *env) \ 3134 { \ 3135 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3136 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, 
&env->fp_status);\ 3137 } 3138 3139 #define GEN_VEXT_VF(NAME, ESZ) \ 3140 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ 3141 void *vs2, CPURISCVState *env, \ 3142 uint32_t desc) \ 3143 { \ 3144 uint32_t vm = vext_vm(desc); \ 3145 uint32_t vl = env->vl; \ 3146 uint32_t total_elems = \ 3147 vext_get_total_elems(env, desc, ESZ); \ 3148 uint32_t vta = vext_vta(desc); \ 3149 uint32_t vma = vext_vma(desc); \ 3150 uint32_t i; \ 3151 \ 3152 VSTART_CHECK_EARLY_EXIT(env, vl); \ 3153 \ 3154 for (i = env->vstart; i < vl; i++) { \ 3155 if (!vm && !vext_elem_mask(v0, i)) { \ 3156 /* set masked-off elements to 1s */ \ 3157 vext_set_elems_1s(vd, vma, i * ESZ, \ 3158 (i + 1) * ESZ); \ 3159 continue; \ 3160 } \ 3161 do_##NAME(vd, s1, vs2, i, env); \ 3162 } \ 3163 env->vstart = 0; \ 3164 /* set tail elements to 1s */ \ 3165 vext_set_elems_1s(vd, vta, vl * ESZ, \ 3166 total_elems * ESZ); \ 3167 } 3168 3169 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) 3170 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) 3171 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) 3172 GEN_VEXT_VF(vfadd_vf_h, 2) 3173 GEN_VEXT_VF(vfadd_vf_w, 4) 3174 GEN_VEXT_VF(vfadd_vf_d, 8) 3175 3176 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) 3177 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) 3178 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) 3179 GEN_VEXT_VV_ENV(vfsub_vv_h, 2) 3180 GEN_VEXT_VV_ENV(vfsub_vv_w, 4) 3181 GEN_VEXT_VV_ENV(vfsub_vv_d, 8) 3182 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) 3183 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) 3184 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) 3185 GEN_VEXT_VF(vfsub_vf_h, 2) 3186 GEN_VEXT_VF(vfsub_vf_w, 4) 3187 GEN_VEXT_VF(vfsub_vf_d, 8) 3188 3189 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) 3190 { 3191 return float16_sub(b, a, s); 3192 } 3193 3194 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s) 3195 { 3196 return float32_sub(b, a, s); 3197 } 3198 3199 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s) 3200 { 3201 return float64_sub(b, a, s); 3202 } 3203 3204 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) 3205 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) 3206 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) 3207 GEN_VEXT_VF(vfrsub_vf_h, 2) 3208 GEN_VEXT_VF(vfrsub_vf_w, 4) 3209 GEN_VEXT_VF(vfrsub_vf_d, 8) 3210 3211 /* Vector Widening Floating-Point Add/Subtract Instructions */ 3212 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) 3213 { 3214 return float32_add(float16_to_float32(a, true, s), 3215 float16_to_float32(b, true, s), s); 3216 } 3217 3218 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) 3219 { 3220 return float64_add(float32_to_float64(a, s), 3221 float32_to_float64(b, s), s); 3222 3223 } 3224 3225 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) 3226 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) 3227 GEN_VEXT_VV_ENV(vfwadd_vv_h, 4) 3228 GEN_VEXT_VV_ENV(vfwadd_vv_w, 8) 3229 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) 3230 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) 3231 GEN_VEXT_VF(vfwadd_vf_h, 4) 3232 GEN_VEXT_VF(vfwadd_vf_w, 8) 3233 3234 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) 3235 { 3236 return float32_sub(float16_to_float32(a, true, s), 3237 float16_to_float32(b, true, s), s); 3238 } 3239 3240 static uint64_t vfwsub32(uint32_t a, 
uint32_t b, float_status *s) 3241 { 3242 return float64_sub(float32_to_float64(a, s), 3243 float32_to_float64(b, s), s); 3244 3245 } 3246 3247 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) 3248 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) 3249 GEN_VEXT_VV_ENV(vfwsub_vv_h, 4) 3250 GEN_VEXT_VV_ENV(vfwsub_vv_w, 8) 3251 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16) 3252 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) 3253 GEN_VEXT_VF(vfwsub_vf_h, 4) 3254 GEN_VEXT_VF(vfwsub_vf_w, 8) 3255 3256 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) 3257 { 3258 return float32_add(a, float16_to_float32(b, true, s), s); 3259 } 3260 3261 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s) 3262 { 3263 return float64_add(a, float32_to_float64(b, s), s); 3264 } 3265 3266 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) 3267 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) 3268 GEN_VEXT_VV_ENV(vfwadd_wv_h, 4) 3269 GEN_VEXT_VV_ENV(vfwadd_wv_w, 8) 3270 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) 3271 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) 3272 GEN_VEXT_VF(vfwadd_wf_h, 4) 3273 GEN_VEXT_VF(vfwadd_wf_w, 8) 3274 3275 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) 3276 { 3277 return float32_sub(a, float16_to_float32(b, true, s), s); 3278 } 3279 3280 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s) 3281 { 3282 return float64_sub(a, float32_to_float64(b, s), s); 3283 } 3284 3285 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) 3286 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) 3287 GEN_VEXT_VV_ENV(vfwsub_wv_h, 4) 3288 GEN_VEXT_VV_ENV(vfwsub_wv_w, 8) 3289 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) 3290 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) 3291 GEN_VEXT_VF(vfwsub_wf_h, 4) 3292 GEN_VEXT_VF(vfwsub_wf_w, 8) 3293 3294 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */ 3295 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) 3296 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) 3297 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) 3298 GEN_VEXT_VV_ENV(vfmul_vv_h, 2) 3299 GEN_VEXT_VV_ENV(vfmul_vv_w, 4) 3300 GEN_VEXT_VV_ENV(vfmul_vv_d, 8) 3301 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) 3302 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) 3303 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) 3304 GEN_VEXT_VF(vfmul_vf_h, 2) 3305 GEN_VEXT_VF(vfmul_vf_w, 4) 3306 GEN_VEXT_VF(vfmul_vf_d, 8) 3307 3308 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) 3309 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) 3310 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) 3311 GEN_VEXT_VV_ENV(vfdiv_vv_h, 2) 3312 GEN_VEXT_VV_ENV(vfdiv_vv_w, 4) 3313 GEN_VEXT_VV_ENV(vfdiv_vv_d, 8) 3314 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) 3315 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) 3316 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) 3317 GEN_VEXT_VF(vfdiv_vf_h, 2) 3318 GEN_VEXT_VF(vfdiv_vf_w, 4) 3319 GEN_VEXT_VF(vfdiv_vf_d, 8) 3320 3321 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) 3322 { 3323 return float16_div(b, a, s); 3324 } 3325 3326 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s) 3327 { 3328 return float32_div(b, a, s); 3329 } 3330 3331 static uint64_t 
float64_rdiv(uint64_t a, uint64_t b, float_status *s) 3332 { 3333 return float64_div(b, a, s); 3334 } 3335 3336 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) 3337 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) 3338 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) 3339 GEN_VEXT_VF(vfrdiv_vf_h, 2) 3340 GEN_VEXT_VF(vfrdiv_vf_w, 4) 3341 GEN_VEXT_VF(vfrdiv_vf_d, 8) 3342 3343 /* Vector Widening Floating-Point Multiply */ 3344 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) 3345 { 3346 return float32_mul(float16_to_float32(a, true, s), 3347 float16_to_float32(b, true, s), s); 3348 } 3349 3350 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) 3351 { 3352 return float64_mul(float32_to_float64(a, s), 3353 float32_to_float64(b, s), s); 3354 3355 } 3356 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) 3357 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32) 3358 GEN_VEXT_VV_ENV(vfwmul_vv_h, 4) 3359 GEN_VEXT_VV_ENV(vfwmul_vv_w, 8) 3360 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) 3361 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) 3362 GEN_VEXT_VF(vfwmul_vf_h, 4) 3363 GEN_VEXT_VF(vfwmul_vf_w, 8) 3364 3365 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ 3366 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3367 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3368 CPURISCVState *env) \ 3369 { \ 3370 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3371 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3372 TD d = *((TD *)vd + HD(i)); \ 3373 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \ 3374 } 3375 3376 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3377 { 3378 return float16_muladd(a, b, d, 0, s); 3379 } 3380 3381 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3382 { 3383 return float32_muladd(a, b, d, 0, s); 3384 } 3385 3386 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3387 { 3388 return float64_muladd(a, b, d, 0, s); 3389 } 3390 3391 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) 3392 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) 3393 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) 3394 GEN_VEXT_VV_ENV(vfmacc_vv_h, 2) 3395 GEN_VEXT_VV_ENV(vfmacc_vv_w, 4) 3396 GEN_VEXT_VV_ENV(vfmacc_vv_d, 8) 3397 3398 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3399 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3400 CPURISCVState *env) \ 3401 { \ 3402 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3403 TD d = *((TD *)vd + HD(i)); \ 3404 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\ 3405 } 3406 3407 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) 3408 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) 3409 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) 3410 GEN_VEXT_VF(vfmacc_vf_h, 2) 3411 GEN_VEXT_VF(vfmacc_vf_w, 4) 3412 GEN_VEXT_VF(vfmacc_vf_d, 8) 3413 3414 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3415 { 3416 return float16_muladd(a, b, d, float_muladd_negate_c | 3417 float_muladd_negate_product, s); 3418 } 3419 3420 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3421 { 3422 return float32_muladd(a, b, d, float_muladd_negate_c | 3423 float_muladd_negate_product, s); 3424 } 3425 3426 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3427 { 3428 return float64_muladd(a, b, d, 
float_muladd_negate_c | 3429 float_muladd_negate_product, s); 3430 } 3431 3432 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16) 3433 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) 3434 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) 3435 GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2) 3436 GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4) 3437 GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8) 3438 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) 3439 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) 3440 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) 3441 GEN_VEXT_VF(vfnmacc_vf_h, 2) 3442 GEN_VEXT_VF(vfnmacc_vf_w, 4) 3443 GEN_VEXT_VF(vfnmacc_vf_d, 8) 3444 3445 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3446 { 3447 return float16_muladd(a, b, d, float_muladd_negate_c, s); 3448 } 3449 3450 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3451 { 3452 return float32_muladd(a, b, d, float_muladd_negate_c, s); 3453 } 3454 3455 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3456 { 3457 return float64_muladd(a, b, d, float_muladd_negate_c, s); 3458 } 3459 3460 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) 3461 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) 3462 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) 3463 GEN_VEXT_VV_ENV(vfmsac_vv_h, 2) 3464 GEN_VEXT_VV_ENV(vfmsac_vv_w, 4) 3465 GEN_VEXT_VV_ENV(vfmsac_vv_d, 8) 3466 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) 3467 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) 3468 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) 3469 GEN_VEXT_VF(vfmsac_vf_h, 2) 3470 GEN_VEXT_VF(vfmsac_vf_w, 4) 3471 GEN_VEXT_VF(vfmsac_vf_d, 8) 3472 3473 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3474 { 3475 return float16_muladd(a, b, d, float_muladd_negate_product, s); 3476 } 3477 3478 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3479 { 3480 return float32_muladd(a, b, d, float_muladd_negate_product, s); 3481 } 3482 3483 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3484 { 3485 return float64_muladd(a, b, d, float_muladd_negate_product, s); 3486 } 3487 3488 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) 3489 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) 3490 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) 3491 GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2) 3492 GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4) 3493 GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8) 3494 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) 3495 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) 3496 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64) 3497 GEN_VEXT_VF(vfnmsac_vf_h, 2) 3498 GEN_VEXT_VF(vfnmsac_vf_w, 4) 3499 GEN_VEXT_VF(vfnmsac_vf_d, 8) 3500 3501 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3502 { 3503 return float16_muladd(d, b, a, 0, s); 3504 } 3505 3506 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3507 { 3508 return float32_muladd(d, b, a, 0, s); 3509 } 3510 3511 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3512 { 3513 return float64_muladd(d, b, a, 0, s); 3514 } 3515 3516 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) 3517 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) 3518 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) 3519 GEN_VEXT_VV_ENV(vfmadd_vv_h, 
2) 3520 GEN_VEXT_VV_ENV(vfmadd_vv_w, 4) 3521 GEN_VEXT_VV_ENV(vfmadd_vv_d, 8) 3522 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16) 3523 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) 3524 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) 3525 GEN_VEXT_VF(vfmadd_vf_h, 2) 3526 GEN_VEXT_VF(vfmadd_vf_w, 4) 3527 GEN_VEXT_VF(vfmadd_vf_d, 8) 3528 3529 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3530 { 3531 return float16_muladd(d, b, a, float_muladd_negate_c | 3532 float_muladd_negate_product, s); 3533 } 3534 3535 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3536 { 3537 return float32_muladd(d, b, a, float_muladd_negate_c | 3538 float_muladd_negate_product, s); 3539 } 3540 3541 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3542 { 3543 return float64_muladd(d, b, a, float_muladd_negate_c | 3544 float_muladd_negate_product, s); 3545 } 3546 3547 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) 3548 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) 3549 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) 3550 GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2) 3551 GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4) 3552 GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8) 3553 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) 3554 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) 3555 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) 3556 GEN_VEXT_VF(vfnmadd_vf_h, 2) 3557 GEN_VEXT_VF(vfnmadd_vf_w, 4) 3558 GEN_VEXT_VF(vfnmadd_vf_d, 8) 3559 3560 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3561 { 3562 return float16_muladd(d, b, a, float_muladd_negate_c, s); 3563 } 3564 3565 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3566 { 3567 return float32_muladd(d, b, a, float_muladd_negate_c, s); 3568 } 3569 3570 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3571 { 3572 return float64_muladd(d, b, a, float_muladd_negate_c, s); 3573 } 3574 3575 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) 3576 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) 3577 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) 3578 GEN_VEXT_VV_ENV(vfmsub_vv_h, 2) 3579 GEN_VEXT_VV_ENV(vfmsub_vv_w, 4) 3580 GEN_VEXT_VV_ENV(vfmsub_vv_d, 8) 3581 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) 3582 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) 3583 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) 3584 GEN_VEXT_VF(vfmsub_vf_h, 2) 3585 GEN_VEXT_VF(vfmsub_vf_w, 4) 3586 GEN_VEXT_VF(vfmsub_vf_d, 8) 3587 3588 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3589 { 3590 return float16_muladd(d, b, a, float_muladd_negate_product, s); 3591 } 3592 3593 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3594 { 3595 return float32_muladd(d, b, a, float_muladd_negate_product, s); 3596 } 3597 3598 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3599 { 3600 return float64_muladd(d, b, a, float_muladd_negate_product, s); 3601 } 3602 3603 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) 3604 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) 3605 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) 3606 GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2) 3607 GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4) 3608 GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8) 3609 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) 
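/*
 * Note on operand order in the two FMA families above: the
 * vf[n]macc/vf[n]msac helpers compute muladd(a, b, d), i.e.
 * vd = +/-(vs1 * vs2) +/- vd, while the vf[n]madd/vf[n]msub helpers
 * compute muladd(d, b, a), i.e. vd = +/-(vs1 * vd) +/- vs2, matching
 * the RVV operand conventions. In the OPFVV3/OPFVF3 expansion, a is
 * the vs2 element, b is the vs1 element (or the scalar), and d is the
 * previous destination element.
 */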
3610 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) 3611 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64) 3612 GEN_VEXT_VF(vfnmsub_vf_h, 2) 3613 GEN_VEXT_VF(vfnmsub_vf_w, 4) 3614 GEN_VEXT_VF(vfnmsub_vf_d, 8) 3615 3616 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */ 3617 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3618 { 3619 return float32_muladd(float16_to_float32(a, true, s), 3620 float16_to_float32(b, true, s), d, 0, s); 3621 } 3622 3623 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3624 { 3625 return float64_muladd(float32_to_float64(a, s), 3626 float32_to_float64(b, s), d, 0, s); 3627 } 3628 3629 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) 3630 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32) 3631 GEN_VEXT_VV_ENV(vfwmacc_vv_h, 4) 3632 GEN_VEXT_VV_ENV(vfwmacc_vv_w, 8) 3633 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16) 3634 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32) 3635 GEN_VEXT_VF(vfwmacc_vf_h, 4) 3636 GEN_VEXT_VF(vfwmacc_vf_w, 8) 3637 3638 static uint32_t fwmaccbf16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3639 { 3640 return float32_muladd(bfloat16_to_float32(a, s), 3641 bfloat16_to_float32(b, s), d, 0, s); 3642 } 3643 3644 RVVCALL(OPFVV3, vfwmaccbf16_vv, WOP_UUU_H, H4, H2, H2, fwmaccbf16) 3645 GEN_VEXT_VV_ENV(vfwmaccbf16_vv, 4) 3646 RVVCALL(OPFVF3, vfwmaccbf16_vf, WOP_UUU_H, H4, H2, fwmaccbf16) 3647 GEN_VEXT_VF(vfwmaccbf16_vf, 4) 3648 3649 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3650 { 3651 return float32_muladd(float16_to_float32(a, true, s), 3652 float16_to_float32(b, true, s), d, 3653 float_muladd_negate_c | float_muladd_negate_product, 3654 s); 3655 } 3656 3657 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3658 { 3659 return float64_muladd(float32_to_float64(a, s), float32_to_float64(b, s), 3660 d, float_muladd_negate_c | 3661 float_muladd_negate_product, s); 3662 } 3663 3664 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) 3665 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) 3666 GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 4) 3667 GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 8) 3668 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) 3669 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) 3670 GEN_VEXT_VF(vfwnmacc_vf_h, 4) 3671 GEN_VEXT_VF(vfwnmacc_vf_w, 8) 3672 3673 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3674 { 3675 return float32_muladd(float16_to_float32(a, true, s), 3676 float16_to_float32(b, true, s), d, 3677 float_muladd_negate_c, s); 3678 } 3679 3680 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3681 { 3682 return float64_muladd(float32_to_float64(a, s), 3683 float32_to_float64(b, s), d, 3684 float_muladd_negate_c, s); 3685 } 3686 3687 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) 3688 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) 3689 GEN_VEXT_VV_ENV(vfwmsac_vv_h, 4) 3690 GEN_VEXT_VV_ENV(vfwmsac_vv_w, 8) 3691 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) 3692 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) 3693 GEN_VEXT_VF(vfwmsac_vf_h, 4) 3694 GEN_VEXT_VF(vfwmsac_vf_w, 8) 3695 3696 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3697 { 3698 return float32_muladd(float16_to_float32(a, true, s), 3699 float16_to_float32(b, true, s), d, 3700 
float_muladd_negate_product, s); 3701 } 3702 3703 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3704 { 3705 return float64_muladd(float32_to_float64(a, s), 3706 float32_to_float64(b, s), d, 3707 float_muladd_negate_product, s); 3708 } 3709 3710 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) 3711 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) 3712 GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 4) 3713 GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 8) 3714 RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) 3715 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) 3716 GEN_VEXT_VF(vfwnmsac_vf_h, 4) 3717 GEN_VEXT_VF(vfwnmsac_vf_w, 8) 3718 3719 /* Vector Floating-Point Square-Root Instruction */ 3720 #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 3721 static void do_##NAME(void *vd, void *vs2, int i, \ 3722 CPURISCVState *env) \ 3723 { \ 3724 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3725 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ 3726 } 3727 3728 #define GEN_VEXT_V_ENV(NAME, ESZ) \ 3729 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 3730 CPURISCVState *env, uint32_t desc) \ 3731 { \ 3732 uint32_t vm = vext_vm(desc); \ 3733 uint32_t vl = env->vl; \ 3734 uint32_t total_elems = \ 3735 vext_get_total_elems(env, desc, ESZ); \ 3736 uint32_t vta = vext_vta(desc); \ 3737 uint32_t vma = vext_vma(desc); \ 3738 uint32_t i; \ 3739 \ 3740 VSTART_CHECK_EARLY_EXIT(env, vl); \ 3741 \ 3742 if (vl == 0) { \ 3743 return; \ 3744 } \ 3745 for (i = env->vstart; i < vl; i++) { \ 3746 if (!vm && !vext_elem_mask(v0, i)) { \ 3747 /* set masked-off elements to 1s */ \ 3748 vext_set_elems_1s(vd, vma, i * ESZ, \ 3749 (i + 1) * ESZ); \ 3750 continue; \ 3751 } \ 3752 do_##NAME(vd, vs2, i, env); \ 3753 } \ 3754 env->vstart = 0; \ 3755 vext_set_elems_1s(vd, vta, vl * ESZ, \ 3756 total_elems * ESZ); \ 3757 } 3758 3759 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) 3760 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) 3761 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) 3762 GEN_VEXT_V_ENV(vfsqrt_v_h, 2) 3763 GEN_VEXT_V_ENV(vfsqrt_v_w, 4) 3764 GEN_VEXT_V_ENV(vfsqrt_v_d, 8) 3765 3766 /* 3767 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction 3768 * 3769 * Adapted from riscv-v-spec recip.c: 3770 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3771 */ 3772 static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size) 3773 { 3774 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3775 uint64_t exp = extract64(f, frac_size, exp_size); 3776 uint64_t frac = extract64(f, 0, frac_size); 3777 3778 const uint8_t lookup_table[] = { 3779 52, 51, 50, 48, 47, 46, 44, 43, 3780 42, 41, 40, 39, 38, 36, 35, 34, 3781 33, 32, 31, 30, 30, 29, 28, 27, 3782 26, 25, 24, 23, 23, 22, 21, 20, 3783 19, 19, 18, 17, 16, 16, 15, 14, 3784 14, 13, 12, 12, 11, 10, 10, 9, 3785 9, 8, 7, 7, 6, 6, 5, 4, 3786 4, 3, 3, 2, 2, 1, 1, 0, 3787 127, 125, 123, 121, 119, 118, 116, 114, 3788 113, 111, 109, 108, 106, 105, 103, 102, 3789 100, 99, 97, 96, 95, 93, 92, 91, 3790 90, 88, 87, 86, 85, 84, 83, 82, 3791 80, 79, 78, 77, 76, 75, 74, 73, 3792 72, 71, 70, 70, 69, 68, 67, 66, 3793 65, 64, 63, 63, 62, 61, 60, 59, 3794 59, 58, 57, 56, 56, 55, 54, 53 3795 }; 3796 const int precision = 7; 3797 3798 if (exp == 0 && frac != 0) { /* subnormal */ 3799 /* Normalize the subnormal. 
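         * The loop below shifts the fraction left until its top stored
         * bit is set, decrementing exp below zero as it goes; exp wraps
         * as an unsigned value, but only ~exp (-exp - 1 modulo 2^64)
         * feeds the exponent arithmetic further down, so the wrapped
         * encoding gives the same result a signed exponent would. The
         * final shift drops that leading bit so frac again holds
         * fraction bits only.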
*/ 3800 while (extract64(frac, frac_size - 1, 1) == 0) { 3801 exp--; 3802 frac <<= 1; 3803 } 3804 3805 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3806 } 3807 3808 int idx = ((exp & 1) << (precision - 1)) | 3809 (frac >> (frac_size - precision + 1)); 3810 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3811 (frac_size - precision); 3812 uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2; 3813 3814 uint64_t val = 0; 3815 val = deposit64(val, 0, frac_size, out_frac); 3816 val = deposit64(val, frac_size, exp_size, out_exp); 3817 val = deposit64(val, frac_size + exp_size, 1, sign); 3818 return val; 3819 } 3820 3821 static float16 frsqrt7_h(float16 f, float_status *s) 3822 { 3823 int exp_size = 5, frac_size = 10; 3824 bool sign = float16_is_neg(f); 3825 3826 /* 3827 * frsqrt7(sNaN) = canonical NaN 3828 * frsqrt7(-inf) = canonical NaN 3829 * frsqrt7(-normal) = canonical NaN 3830 * frsqrt7(-subnormal) = canonical NaN 3831 */ 3832 if (float16_is_signaling_nan(f, s) || 3833 (float16_is_infinity(f) && sign) || 3834 (float16_is_normal(f) && sign) || 3835 (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) { 3836 s->float_exception_flags |= float_flag_invalid; 3837 return float16_default_nan(s); 3838 } 3839 3840 /* frsqrt7(qNaN) = canonical NaN */ 3841 if (float16_is_quiet_nan(f, s)) { 3842 return float16_default_nan(s); 3843 } 3844 3845 /* frsqrt7(+-0) = +-inf */ 3846 if (float16_is_zero(f)) { 3847 s->float_exception_flags |= float_flag_divbyzero; 3848 return float16_set_sign(float16_infinity, sign); 3849 } 3850 3851 /* frsqrt7(+inf) = +0 */ 3852 if (float16_is_infinity(f) && !sign) { 3853 return float16_set_sign(float16_zero, sign); 3854 } 3855 3856 /* +normal, +subnormal */ 3857 uint64_t val = frsqrt7(f, exp_size, frac_size); 3858 return make_float16(val); 3859 } 3860 3861 static float32 frsqrt7_s(float32 f, float_status *s) 3862 { 3863 int exp_size = 8, frac_size = 23; 3864 bool sign = float32_is_neg(f); 3865 3866 /* 3867 * frsqrt7(sNaN) = canonical NaN 3868 * frsqrt7(-inf) = canonical NaN 3869 * frsqrt7(-normal) = canonical NaN 3870 * frsqrt7(-subnormal) = canonical NaN 3871 */ 3872 if (float32_is_signaling_nan(f, s) || 3873 (float32_is_infinity(f) && sign) || 3874 (float32_is_normal(f) && sign) || 3875 (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) { 3876 s->float_exception_flags |= float_flag_invalid; 3877 return float32_default_nan(s); 3878 } 3879 3880 /* frsqrt7(qNaN) = canonical NaN */ 3881 if (float32_is_quiet_nan(f, s)) { 3882 return float32_default_nan(s); 3883 } 3884 3885 /* frsqrt7(+-0) = +-inf */ 3886 if (float32_is_zero(f)) { 3887 s->float_exception_flags |= float_flag_divbyzero; 3888 return float32_set_sign(float32_infinity, sign); 3889 } 3890 3891 /* frsqrt7(+inf) = +0 */ 3892 if (float32_is_infinity(f) && !sign) { 3893 return float32_set_sign(float32_zero, sign); 3894 } 3895 3896 /* +normal, +subnormal */ 3897 uint64_t val = frsqrt7(f, exp_size, frac_size); 3898 return make_float32(val); 3899 } 3900 3901 static float64 frsqrt7_d(float64 f, float_status *s) 3902 { 3903 int exp_size = 11, frac_size = 52; 3904 bool sign = float64_is_neg(f); 3905 3906 /* 3907 * frsqrt7(sNaN) = canonical NaN 3908 * frsqrt7(-inf) = canonical NaN 3909 * frsqrt7(-normal) = canonical NaN 3910 * frsqrt7(-subnormal) = canonical NaN 3911 */ 3912 if (float64_is_signaling_nan(f, s) || 3913 (float64_is_infinity(f) && sign) || 3914 (float64_is_normal(f) && sign) || 3915 (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) { 3916 
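        /*
         * As in frsqrt7_h/frsqrt7_s above: a signaling NaN or any
         * negative, non-zero input has no real square root, so raise
         * the invalid-operation flag and return the canonical NaN.
         */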
s->float_exception_flags |= float_flag_invalid; 3917 return float64_default_nan(s); 3918 } 3919 3920 /* frsqrt7(qNaN) = canonical NaN */ 3921 if (float64_is_quiet_nan(f, s)) { 3922 return float64_default_nan(s); 3923 } 3924 3925 /* frsqrt7(+-0) = +-inf */ 3926 if (float64_is_zero(f)) { 3927 s->float_exception_flags |= float_flag_divbyzero; 3928 return float64_set_sign(float64_infinity, sign); 3929 } 3930 3931 /* frsqrt7(+inf) = +0 */ 3932 if (float64_is_infinity(f) && !sign) { 3933 return float64_set_sign(float64_zero, sign); 3934 } 3935 3936 /* +normal, +subnormal */ 3937 uint64_t val = frsqrt7(f, exp_size, frac_size); 3938 return make_float64(val); 3939 } 3940 3941 RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h) 3942 RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s) 3943 RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d) 3944 GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2) 3945 GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4) 3946 GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8) 3947 3948 /* 3949 * Vector Floating-Point Reciprocal Estimate Instruction 3950 * 3951 * Adapted from riscv-v-spec recip.c: 3952 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3953 */ 3954 static uint64_t frec7(uint64_t f, int exp_size, int frac_size, 3955 float_status *s) 3956 { 3957 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3958 uint64_t exp = extract64(f, frac_size, exp_size); 3959 uint64_t frac = extract64(f, 0, frac_size); 3960 3961 const uint8_t lookup_table[] = { 3962 127, 125, 123, 121, 119, 117, 116, 114, 3963 112, 110, 109, 107, 105, 104, 102, 100, 3964 99, 97, 96, 94, 93, 91, 90, 88, 3965 87, 85, 84, 83, 81, 80, 79, 77, 3966 76, 75, 74, 72, 71, 70, 69, 68, 3967 66, 65, 64, 63, 62, 61, 60, 59, 3968 58, 57, 56, 55, 54, 53, 52, 51, 3969 50, 49, 48, 47, 46, 45, 44, 43, 3970 42, 41, 40, 40, 39, 38, 37, 36, 3971 35, 35, 34, 33, 32, 31, 31, 30, 3972 29, 28, 28, 27, 26, 25, 25, 24, 3973 23, 23, 22, 21, 21, 20, 19, 19, 3974 18, 17, 17, 16, 15, 15, 14, 14, 3975 13, 12, 12, 11, 11, 10, 9, 9, 3976 8, 8, 7, 7, 6, 5, 5, 4, 3977 4, 3, 3, 2, 2, 1, 1, 0 3978 }; 3979 const int precision = 7; 3980 3981 if (exp == 0 && frac != 0) { /* subnormal */ 3982 /* Normalize the subnormal. */ 3983 while (extract64(frac, frac_size - 1, 1) == 0) { 3984 exp--; 3985 frac <<= 1; 3986 } 3987 3988 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3989 3990 if (exp != 0 && exp != UINT64_MAX) { 3991 /* 3992 * Overflow to inf or max value of same sign, 3993 * depending on sign and rounding mode. 3994 */ 3995 s->float_exception_flags |= (float_flag_inexact | 3996 float_flag_overflow); 3997 3998 if ((s->float_rounding_mode == float_round_to_zero) || 3999 ((s->float_rounding_mode == float_round_down) && !sign) || 4000 ((s->float_rounding_mode == float_round_up) && sign)) { 4001 /* Return greatest/negative finite value. */ 4002 return (sign << (exp_size + frac_size)) | 4003 (MAKE_64BIT_MASK(frac_size, exp_size) - 1); 4004 } else { 4005 /* Return +-inf. */ 4006 return (sign << (exp_size + frac_size)) | 4007 MAKE_64BIT_MASK(frac_size, exp_size); 4008 } 4009 } 4010 } 4011 4012 int idx = frac >> (frac_size - precision); 4013 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 4014 (frac_size - precision); 4015 uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp; 4016 4017 if (out_exp == 0 || out_exp == UINT64_MAX) { 4018 /* 4019 * The result is subnormal, but don't raise the underflow exception, 4020 * because there's no additional loss of precision. 
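         * Re-encode the 7-bit estimate as a subnormal: shift the
         * fraction right one place and set what was the implicit
         * leading one explicitly; when the computed exponent was -1
         * (it appears here as all-ones after the unsigned arithmetic
         * above), shift once more and clamp the exponent field to zero.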
4021 */ 4022 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1); 4023 if (out_exp == UINT64_MAX) { 4024 out_frac >>= 1; 4025 out_exp = 0; 4026 } 4027 } 4028 4029 uint64_t val = 0; 4030 val = deposit64(val, 0, frac_size, out_frac); 4031 val = deposit64(val, frac_size, exp_size, out_exp); 4032 val = deposit64(val, frac_size + exp_size, 1, sign); 4033 return val; 4034 } 4035 4036 static float16 frec7_h(float16 f, float_status *s) 4037 { 4038 int exp_size = 5, frac_size = 10; 4039 bool sign = float16_is_neg(f); 4040 4041 /* frec7(+-inf) = +-0 */ 4042 if (float16_is_infinity(f)) { 4043 return float16_set_sign(float16_zero, sign); 4044 } 4045 4046 /* frec7(+-0) = +-inf */ 4047 if (float16_is_zero(f)) { 4048 s->float_exception_flags |= float_flag_divbyzero; 4049 return float16_set_sign(float16_infinity, sign); 4050 } 4051 4052 /* frec7(sNaN) = canonical NaN */ 4053 if (float16_is_signaling_nan(f, s)) { 4054 s->float_exception_flags |= float_flag_invalid; 4055 return float16_default_nan(s); 4056 } 4057 4058 /* frec7(qNaN) = canonical NaN */ 4059 if (float16_is_quiet_nan(f, s)) { 4060 return float16_default_nan(s); 4061 } 4062 4063 /* +-normal, +-subnormal */ 4064 uint64_t val = frec7(f, exp_size, frac_size, s); 4065 return make_float16(val); 4066 } 4067 4068 static float32 frec7_s(float32 f, float_status *s) 4069 { 4070 int exp_size = 8, frac_size = 23; 4071 bool sign = float32_is_neg(f); 4072 4073 /* frec7(+-inf) = +-0 */ 4074 if (float32_is_infinity(f)) { 4075 return float32_set_sign(float32_zero, sign); 4076 } 4077 4078 /* frec7(+-0) = +-inf */ 4079 if (float32_is_zero(f)) { 4080 s->float_exception_flags |= float_flag_divbyzero; 4081 return float32_set_sign(float32_infinity, sign); 4082 } 4083 4084 /* frec7(sNaN) = canonical NaN */ 4085 if (float32_is_signaling_nan(f, s)) { 4086 s->float_exception_flags |= float_flag_invalid; 4087 return float32_default_nan(s); 4088 } 4089 4090 /* frec7(qNaN) = canonical NaN */ 4091 if (float32_is_quiet_nan(f, s)) { 4092 return float32_default_nan(s); 4093 } 4094 4095 /* +-normal, +-subnormal */ 4096 uint64_t val = frec7(f, exp_size, frac_size, s); 4097 return make_float32(val); 4098 } 4099 4100 static float64 frec7_d(float64 f, float_status *s) 4101 { 4102 int exp_size = 11, frac_size = 52; 4103 bool sign = float64_is_neg(f); 4104 4105 /* frec7(+-inf) = +-0 */ 4106 if (float64_is_infinity(f)) { 4107 return float64_set_sign(float64_zero, sign); 4108 } 4109 4110 /* frec7(+-0) = +-inf */ 4111 if (float64_is_zero(f)) { 4112 s->float_exception_flags |= float_flag_divbyzero; 4113 return float64_set_sign(float64_infinity, sign); 4114 } 4115 4116 /* frec7(sNaN) = canonical NaN */ 4117 if (float64_is_signaling_nan(f, s)) { 4118 s->float_exception_flags |= float_flag_invalid; 4119 return float64_default_nan(s); 4120 } 4121 4122 /* frec7(qNaN) = canonical NaN */ 4123 if (float64_is_quiet_nan(f, s)) { 4124 return float64_default_nan(s); 4125 } 4126 4127 /* +-normal, +-subnormal */ 4128 uint64_t val = frec7(f, exp_size, frac_size, s); 4129 return make_float64(val); 4130 } 4131 4132 RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h) 4133 RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s) 4134 RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d) 4135 GEN_VEXT_V_ENV(vfrec7_v_h, 2) 4136 GEN_VEXT_V_ENV(vfrec7_v_w, 4) 4137 GEN_VEXT_V_ENV(vfrec7_v_d, 8) 4138 4139 /* Vector Floating-Point MIN/MAX Instructions */ 4140 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number) 4141 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, 
float32_minimum_number) 4142 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number) 4143 GEN_VEXT_VV_ENV(vfmin_vv_h, 2) 4144 GEN_VEXT_VV_ENV(vfmin_vv_w, 4) 4145 GEN_VEXT_VV_ENV(vfmin_vv_d, 8) 4146 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number) 4147 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number) 4148 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number) 4149 GEN_VEXT_VF(vfmin_vf_h, 2) 4150 GEN_VEXT_VF(vfmin_vf_w, 4) 4151 GEN_VEXT_VF(vfmin_vf_d, 8) 4152 4153 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number) 4154 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number) 4155 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number) 4156 GEN_VEXT_VV_ENV(vfmax_vv_h, 2) 4157 GEN_VEXT_VV_ENV(vfmax_vv_w, 4) 4158 GEN_VEXT_VV_ENV(vfmax_vv_d, 8) 4159 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number) 4160 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number) 4161 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number) 4162 GEN_VEXT_VF(vfmax_vf_h, 2) 4163 GEN_VEXT_VF(vfmax_vf_w, 4) 4164 GEN_VEXT_VF(vfmax_vf_d, 8) 4165 4166 /* Vector Floating-Point Sign-Injection Instructions */ 4167 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) 4168 { 4169 return deposit64(b, 0, 15, a); 4170 } 4171 4172 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s) 4173 { 4174 return deposit64(b, 0, 31, a); 4175 } 4176 4177 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) 4178 { 4179 return deposit64(b, 0, 63, a); 4180 } 4181 4182 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) 4183 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) 4184 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) 4185 GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2) 4186 GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4) 4187 GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8) 4188 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) 4189 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) 4190 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64) 4191 GEN_VEXT_VF(vfsgnj_vf_h, 2) 4192 GEN_VEXT_VF(vfsgnj_vf_w, 4) 4193 GEN_VEXT_VF(vfsgnj_vf_d, 8) 4194 4195 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) 4196 { 4197 return deposit64(~b, 0, 15, a); 4198 } 4199 4200 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s) 4201 { 4202 return deposit64(~b, 0, 31, a); 4203 } 4204 4205 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s) 4206 { 4207 return deposit64(~b, 0, 63, a); 4208 } 4209 4210 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) 4211 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) 4212 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) 4213 GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2) 4214 GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4) 4215 GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8) 4216 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) 4217 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) 4218 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) 4219 GEN_VEXT_VF(vfsgnjn_vf_h, 2) 4220 GEN_VEXT_VF(vfsgnjn_vf_w, 4) 4221 GEN_VEXT_VF(vfsgnjn_vf_d, 8) 4222 4223 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) 4224 { 4225 return deposit64(b ^ a, 0, 15, a); 4226 } 4227 4228 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s) 4229 { 4230 return deposit64(b ^ a, 0, 31, a); 4231 } 4232 4233 static uint64_t fsgnjx64(uint64_t a, uint64_t b, 
float_status *s) 4234 { 4235 return deposit64(b ^ a, 0, 63, a); 4236 } 4237 4238 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) 4239 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) 4240 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) 4241 GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2) 4242 GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4) 4243 GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8) 4244 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) 4245 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) 4246 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) 4247 GEN_VEXT_VF(vfsgnjx_vf_h, 2) 4248 GEN_VEXT_VF(vfsgnjx_vf_w, 4) 4249 GEN_VEXT_VF(vfsgnjx_vf_d, 8) 4250 4251 /* Vector Floating-Point Compare Instructions */ 4252 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ 4253 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4254 CPURISCVState *env, uint32_t desc) \ 4255 { \ 4256 uint32_t vm = vext_vm(desc); \ 4257 uint32_t vl = env->vl; \ 4258 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \ 4259 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 4260 uint32_t vma = vext_vma(desc); \ 4261 uint32_t i; \ 4262 \ 4263 VSTART_CHECK_EARLY_EXIT(env, vl); \ 4264 \ 4265 for (i = env->vstart; i < vl; i++) { \ 4266 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 4267 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4268 if (!vm && !vext_elem_mask(v0, i)) { \ 4269 /* set masked-off elements to 1s */ \ 4270 if (vma) { \ 4271 vext_set_elem_mask(vd, i, 1); \ 4272 } \ 4273 continue; \ 4274 } \ 4275 vext_set_elem_mask(vd, i, \ 4276 DO_OP(s2, s1, &env->fp_status)); \ 4277 } \ 4278 env->vstart = 0; \ 4279 /* 4280 * mask destination register are always tail-agnostic 4281 * set tail elements to 1s 4282 */ \ 4283 if (vta_all_1s) { \ 4284 for (; i < total_elems; i++) { \ 4285 vext_set_elem_mask(vd, i, 1); \ 4286 } \ 4287 } \ 4288 } 4289 4290 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet) 4291 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet) 4292 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet) 4293 4294 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \ 4295 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4296 CPURISCVState *env, uint32_t desc) \ 4297 { \ 4298 uint32_t vm = vext_vm(desc); \ 4299 uint32_t vl = env->vl; \ 4300 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \ 4301 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 4302 uint32_t vma = vext_vma(desc); \ 4303 uint32_t i; \ 4304 \ 4305 VSTART_CHECK_EARLY_EXIT(env, vl); \ 4306 \ 4307 for (i = env->vstart; i < vl; i++) { \ 4308 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4309 if (!vm && !vext_elem_mask(v0, i)) { \ 4310 /* set masked-off elements to 1s */ \ 4311 if (vma) { \ 4312 vext_set_elem_mask(vd, i, 1); \ 4313 } \ 4314 continue; \ 4315 } \ 4316 vext_set_elem_mask(vd, i, \ 4317 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \ 4318 } \ 4319 env->vstart = 0; \ 4320 /* 4321 * mask destination register are always tail-agnostic 4322 * set tail elements to 1s 4323 */ \ 4324 if (vta_all_1s) { \ 4325 for (; i < total_elems; i++) { \ 4326 vext_set_elem_mask(vd, i, 1); \ 4327 } \ 4328 } \ 4329 } 4330 4331 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet) 4332 GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet) 4333 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet) 4334 4335 static bool vmfne16(uint16_t a, uint16_t b, float_status *s) 4336 { 4337 FloatRelation compare = float16_compare_quiet(a, b, s); 4338 return compare != float_relation_equal; 4339 } 4340 4341 static 
bool vmfne32(uint32_t a, uint32_t b, float_status *s) 4342 { 4343 FloatRelation compare = float32_compare_quiet(a, b, s); 4344 return compare != float_relation_equal; 4345 } 4346 4347 static bool vmfne64(uint64_t a, uint64_t b, float_status *s) 4348 { 4349 FloatRelation compare = float64_compare_quiet(a, b, s); 4350 return compare != float_relation_equal; 4351 } 4352 4353 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16) 4354 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32) 4355 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64) 4356 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16) 4357 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32) 4358 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64) 4359 4360 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt) 4361 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt) 4362 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt) 4363 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt) 4364 GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt) 4365 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt) 4366 4367 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le) 4368 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le) 4369 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le) 4370 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le) 4371 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le) 4372 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le) 4373 4374 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s) 4375 { 4376 FloatRelation compare = float16_compare(a, b, s); 4377 return compare == float_relation_greater; 4378 } 4379 4380 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s) 4381 { 4382 FloatRelation compare = float32_compare(a, b, s); 4383 return compare == float_relation_greater; 4384 } 4385 4386 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s) 4387 { 4388 FloatRelation compare = float64_compare(a, b, s); 4389 return compare == float_relation_greater; 4390 } 4391 4392 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16) 4393 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32) 4394 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64) 4395 4396 static bool vmfge16(uint16_t a, uint16_t b, float_status *s) 4397 { 4398 FloatRelation compare = float16_compare(a, b, s); 4399 return compare == float_relation_greater || 4400 compare == float_relation_equal; 4401 } 4402 4403 static bool vmfge32(uint32_t a, uint32_t b, float_status *s) 4404 { 4405 FloatRelation compare = float32_compare(a, b, s); 4406 return compare == float_relation_greater || 4407 compare == float_relation_equal; 4408 } 4409 4410 static bool vmfge64(uint64_t a, uint64_t b, float_status *s) 4411 { 4412 FloatRelation compare = float64_compare(a, b, s); 4413 return compare == float_relation_greater || 4414 compare == float_relation_equal; 4415 } 4416 4417 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16) 4418 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32) 4419 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64) 4420 4421 /* Vector Floating-Point Classify Instruction */ 4422 target_ulong fclass_h(uint64_t frs1) 4423 { 4424 float16 f = frs1; 4425 bool sign = float16_is_neg(f); 4426 4427 if (float16_is_infinity(f)) { 4428 return sign ? 1 << 0 : 1 << 7; 4429 } else if (float16_is_zero(f)) { 4430 return sign ? 1 << 3 : 1 << 4; 4431 } else if (float16_is_zero_or_denormal(f)) { 4432 return sign ? 
1 << 2 : 1 << 5; 4433 } else if (float16_is_any_nan(f)) { 4434 float_status s = { }; /* for snan_bit_is_one */ 4435 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4436 } else { 4437 return sign ? 1 << 1 : 1 << 6; 4438 } 4439 } 4440 4441 target_ulong fclass_s(uint64_t frs1) 4442 { 4443 float32 f = frs1; 4444 bool sign = float32_is_neg(f); 4445 4446 if (float32_is_infinity(f)) { 4447 return sign ? 1 << 0 : 1 << 7; 4448 } else if (float32_is_zero(f)) { 4449 return sign ? 1 << 3 : 1 << 4; 4450 } else if (float32_is_zero_or_denormal(f)) { 4451 return sign ? 1 << 2 : 1 << 5; 4452 } else if (float32_is_any_nan(f)) { 4453 float_status s = { }; /* for snan_bit_is_one */ 4454 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4455 } else { 4456 return sign ? 1 << 1 : 1 << 6; 4457 } 4458 } 4459 4460 target_ulong fclass_d(uint64_t frs1) 4461 { 4462 float64 f = frs1; 4463 bool sign = float64_is_neg(f); 4464 4465 if (float64_is_infinity(f)) { 4466 return sign ? 1 << 0 : 1 << 7; 4467 } else if (float64_is_zero(f)) { 4468 return sign ? 1 << 3 : 1 << 4; 4469 } else if (float64_is_zero_or_denormal(f)) { 4470 return sign ? 1 << 2 : 1 << 5; 4471 } else if (float64_is_any_nan(f)) { 4472 float_status s = { }; /* for snan_bit_is_one */ 4473 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4474 } else { 4475 return sign ? 1 << 1 : 1 << 6; 4476 } 4477 } 4478 4479 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) 4480 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) 4481 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) 4482 GEN_VEXT_V(vfclass_v_h, 2) 4483 GEN_VEXT_V(vfclass_v_w, 4) 4484 GEN_VEXT_V(vfclass_v_d, 8) 4485 4486 /* Vector Floating-Point Merge Instruction */ 4487 4488 #define GEN_VFMERGE_VF(NAME, ETYPE, H) \ 4489 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4490 CPURISCVState *env, uint32_t desc) \ 4491 { \ 4492 uint32_t vm = vext_vm(desc); \ 4493 uint32_t vl = env->vl; \ 4494 uint32_t esz = sizeof(ETYPE); \ 4495 uint32_t total_elems = \ 4496 vext_get_total_elems(env, desc, esz); \ 4497 uint32_t vta = vext_vta(desc); \ 4498 uint32_t i; \ 4499 \ 4500 VSTART_CHECK_EARLY_EXIT(env, vl); \ 4501 \ 4502 for (i = env->vstart; i < vl; i++) { \ 4503 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4504 *((ETYPE *)vd + H(i)) = \ 4505 (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \ 4506 } \ 4507 env->vstart = 0; \ 4508 /* set tail elements to 1s */ \ 4509 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4510 } 4511 4512 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2) 4513 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4) 4514 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8) 4515 4516 /* Single-Width Floating-Point/Integer Type-Convert Instructions */ 4517 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ 4518 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) 4519 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) 4520 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) 4521 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2) 4522 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4) 4523 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8) 4524 4525 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. 
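 * These reuse the OPFVV1/GEN_VEXT_V_ENV pattern from vfsqrt above, so
 * masked-off and tail elements follow the usual vma/vta policy, and the
 * conversions round with whatever mode is currently installed in
 * env->fp_status.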
*/ 4526 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) 4527 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) 4528 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) 4529 GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2) 4530 GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4) 4531 GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8) 4532 4533 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ 4534 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) 4535 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) 4536 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) 4537 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2) 4538 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4) 4539 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8) 4540 4541 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */ 4542 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) 4543 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) 4544 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) 4545 GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2) 4546 GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4) 4547 GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8) 4548 4549 /* Widening Floating-Point/Integer Type-Convert Instructions */ 4550 /* (TD, T2, TX2) */ 4551 #define WOP_UU_B uint16_t, uint8_t, uint8_t 4552 #define WOP_UU_H uint32_t, uint16_t, uint16_t 4553 #define WOP_UU_W uint64_t, uint32_t, uint32_t 4554 /* 4555 * vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer. 4556 */ 4557 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) 4558 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) 4559 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 4) 4560 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 8) 4561 4562 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */ 4563 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) 4564 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) 4565 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 4) 4566 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 8) 4567 4568 /* 4569 * vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float. 4570 */ 4571 RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16) 4572 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) 4573 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) 4574 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 2) 4575 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 4) 4576 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 8) 4577 4578 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ 4579 RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16) 4580 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) 4581 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) 4582 GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 2) 4583 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 4) 4584 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 8) 4585 4586 /* 4587 * vfwcvt.f.f.v vd, vs2, vm # Convert single-width float to double-width float. 
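 *
 * The half-to-single case goes through vfwcvtffv16 so that the
 * ieee=true flag is passed to float16_to_float32; both this widening
 * and the bfloat16-to-float32 widening just below are exact, so the
 * only flag they can raise is invalid, for a signaling-NaN input.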
4588 */ 4589 static uint32_t vfwcvtffv16(uint16_t a, float_status *s) 4590 { 4591 return float16_to_float32(a, true, s); 4592 } 4593 4594 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16) 4595 RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64) 4596 GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 4) 4597 GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 8) 4598 4599 RVVCALL(OPFVV1, vfwcvtbf16_f_f_v, WOP_UU_H, H4, H2, bfloat16_to_float32) 4600 GEN_VEXT_V_ENV(vfwcvtbf16_f_f_v, 4) 4601 4602 /* Narrowing Floating-Point/Integer Type-Convert Instructions */ 4603 /* (TD, T2, TX2) */ 4604 #define NOP_UU_B uint8_t, uint16_t, uint32_t 4605 #define NOP_UU_H uint16_t, uint32_t, uint32_t 4606 #define NOP_UU_W uint32_t, uint64_t, uint64_t 4607 /* vfncvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ 4608 RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8) 4609 RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16) 4610 RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32) 4611 GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1) 4612 GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2) 4613 GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4) 4614 4615 /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */ 4616 RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8) 4617 RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16) 4618 RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32) 4619 GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1) 4620 GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2) 4621 GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4) 4622 4623 /* 4624 * vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float. 4625 */ 4626 RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16) 4627 RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32) 4628 GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2) 4629 GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4) 4630 4631 /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */ 4632 RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16) 4633 RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32) 4634 GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2) 4635 GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4) 4636 4637 /* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. 
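 * Unlike the widening form above, narrowing must round, so it can set
 * the inexact, overflow and underflow flags according to the current
 * rounding mode; the vfncvtbf16_f_f_w variant below narrows float32 to
 * bfloat16 in the same way.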
*/ 4638 static uint16_t vfncvtffv16(uint32_t a, float_status *s) 4639 { 4640 return float32_to_float16(a, true, s); 4641 } 4642 4643 RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16) 4644 RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32) 4645 GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2) 4646 GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4) 4647 4648 RVVCALL(OPFVV1, vfncvtbf16_f_f_w, NOP_UU_H, H2, H4, float32_to_bfloat16) 4649 GEN_VEXT_V_ENV(vfncvtbf16_f_f_w, 2) 4650 4651 /* 4652 * Vector Reduction Operations 4653 */ 4654 /* Vector Single-Width Integer Reduction Instructions */ 4655 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \ 4656 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4657 void *vs2, CPURISCVState *env, \ 4658 uint32_t desc) \ 4659 { \ 4660 uint32_t vm = vext_vm(desc); \ 4661 uint32_t vl = env->vl; \ 4662 uint32_t esz = sizeof(TD); \ 4663 uint32_t vlenb = simd_maxsz(desc); \ 4664 uint32_t vta = vext_vta(desc); \ 4665 uint32_t i; \ 4666 TD s1 = *((TD *)vs1 + HD(0)); \ 4667 \ 4668 VSTART_CHECK_EARLY_EXIT(env, vl); \ 4669 \ 4670 for (i = env->vstart; i < vl; i++) { \ 4671 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4672 if (!vm && !vext_elem_mask(v0, i)) { \ 4673 continue; \ 4674 } \ 4675 s1 = OP(s1, (TD)s2); \ 4676 } \ 4677 if (vl > 0) { \ 4678 *((TD *)vd + HD(0)) = s1; \ 4679 } \ 4680 env->vstart = 0; \ 4681 /* set tail elements to 1s */ \ 4682 vext_set_elems_1s(vd, vta, esz, vlenb); \ 4683 } 4684 4685 /* vd[0] = sum(vs1[0], vs2[*]) */ 4686 GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD) 4687 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD) 4688 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD) 4689 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD) 4690 4691 /* vd[0] = maxu(vs1[0], vs2[*]) */ 4692 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX) 4693 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX) 4694 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX) 4695 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX) 4696 4697 /* vd[0] = max(vs1[0], vs2[*]) */ 4698 GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX) 4699 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX) 4700 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX) 4701 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX) 4702 4703 /* vd[0] = minu(vs1[0], vs2[*]) */ 4704 GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN) 4705 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN) 4706 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN) 4707 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN) 4708 4709 /* vd[0] = min(vs1[0], vs2[*]) */ 4710 GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN) 4711 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN) 4712 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN) 4713 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN) 4714 4715 /* vd[0] = and(vs1[0], vs2[*]) */ 4716 GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND) 4717 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND) 4718 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND) 4719 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND) 4720 4721 /* vd[0] = or(vs1[0], vs2[*]) */ 4722 GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR) 4723 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR) 4724 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR) 4725 
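/*
 * All GEN_VEXT_RED expansions above and below share the same shape: the
 * accumulator starts from vs1[0], inactive elements are skipped and
 * never touch the accumulator, and only vd[0] is written back, and only
 * when vl > 0; the rest of vd is tail and is set per vta. The widening
 * reductions further down reuse the same macro with TD twice as wide as
 * TS2, so each source element is sign- or zero-extended by the (TD)s2
 * cast before being accumulated.
 */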
GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR) 4726 4727 /* vd[0] = xor(vs1[0], vs2[*]) */ 4728 GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR) 4729 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR) 4730 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR) 4731 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR) 4732 4733 /* Vector Widening Integer Reduction Instructions */ 4734 /* signed sum reduction into double-width accumulator */ 4735 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD) 4736 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD) 4737 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD) 4738 4739 /* Unsigned sum reduction into double-width accumulator */ 4740 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD) 4741 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD) 4742 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD) 4743 4744 /* Vector Single-Width Floating-Point Reduction Instructions */ 4745 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \ 4746 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4747 void *vs2, CPURISCVState *env, \ 4748 uint32_t desc) \ 4749 { \ 4750 uint32_t vm = vext_vm(desc); \ 4751 uint32_t vl = env->vl; \ 4752 uint32_t esz = sizeof(TD); \ 4753 uint32_t vlenb = simd_maxsz(desc); \ 4754 uint32_t vta = vext_vta(desc); \ 4755 uint32_t i; \ 4756 TD s1 = *((TD *)vs1 + HD(0)); \ 4757 \ 4758 VSTART_CHECK_EARLY_EXIT(env, vl); \ 4759 \ 4760 for (i = env->vstart; i < vl; i++) { \ 4761 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4762 if (!vm && !vext_elem_mask(v0, i)) { \ 4763 continue; \ 4764 } \ 4765 s1 = OP(s1, (TD)s2, &env->fp_status); \ 4766 } \ 4767 if (vl > 0) { \ 4768 *((TD *)vd + HD(0)) = s1; \ 4769 } \ 4770 env->vstart = 0; \ 4771 /* set tail elements to 1s */ \ 4772 vext_set_elems_1s(vd, vta, esz, vlenb); \ 4773 } 4774 4775 /* Unordered sum */ 4776 GEN_VEXT_FRED(vfredusum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 4777 GEN_VEXT_FRED(vfredusum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4778 GEN_VEXT_FRED(vfredusum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4779 4780 /* Ordered sum */ 4781 GEN_VEXT_FRED(vfredosum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 4782 GEN_VEXT_FRED(vfredosum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4783 GEN_VEXT_FRED(vfredosum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4784 4785 /* Maximum value */ 4786 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, 4787 float16_maximum_number) 4788 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, 4789 float32_maximum_number) 4790 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, 4791 float64_maximum_number) 4792 4793 /* Minimum value */ 4794 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, 4795 float16_minimum_number) 4796 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, 4797 float32_minimum_number) 4798 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, 4799 float64_minimum_number) 4800 4801 /* Vector Widening Floating-Point Add Instructions */ 4802 static uint32_t fwadd16(uint32_t a, uint16_t b, float_status *s) 4803 { 4804 return float32_add(a, float16_to_float32(b, true, s), s); 4805 } 4806 4807 static uint64_t fwadd32(uint64_t a, uint32_t b, float_status *s) 4808 { 4809 return float64_add(a, float32_to_float64(b, s), s); 4810 } 4811 4812 /* Vector Widening Floating-Point Reduction Instructions */ 4813 /* Ordered/unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */ 4814 GEN_VEXT_FRED(vfwredusum_vs_h, 
uint32_t, uint16_t, H4, H2, fwadd16) 4815 GEN_VEXT_FRED(vfwredusum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32) 4816 GEN_VEXT_FRED(vfwredosum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16) 4817 GEN_VEXT_FRED(vfwredosum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32) 4818 4819 /* 4820 * Vector Mask Operations 4821 */ 4822 /* Vector Mask-Register Logical Instructions */ 4823 #define GEN_VEXT_MASK_VV(NAME, OP) \ 4824 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4825 void *vs2, CPURISCVState *env, \ 4826 uint32_t desc) \ 4827 { \ 4828 uint32_t vl = env->vl; \ 4829 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;\ 4830 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 4831 uint32_t i; \ 4832 int a, b; \ 4833 \ 4834 VSTART_CHECK_EARLY_EXIT(env, vl); \ 4835 \ 4836 for (i = env->vstart; i < vl; i++) { \ 4837 a = vext_elem_mask(vs1, i); \ 4838 b = vext_elem_mask(vs2, i); \ 4839 vext_set_elem_mask(vd, i, OP(b, a)); \ 4840 } \ 4841 env->vstart = 0; \ 4842 /* 4843 * mask destination register are always tail-agnostic 4844 * set tail elements to 1s 4845 */ \ 4846 if (vta_all_1s) { \ 4847 for (; i < total_elems; i++) { \ 4848 vext_set_elem_mask(vd, i, 1); \ 4849 } \ 4850 } \ 4851 } 4852 4853 #define DO_NAND(N, M) (!(N & M)) 4854 #define DO_ANDNOT(N, M) (N & !M) 4855 #define DO_NOR(N, M) (!(N | M)) 4856 #define DO_ORNOT(N, M) (N | !M) 4857 #define DO_XNOR(N, M) (!(N ^ M)) 4858 4859 GEN_VEXT_MASK_VV(vmand_mm, DO_AND) 4860 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND) 4861 GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT) 4862 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR) 4863 GEN_VEXT_MASK_VV(vmor_mm, DO_OR) 4864 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR) 4865 GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT) 4866 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR) 4867 4868 /* Vector count population in mask vcpop */ 4869 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env, 4870 uint32_t desc) 4871 { 4872 target_ulong cnt = 0; 4873 uint32_t vm = vext_vm(desc); 4874 uint32_t vl = env->vl; 4875 int i; 4876 4877 for (i = env->vstart; i < vl; i++) { 4878 if (vm || vext_elem_mask(v0, i)) { 4879 if (vext_elem_mask(vs2, i)) { 4880 cnt++; 4881 } 4882 } 4883 } 4884 env->vstart = 0; 4885 return cnt; 4886 } 4887 4888 /* vfirst find-first-set mask bit */ 4889 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env, 4890 uint32_t desc) 4891 { 4892 uint32_t vm = vext_vm(desc); 4893 uint32_t vl = env->vl; 4894 int i; 4895 4896 for (i = env->vstart; i < vl; i++) { 4897 if (vm || vext_elem_mask(v0, i)) { 4898 if (vext_elem_mask(vs2, i)) { 4899 return i; 4900 } 4901 } 4902 } 4903 env->vstart = 0; 4904 return -1LL; 4905 } 4906 4907 enum set_mask_type { 4908 ONLY_FIRST = 1, 4909 INCLUDE_FIRST, 4910 BEFORE_FIRST, 4911 }; 4912 4913 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, 4914 uint32_t desc, enum set_mask_type type) 4915 { 4916 uint32_t vm = vext_vm(desc); 4917 uint32_t vl = env->vl; 4918 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; 4919 uint32_t vta_all_1s = vext_vta_all_1s(desc); 4920 uint32_t vma = vext_vma(desc); 4921 int i; 4922 bool first_mask_bit = false; 4923 4924 VSTART_CHECK_EARLY_EXIT(env, vl); 4925 4926 for (i = env->vstart; i < vl; i++) { 4927 if (!vm && !vext_elem_mask(v0, i)) { 4928 /* set masked-off elements to 1s */ 4929 if (vma) { 4930 vext_set_elem_mask(vd, i, 1); 4931 } 4932 continue; 4933 } 4934 /* write a zero to all following active elements */ 4935 if (first_mask_bit) { 4936 vext_set_elem_mask(vd, i, 0); 4937 continue; 4938 } 4939 if (vext_elem_mask(vs2, i)) { 4940 first_mask_bit = true; 4941 if (type 
enum set_mask_type {
    ONLY_FIRST = 1,
    INCLUDE_FIRST,
    BEFORE_FIRST,
};

static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
                   uint32_t desc, enum set_mask_type type)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;
    uint32_t vta_all_1s = vext_vta_all_1s(desc);
    uint32_t vma = vext_vma(desc);
    int i;
    bool first_mask_bit = false;

    VSTART_CHECK_EARLY_EXIT(env, vl);

    for (i = env->vstart; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            /* set masked-off elements to 1s */
            if (vma) {
                vext_set_elem_mask(vd, i, 1);
            }
            continue;
        }
        /* write a zero to all following active elements */
        if (first_mask_bit) {
            vext_set_elem_mask(vd, i, 0);
            continue;
        }
        if (vext_elem_mask(vs2, i)) {
            first_mask_bit = true;
            if (type == BEFORE_FIRST) {
                vext_set_elem_mask(vd, i, 0);
            } else {
                vext_set_elem_mask(vd, i, 1);
            }
        } else {
            if (type == ONLY_FIRST) {
                vext_set_elem_mask(vd, i, 0);
            } else {
                vext_set_elem_mask(vd, i, 1);
            }
        }
    }
    env->vstart = 0;
    /*
     * mask destination register is always tail-agnostic;
     * set tail elements to 1s
     */
    if (vta_all_1s) {
        for (; i < total_elems; i++) {
            vext_set_elem_mask(vd, i, 1);
        }
    }
}

void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
}

void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
}

void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
}

/* Vector Iota Instruction */
#define GEN_VEXT_VIOTA_M(NAME, ETYPE, H)                                  \
void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env,      \
                  uint32_t desc)                                          \
{                                                                         \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint32_t sum = 0;                                                     \
    int i;                                                                \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = sum;                                      \
        if (vext_elem_mask(vs2, i)) {                                     \
            sum++;                                                        \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_VIOTA_M(viota_m_b, uint8_t,  H1)
GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)

/* Vector Element Index Instruction */
#define GEN_VEXT_VID_V(NAME, ETYPE, H)                                    \
void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc)  \
{                                                                         \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    int i;                                                                \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = i;                                        \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_VID_V(vid_v_b, uint8_t,  H1)
GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)
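
/*
 * Illustrative sketch (hypothetical, not part of the build): the viota.m
 * loop above is an exclusive prefix sum over the source mask -- element i
 * of vd receives the number of set mask bits strictly below i -- while
 * vid.v simply writes the element index.  Restated over plain arrays,
 * ignoring masking, tail policy and vstart:
 */
static inline void example_viota_m(uint32_t *vd, const uint64_t *vs2_mask,
                                   uint32_t vl)
{
    uint32_t sum = 0;

    for (uint32_t i = 0; i < vl; i++) {
        vd[i] = sum;                                   /* count so far */
        sum += (vs2_mask[i / 64] >> (i % 64)) & 1;     /* then include bit i */
    }
}

static inline void example_vid_v(uint32_t *vd, uint32_t vl)
{
    for (uint32_t i = 0; i < vl; i++) {
        vd[i] = i;                                     /* element index */
    }
}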
/*
 * Vector Permutation Instructions
 */

/* Vector Slide Instructions */
#define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H)                              \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    target_ulong offset = s1, i_min, i;                                   \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    i_min = MAX(env->vstart, offset);                                     \
    for (i = i_min; i < vl; i++) {                                        \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset));          \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t,  H1)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)

#define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H)                            \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    target_ulong i_max, i_min, i;                                         \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    i_min = MIN(s1 < vlmax ? vlmax - s1 : 0, vl);                         \
    i_max = MAX(i_min, env->vstart);                                      \
    for (i = env->vstart; i < i_max; ++i) {                               \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1));              \
    }                                                                     \
                                                                          \
    for (i = i_max; i < vl; ++i) {                                        \
        if (vm || vext_elem_mask(v0, i)) {                                \
            *((ETYPE *)vd + H(i)) = 0;                                    \
        }                                                                 \
    }                                                                     \
                                                                          \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t,  H1)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)
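
/*
 * Illustrative sketch (hypothetical, not part of the build): ignoring
 * masking, tail handling and vstart, the two slide helpers above implement
 *
 *   vslideup.vx:    vd[i + offset] = vs2[i]                    (i + offset < vl)
 *   vslidedown.vx:  vd[i] = (i + offset < vlmax) ? vs2[i + offset] : 0
 *
 * restated here over plain uint32_t arrays:
 */
static inline void example_vslideup(uint32_t *vd, const uint32_t *vs2,
                                    uint32_t offset, uint32_t vl)
{
    for (uint32_t i = offset; i < vl; i++) {
        vd[i] = vs2[i - offset];      /* elements below offset keep old vd */
    }
}

static inline void example_vslidedown(uint32_t *vd, const uint32_t *vs2,
                                      uint32_t offset, uint32_t vl,
                                      uint32_t vlmax)
{
    for (uint32_t i = 0; i < vl; i++) {
        /* guard the addition so i + offset cannot wrap around */
        vd[i] = (offset < vlmax && i < vlmax - offset) ? vs2[i + offset] : 0;
    }
}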
#define GEN_VEXT_VSLIE1UP(BITWIDTH, H)                                    \
static void vslide1up_##BITWIDTH(void *vd, void *v0, uint64_t s1,         \
                                 void *vs2, CPURISCVState *env,           \
                                 uint32_t desc)                           \
{                                                                         \
    typedef uint##BITWIDTH##_t ETYPE;                                     \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint32_t i;                                                           \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        if (i == 0) {                                                     \
            *((ETYPE *)vd + H(i)) = s1;                                   \
        } else {                                                          \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_VSLIE1UP(8,  H1)
GEN_VEXT_VSLIE1UP(16, H2)
GEN_VEXT_VSLIE1UP(32, H4)
GEN_VEXT_VSLIE1UP(64, H8)

#define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH)                             \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc);                     \
}

/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)

#define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H)                                 \
static void vslide1down_##BITWIDTH(void *vd, void *v0, uint64_t s1,       \
                                   void *vs2, CPURISCVState *env,         \
                                   uint32_t desc)                         \
{                                                                         \
    typedef uint##BITWIDTH##_t ETYPE;                                     \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint32_t i;                                                           \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        if (i == vl - 1) {                                                \
            *((ETYPE *)vd + H(i)) = s1;                                   \
        } else {                                                          \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_VSLIDE1DOWN(8,  H1)
GEN_VEXT_VSLIDE1DOWN(16, H2)
GEN_VEXT_VSLIDE1DOWN(32, H4)
GEN_VEXT_VSLIDE1DOWN(64, H8)

#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH)                           \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc);                   \
}

/* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)

/* Vector Floating-Point Slide Instructions */
#define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH)                            \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,             \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc);                     \
}

/* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)

#define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH)                          \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,             \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc);                   \
}

/* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)
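
/*
 * Illustrative sketch (hypothetical, not part of the build): vslide1up
 * inserts the scalar at element 0 and shifts vs2 up by one position, while
 * vslide1down shifts vs2 down by one and inserts the scalar at element
 * vl-1.  The vfslide1up.vf/vfslide1down.vf helpers above reuse the integer
 * implementations, since the FP scalar is forwarded as a raw bit pattern.
 * Ignoring masking, tail policy and vstart:
 */
static inline void example_vslide1up(uint64_t *vd, const uint64_t *vs2,
                                     uint64_t s1, uint32_t vl)
{
    for (uint32_t i = 0; i < vl; i++) {
        vd[i] = (i == 0) ? s1 : vs2[i - 1];
    }
}

static inline void example_vslide1down(uint64_t *vd, const uint64_t *vs2,
                                       uint64_t s1, uint32_t vl)
{
    for (uint32_t i = 0; i < vl; i++) {
        vd[i] = (i == vl - 1) ? s1 : vs2[i + 1];
    }
}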
/* Vector Register Gather Instruction */
#define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2)                    \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2)));             \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(TS2);                                           \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint64_t index;                                                       \
    uint32_t i;                                                           \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        index = *((TS1 *)vs1 + HS1(i));                                   \
        if (index >= vlmax) {                                             \
            *((TS2 *)vd + HS2(i)) = 0;                                    \
        } else {                                                          \
            *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t,  uint8_t,  H1, H1)
GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)

GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t,  H2, H1)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)
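
/*
 * Illustrative sketch (hypothetical, not part of the build): register
 * gather reads vd[i] = vs2[vs1[i]], with any index at or beyond VLMAX
 * producing 0.  vrgatherei16.vv differs from vrgather.vv only in that the
 * index elements are always 16 bits wide, independent of SEW; the sketch
 * below uses 16-bit indices and 32-bit data elements for that reason.
 */
static inline void example_vrgather(uint32_t *vd, const uint16_t *vs1,
                                    const uint32_t *vs2, uint32_t vl,
                                    uint32_t vlmax)
{
    for (uint32_t i = 0; i < vl; i++) {
        uint32_t index = vs1[i];

        vd[i] = (index >= vlmax) ? 0 : vs2[index];   /* out of range -> 0 */
    }
}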
#define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H)                              \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint64_t index = s1;                                                  \
    uint32_t i;                                                           \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        if (index >= vlmax) {                                             \
            *((ETYPE *)vd + H(i)) = 0;                                    \
        } else {                                                          \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[x[rs1]] */
GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t,  H1)
GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)

/* Vector Compress Instruction */
#define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H)                             \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t num = 0, i;                                                  \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vext_elem_mask(vs1, i)) {                                    \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i));                 \
        num++;                                                            \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, num * esz, total_elems * esz);             \
}

/* Compress into vd elements of vs2 where vs1 is enabled */
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t,  H1)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)
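
/*
 * Illustrative sketch (hypothetical, not part of the build): vcompress.vm
 * packs the elements of vs2 whose corresponding bit in the vs1 mask is set
 * into the lowest-numbered elements of vd, preserving their order.  The
 * helper above then applies the tail policy from element num onward.
 */
static inline uint32_t example_vcompress(uint32_t *vd, const uint64_t *vs1_mask,
                                         const uint32_t *vs2, uint32_t vl)
{
    uint32_t num = 0;

    for (uint32_t i = 0; i < vl; i++) {
        if ((vs1_mask[i / 64] >> (i % 64)) & 1) {
            vd[num++] = vs2[i];            /* keep selected elements packed */
        }
    }
    return num;                            /* number of elements written */
}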
/* Vector Whole Register Move */
void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
{
    /* EEW = SEW */
    uint32_t maxsz = simd_maxsz(desc);
    uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
    uint32_t startb = env->vstart * sewb;
    uint32_t i = startb;

    if (startb >= maxsz) {
        env->vstart = 0;
        return;
    }

    if (HOST_BIG_ENDIAN && i % 8 != 0) {
        uint32_t j = ROUND_UP(i, 8);
        memcpy((uint8_t *)vd + H1(j - 1),
               (uint8_t *)vs2 + H1(j - 1),
               j - i);
        i = j;
    }

    memcpy((uint8_t *)vd + H1(i),
           (uint8_t *)vs2 + H1(i),
           maxsz - i);

    env->vstart = 0;
}

/* Vector Integer Extension */
#define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1)                     \
void HELPER(NAME)(void *vd, void *v0, void *vs2,                          \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vl = env->vl;                                                \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint32_t i;                                                           \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i));                \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t,  H2, H1)
GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t,  H4, H1)
GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t,  H8, H1)

GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t,  H2, H1)
GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t,  H4, H1)
GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t,  H8, H1)
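
/*
 * Illustrative sketch (hypothetical, not part of the build): the integer
 * extension helpers above rely on ordinary C conversions from the narrower
 * source type DTYPE to the wider destination type ETYPE, so the signedness
 * of the macro arguments selects zero extension (vzext.*) or sign extension
 * (vsext.*).  For a source element of 0x80:
 */
static inline void example_vext_vf2(uint16_t *zd, int16_t *sd, uint8_t src)
{
    *zd = (uint16_t)src;            /* vzext.vf2: 0x80 -> 0x0080 */
    *sd = (int16_t)(int8_t)src;     /* vsext.vf2: 0x80 -> 0xff80 (-128) */
}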