/*
 * RISC-V Vector Extension Helpers for QEMU.
 *
 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "qemu/bitops.h"
#include "cpu.h"
#include "exec/memop.h"
#include "exec/exec-all.h"
#include "exec/cpu_ldst.h"
#include "exec/page-protection.h"
#include "exec/helper-proto.h"
#include "fpu/softfloat.h"
#include "tcg/tcg-gvec-desc.h"
#include "internals.h"
#include "vector_internals.h"
#include <math.h>

target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
                            target_ulong s2)
{
    int vlmax, vl;
    RISCVCPU *cpu = env_archcpu(env);
    uint64_t vlmul = FIELD_EX64(s2, VTYPE, VLMUL);
    uint8_t vsew = FIELD_EX64(s2, VTYPE, VSEW);
    uint16_t sew = 8 << vsew;
    uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
    int xlen = riscv_cpu_xlen(env);
    bool vill = (s2 >> (xlen - 1)) & 0x1;
    target_ulong reserved = s2 &
                            MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
                                            xlen - 1 - R_VTYPE_RESERVED_SHIFT);
    uint16_t vlen = cpu->cfg.vlenb << 3;
    int8_t lmul;

    if (vlmul & 4) {
        /*
         * Fractional LMUL, check:
         *
         * VLEN * LMUL >= SEW
         * VLEN >> (8 - lmul) >= sew
         * (vlenb << 3) >> (8 - lmul) >= sew
         */
        if (vlmul == 4 || (vlen >> (8 - vlmul)) < sew) {
            vill = true;
        }
    }

    if ((sew > cpu->cfg.elen) || vill || (ediv != 0) || (reserved != 0)) {
        /* only set vill bit. */
        env->vill = 1;
        env->vtype = 0;
        env->vl = 0;
        env->vstart = 0;
        return 0;
    }

    /* lmul encoded as in DisasContext::lmul */
    lmul = sextract32(FIELD_EX64(s2, VTYPE, VLMUL), 0, 3);
    vlmax = vext_get_vlmax(cpu->cfg.vlenb, vsew, lmul);
    if (s1 <= vlmax) {
        vl = s1;
    } else if (s1 < 2 * vlmax && cpu->cfg.rvv_vl_half_avl) {
        vl = (s1 + 1) >> 1;
    } else {
        vl = vlmax;
    }
    env->vl = vl;
    env->vtype = s2;
    env->vstart = 0;
    env->vill = 0;
    return vl;
}

/*
 * Get the maximum number of elements that can be operated on.
 *
 * log2_esz: log2 of element size in bytes.
 */
static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz)
{
    /*
     * As simd_desc supports at most 2048 bytes, the max vlen is 1024 bits,
     * so vlen in bytes (vlenb) is encoded as maxsz.
     */
    uint32_t vlenb = simd_maxsz(desc);

    /* Return VLMAX */
    int scale = vext_lmul(desc) - log2_esz;
    return scale < 0 ? vlenb >> -scale : vlenb << scale;
}

/*
 * This function checks watchpoints before the real load operation.
 *
 * In system mode, the TLB API probe_access is enough for the watchpoint check.
 * In user mode, there is no watchpoint support now.
 *
 * It will trigger an exception if there is no mapping in the TLB
 * and the page table walk can't fill the TLB entry. The guest
 * software can then return here after handling the exception, or never return.
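 *
 * Worked example (illustrative only, assuming 4 KiB target pages): for
 * addr = 0xffa and len = 16, pagelen = -(addr | TARGET_PAGE_MASK) = 6,
 * so the first probe_access() below covers the 6 bytes remaining on the
 * first page and the second covers the other 10 bytes on the next page;
 * a single call therefore never has to probe more than two pages.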
 */
static void probe_pages(CPURISCVState *env, target_ulong addr,
                        target_ulong len, uintptr_t ra,
                        MMUAccessType access_type)
{
    target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
    target_ulong curlen = MIN(pagelen, len);
    int mmu_index = riscv_env_mmu_index(env, false);

    probe_access(env, adjust_addr(env, addr), curlen, access_type,
                 mmu_index, ra);
    if (len > curlen) {
        addr += curlen;
        curlen = len - curlen;
        probe_access(env, adjust_addr(env, addr), curlen, access_type,
                     mmu_index, ra);
    }
}

static inline void vext_set_elem_mask(void *v0, int index,
                                      uint8_t value)
{
    int idx = index / 64;
    int pos = index % 64;
    uint64_t old = ((uint64_t *)v0)[idx];
    ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
}

/* elements operations for load and store */
typedef void vext_ldst_elem_fn_tlb(CPURISCVState *env, abi_ptr addr,
                                   uint32_t idx, void *vd, uintptr_t retaddr);
typedef void vext_ldst_elem_fn_host(void *vd, uint32_t idx, void *host);

#define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF)             \
static inline QEMU_ALWAYS_INLINE                            \
void NAME##_tlb(CPURISCVState *env, abi_ptr addr,           \
                uint32_t idx, void *vd, uintptr_t retaddr)  \
{                                                           \
    ETYPE *cur = ((ETYPE *)vd + H(idx));                    \
    *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr);       \
}                                                           \
                                                            \
static inline QEMU_ALWAYS_INLINE                            \
void NAME##_host(void *vd, uint32_t idx, void *host)        \
{                                                           \
    ETYPE *cur = ((ETYPE *)vd + H(idx));                    \
    *cur = (ETYPE)LDSUF##_p(host);                          \
}

GEN_VEXT_LD_ELEM(lde_b, uint8_t, H1, ldub)
GEN_VEXT_LD_ELEM(lde_h, uint16_t, H2, lduw)
GEN_VEXT_LD_ELEM(lde_w, uint32_t, H4, ldl)
GEN_VEXT_LD_ELEM(lde_d, uint64_t, H8, ldq)

#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF)             \
static inline QEMU_ALWAYS_INLINE                            \
void NAME##_tlb(CPURISCVState *env, abi_ptr addr,           \
                uint32_t idx, void *vd, uintptr_t retaddr)  \
{                                                           \
    ETYPE data = *((ETYPE *)vd + H(idx));                   \
    cpu_##STSUF##_data_ra(env, addr, data, retaddr);        \
}                                                           \
                                                            \
static inline QEMU_ALWAYS_INLINE                            \
void NAME##_host(void *vd, uint32_t idx, void *host)        \
{                                                           \
    ETYPE data = *((ETYPE *)vd + H(idx));                   \
    STSUF##_p(host, data);                                  \
}

GEN_VEXT_ST_ELEM(ste_b, uint8_t, H1, stb)
GEN_VEXT_ST_ELEM(ste_h, uint16_t, H2, stw)
GEN_VEXT_ST_ELEM(ste_w, uint32_t, H4, stl)
GEN_VEXT_ST_ELEM(ste_d, uint64_t, H8, stq)

static inline QEMU_ALWAYS_INLINE void
vext_continuous_ldst_tlb(CPURISCVState *env, vext_ldst_elem_fn_tlb *ldst_tlb,
                         void *vd, uint32_t evl, target_ulong addr,
                         uint32_t reg_start, uintptr_t ra, uint32_t esz,
                         bool is_load)
{
    uint32_t i;
    for (i = env->vstart; i < evl; env->vstart = ++i, addr += esz) {
        ldst_tlb(env, adjust_addr(env, addr), i, vd, ra);
    }
}

static inline QEMU_ALWAYS_INLINE void
vext_continuous_ldst_host(CPURISCVState *env, vext_ldst_elem_fn_host *ldst_host,
                          void *vd, uint32_t evl, uint32_t reg_start, void *host,
                          uint32_t esz, bool is_load)
{
#if HOST_BIG_ENDIAN
    for (; reg_start < evl; reg_start++, host += esz) {
        ldst_host(vd, reg_start, host);
    }
#else
    if (esz == 1) {
        uint32_t byte_offset = reg_start * esz;
        uint32_t size = (evl - reg_start) * esz;

        if (is_load) {
            memcpy(vd + byte_offset, host, size);
        } else {
            memcpy(host, vd + byte_offset, size);
        }
    } else {
        for (; reg_start < evl; reg_start++, host += esz) {
            ldst_host(vd,
reg_start, host); 226 } 227 } 228 #endif 229 } 230 231 static void vext_set_tail_elems_1s(target_ulong vl, void *vd, 232 uint32_t desc, uint32_t nf, 233 uint32_t esz, uint32_t max_elems) 234 { 235 uint32_t vta = vext_vta(desc); 236 int k; 237 238 if (vta == 0) { 239 return; 240 } 241 242 for (k = 0; k < nf; ++k) { 243 vext_set_elems_1s(vd, vta, (k * max_elems + vl) * esz, 244 (k * max_elems + max_elems) * esz); 245 } 246 } 247 248 /* 249 * stride: access vector element from strided memory 250 */ 251 static void 252 vext_ldst_stride(void *vd, void *v0, target_ulong base, target_ulong stride, 253 CPURISCVState *env, uint32_t desc, uint32_t vm, 254 vext_ldst_elem_fn_tlb *ldst_elem, uint32_t log2_esz, 255 uintptr_t ra) 256 { 257 uint32_t i, k; 258 uint32_t nf = vext_nf(desc); 259 uint32_t max_elems = vext_max_elems(desc, log2_esz); 260 uint32_t esz = 1 << log2_esz; 261 uint32_t vma = vext_vma(desc); 262 263 VSTART_CHECK_EARLY_EXIT(env, env->vl); 264 265 for (i = env->vstart; i < env->vl; env->vstart = ++i) { 266 k = 0; 267 while (k < nf) { 268 if (!vm && !vext_elem_mask(v0, i)) { 269 /* set masked-off elements to 1s */ 270 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz, 271 (i + k * max_elems + 1) * esz); 272 k++; 273 continue; 274 } 275 target_ulong addr = base + stride * i + (k << log2_esz); 276 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 277 k++; 278 } 279 } 280 env->vstart = 0; 281 282 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems); 283 } 284 285 #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \ 286 void HELPER(NAME)(void *vd, void * v0, target_ulong base, \ 287 target_ulong stride, CPURISCVState *env, \ 288 uint32_t desc) \ 289 { \ 290 uint32_t vm = vext_vm(desc); \ 291 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \ 292 ctzl(sizeof(ETYPE)), GETPC()); \ 293 } 294 295 GEN_VEXT_LD_STRIDE(vlse8_v, int8_t, lde_b_tlb) 296 GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h_tlb) 297 GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w_tlb) 298 GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d_tlb) 299 300 #define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN) \ 301 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 302 target_ulong stride, CPURISCVState *env, \ 303 uint32_t desc) \ 304 { \ 305 uint32_t vm = vext_vm(desc); \ 306 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \ 307 ctzl(sizeof(ETYPE)), GETPC()); \ 308 } 309 310 GEN_VEXT_ST_STRIDE(vsse8_v, int8_t, ste_b_tlb) 311 GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h_tlb) 312 GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w_tlb) 313 GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d_tlb) 314 315 /* 316 * unit-stride: access elements stored contiguously in memory 317 */ 318 319 /* unmasked unit-stride load and store operation */ 320 static inline QEMU_ALWAYS_INLINE void 321 vext_page_ldst_us(CPURISCVState *env, void *vd, target_ulong addr, 322 uint32_t elems, uint32_t nf, uint32_t max_elems, 323 uint32_t log2_esz, bool is_load, int mmu_index, 324 vext_ldst_elem_fn_tlb *ldst_tlb, 325 vext_ldst_elem_fn_host *ldst_host, uintptr_t ra) 326 { 327 void *host; 328 int i, k, flags; 329 uint32_t esz = 1 << log2_esz; 330 uint32_t size = (elems * nf) << log2_esz; 331 uint32_t evl = env->vstart + elems; 332 MMUAccessType access_type = is_load ? MMU_DATA_LOAD : MMU_DATA_STORE; 333 334 /* Check page permission/pmp/watchpoint/etc. 
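 * Worked example for the fast path below (illustrative only): for a
 * unit-stride 32-bit load with nf = 1, vstart = 0 and 8 elements left on
 * this page, the probe covers size = (8 * 1) << 2 = 32 bytes. When
 * probe_access_flags() returns 0, 'host' points at the guest RAM backing
 * 'addr' and the elements go through the *_host accessors (on
 * little-endian hosts, a plain memcpy for byte-sized elements); any
 * non-zero flag forces the per-element TLB path further down.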
*/ 335 flags = probe_access_flags(env, adjust_addr(env, addr), size, access_type, 336 mmu_index, true, &host, ra); 337 338 if (flags == 0) { 339 if (nf == 1) { 340 vext_continuous_ldst_host(env, ldst_host, vd, evl, env->vstart, 341 host, esz, is_load); 342 } else { 343 for (i = env->vstart; i < evl; ++i) { 344 k = 0; 345 while (k < nf) { 346 ldst_host(vd, i + k * max_elems, host); 347 host += esz; 348 k++; 349 } 350 } 351 } 352 env->vstart += elems; 353 } else { 354 if (nf == 1) { 355 vext_continuous_ldst_tlb(env, ldst_tlb, vd, evl, addr, env->vstart, 356 ra, esz, is_load); 357 } else { 358 /* load bytes from guest memory */ 359 for (i = env->vstart; i < evl; env->vstart = ++i) { 360 k = 0; 361 while (k < nf) { 362 ldst_tlb(env, adjust_addr(env, addr), i + k * max_elems, 363 vd, ra); 364 addr += esz; 365 k++; 366 } 367 } 368 } 369 } 370 } 371 372 static inline QEMU_ALWAYS_INLINE void 373 vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, 374 vext_ldst_elem_fn_tlb *ldst_tlb, 375 vext_ldst_elem_fn_host *ldst_host, uint32_t log2_esz, 376 uint32_t evl, uintptr_t ra, bool is_load) 377 { 378 uint32_t k; 379 target_ulong page_split, elems, addr; 380 uint32_t nf = vext_nf(desc); 381 uint32_t max_elems = vext_max_elems(desc, log2_esz); 382 uint32_t esz = 1 << log2_esz; 383 uint32_t msize = nf * esz; 384 int mmu_index = riscv_env_mmu_index(env, false); 385 386 VSTART_CHECK_EARLY_EXIT(env, evl); 387 388 #if defined(CONFIG_USER_ONLY) 389 /* 390 * For data sizes <= 6 bytes we get better performance by simply calling 391 * vext_continuous_ldst_tlb 392 */ 393 if (nf == 1 && (evl << log2_esz) <= 6) { 394 addr = base + (env->vstart << log2_esz); 395 vext_continuous_ldst_tlb(env, ldst_tlb, vd, evl, addr, env->vstart, ra, 396 esz, is_load); 397 398 env->vstart = 0; 399 vext_set_tail_elems_1s(evl, vd, desc, nf, esz, max_elems); 400 return; 401 } 402 #endif 403 404 /* Calculate the page range of first page */ 405 addr = base + ((env->vstart * nf) << log2_esz); 406 page_split = -(addr | TARGET_PAGE_MASK); 407 /* Get number of elements */ 408 elems = page_split / msize; 409 if (unlikely(env->vstart + elems >= evl)) { 410 elems = evl - env->vstart; 411 } 412 413 /* Load/store elements in the first page */ 414 if (likely(elems)) { 415 vext_page_ldst_us(env, vd, addr, elems, nf, max_elems, log2_esz, 416 is_load, mmu_index, ldst_tlb, ldst_host, ra); 417 } 418 419 /* Load/store elements in the second page */ 420 if (unlikely(env->vstart < evl)) { 421 /* Cross page element */ 422 if (unlikely(page_split % msize)) { 423 for (k = 0; k < nf; k++) { 424 addr = base + ((env->vstart * nf + k) << log2_esz); 425 ldst_tlb(env, adjust_addr(env, addr), 426 env->vstart + k * max_elems, vd, ra); 427 } 428 env->vstart++; 429 } 430 431 addr = base + ((env->vstart * nf) << log2_esz); 432 /* Get number of elements of second page */ 433 elems = evl - env->vstart; 434 435 /* Load/store elements in the second page */ 436 vext_page_ldst_us(env, vd, addr, elems, nf, max_elems, log2_esz, 437 is_load, mmu_index, ldst_tlb, ldst_host, ra); 438 } 439 440 env->vstart = 0; 441 vext_set_tail_elems_1s(evl, vd, desc, nf, esz, max_elems); 442 } 443 444 /* 445 * masked unit-stride load and store operation will be a special case of 446 * stride, stride = NF * sizeof (ETYPE) 447 */ 448 449 #define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN_TLB, LOAD_FN_HOST) \ 450 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ 451 CPURISCVState *env, uint32_t desc) \ 452 { \ 453 uint32_t stride = vext_nf(desc) << 
ctzl(sizeof(ETYPE)); \ 454 vext_ldst_stride(vd, v0, base, stride, env, desc, false, \ 455 LOAD_FN_TLB, ctzl(sizeof(ETYPE)), GETPC()); \ 456 } \ 457 \ 458 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 459 CPURISCVState *env, uint32_t desc) \ 460 { \ 461 vext_ldst_us(vd, base, env, desc, LOAD_FN_TLB, LOAD_FN_HOST, \ 462 ctzl(sizeof(ETYPE)), env->vl, GETPC(), true); \ 463 } 464 465 GEN_VEXT_LD_US(vle8_v, int8_t, lde_b_tlb, lde_b_host) 466 GEN_VEXT_LD_US(vle16_v, int16_t, lde_h_tlb, lde_h_host) 467 GEN_VEXT_LD_US(vle32_v, int32_t, lde_w_tlb, lde_w_host) 468 GEN_VEXT_LD_US(vle64_v, int64_t, lde_d_tlb, lde_d_host) 469 470 #define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN_TLB, STORE_FN_HOST) \ 471 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ 472 CPURISCVState *env, uint32_t desc) \ 473 { \ 474 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \ 475 vext_ldst_stride(vd, v0, base, stride, env, desc, false, \ 476 STORE_FN_TLB, ctzl(sizeof(ETYPE)), GETPC()); \ 477 } \ 478 \ 479 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 480 CPURISCVState *env, uint32_t desc) \ 481 { \ 482 vext_ldst_us(vd, base, env, desc, STORE_FN_TLB, STORE_FN_HOST, \ 483 ctzl(sizeof(ETYPE)), env->vl, GETPC(), false); \ 484 } 485 486 GEN_VEXT_ST_US(vse8_v, int8_t, ste_b_tlb, ste_b_host) 487 GEN_VEXT_ST_US(vse16_v, int16_t, ste_h_tlb, ste_h_host) 488 GEN_VEXT_ST_US(vse32_v, int32_t, ste_w_tlb, ste_w_host) 489 GEN_VEXT_ST_US(vse64_v, int64_t, ste_d_tlb, ste_d_host) 490 491 /* 492 * unit stride mask load and store, EEW = 1 493 */ 494 void HELPER(vlm_v)(void *vd, void *v0, target_ulong base, 495 CPURISCVState *env, uint32_t desc) 496 { 497 /* evl = ceil(vl/8) */ 498 uint8_t evl = (env->vl + 7) >> 3; 499 vext_ldst_us(vd, base, env, desc, lde_b_tlb, lde_b_host, 500 0, evl, GETPC(), true); 501 } 502 503 void HELPER(vsm_v)(void *vd, void *v0, target_ulong base, 504 CPURISCVState *env, uint32_t desc) 505 { 506 /* evl = ceil(vl/8) */ 507 uint8_t evl = (env->vl + 7) >> 3; 508 vext_ldst_us(vd, base, env, desc, ste_b_tlb, ste_b_host, 509 0, evl, GETPC(), false); 510 } 511 512 /* 513 * index: access vector element from indexed memory 514 */ 515 typedef target_ulong vext_get_index_addr(target_ulong base, 516 uint32_t idx, void *vs2); 517 518 #define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \ 519 static target_ulong NAME(target_ulong base, \ 520 uint32_t idx, void *vs2) \ 521 { \ 522 return (base + *((ETYPE *)vs2 + H(idx))); \ 523 } 524 525 GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t, H1) 526 GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2) 527 GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4) 528 GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8) 529 530 static inline void 531 vext_ldst_index(void *vd, void *v0, target_ulong base, 532 void *vs2, CPURISCVState *env, uint32_t desc, 533 vext_get_index_addr get_index_addr, 534 vext_ldst_elem_fn_tlb *ldst_elem, 535 uint32_t log2_esz, uintptr_t ra) 536 { 537 uint32_t i, k; 538 uint32_t nf = vext_nf(desc); 539 uint32_t vm = vext_vm(desc); 540 uint32_t max_elems = vext_max_elems(desc, log2_esz); 541 uint32_t esz = 1 << log2_esz; 542 uint32_t vma = vext_vma(desc); 543 544 VSTART_CHECK_EARLY_EXIT(env, env->vl); 545 546 /* load bytes from guest memory */ 547 for (i = env->vstart; i < env->vl; env->vstart = ++i) { 548 k = 0; 549 while (k < nf) { 550 if (!vm && !vext_elem_mask(v0, i)) { 551 /* set masked-off elements to 1s */ 552 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz, 553 (i + k * max_elems + 1) * esz); 554 k++; 555 continue; 556 } 557 abi_ptr addr = 
                           get_index_addr(base, i, vs2) + (k << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;

    vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
}

#define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN)          \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,           \
                  void *vs2, CPURISCVState *env, uint32_t desc)    \
{                                                                  \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,        \
                    LOAD_FN, ctzl(sizeof(ETYPE)), GETPC());        \
}

GEN_VEXT_LD_INDEX(vlxei8_8_v, int8_t, idx_b, lde_b_tlb)
GEN_VEXT_LD_INDEX(vlxei8_16_v, int16_t, idx_b, lde_h_tlb)
GEN_VEXT_LD_INDEX(vlxei8_32_v, int32_t, idx_b, lde_w_tlb)
GEN_VEXT_LD_INDEX(vlxei8_64_v, int64_t, idx_b, lde_d_tlb)
GEN_VEXT_LD_INDEX(vlxei16_8_v, int8_t, idx_h, lde_b_tlb)
GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h_tlb)
GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w_tlb)
GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d_tlb)
GEN_VEXT_LD_INDEX(vlxei32_8_v, int8_t, idx_w, lde_b_tlb)
GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h_tlb)
GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w_tlb)
GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d_tlb)
GEN_VEXT_LD_INDEX(vlxei64_8_v, int8_t, idx_d, lde_b_tlb)
GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h_tlb)
GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w_tlb)
GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d_tlb)

#define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN)         \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,           \
                  void *vs2, CPURISCVState *env, uint32_t desc)    \
{                                                                  \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,        \
                    STORE_FN, ctzl(sizeof(ETYPE)),                 \
                    GETPC());                                      \
}

GEN_VEXT_ST_INDEX(vsxei8_8_v, int8_t, idx_b, ste_b_tlb)
GEN_VEXT_ST_INDEX(vsxei8_16_v, int16_t, idx_b, ste_h_tlb)
GEN_VEXT_ST_INDEX(vsxei8_32_v, int32_t, idx_b, ste_w_tlb)
GEN_VEXT_ST_INDEX(vsxei8_64_v, int64_t, idx_b, ste_d_tlb)
GEN_VEXT_ST_INDEX(vsxei16_8_v, int8_t, idx_h, ste_b_tlb)
GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h_tlb)
GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w_tlb)
GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d_tlb)
GEN_VEXT_ST_INDEX(vsxei32_8_v, int8_t, idx_w, ste_b_tlb)
GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h_tlb)
GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w_tlb)
GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d_tlb)
GEN_VEXT_ST_INDEX(vsxei64_8_v, int8_t, idx_d, ste_b_tlb)
GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h_tlb)
GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w_tlb)
GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d_tlb)

/*
 * unit-stride fault-only-first load instructions
 */
static inline void
vext_ldff(void *vd, void *v0, target_ulong base, CPURISCVState *env,
          uint32_t desc, vext_ldst_elem_fn_tlb *ldst_tlb,
          vext_ldst_elem_fn_host *ldst_host, uint32_t log2_esz, uintptr_t ra)
{
    uint32_t i, k, vl = 0;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t msize = nf * esz;
    uint32_t vma = vext_vma(desc);
    target_ulong addr, addr_probe, addr_i, offset, remain, page_split, elems;
    int mmu_index = riscv_env_mmu_index(env, false);
    int flags;
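    /*
     * Sketch of the fault-only-first handling below (a summary of this
     * implementation, not of the spec text): when the initial page probe
     * reports a potential fault, the elements are re-probed one by one;
     * element 0 uses a faulting probe so a bad first access traps as
     * usual, later elements use non-faulting probes, and vl is trimmed
     * at the first element that would fault so that only elements known
     * to be accessible are actually loaded.
     */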
void *host; 637 638 VSTART_CHECK_EARLY_EXIT(env, env->vl); 639 640 addr = base + ((env->vstart * nf) << log2_esz); 641 page_split = -(addr | TARGET_PAGE_MASK); 642 /* Get number of elements */ 643 elems = page_split / msize; 644 if (unlikely(env->vstart + elems >= env->vl)) { 645 elems = env->vl - env->vstart; 646 } 647 648 /* Check page permission/pmp/watchpoint/etc. */ 649 flags = probe_access_flags(env, adjust_addr(env, addr), elems * msize, 650 MMU_DATA_LOAD, mmu_index, true, &host, ra); 651 652 /* If we are crossing a page check also the second page. */ 653 if (env->vl > elems) { 654 addr_probe = addr + (elems << log2_esz); 655 flags |= probe_access_flags(env, adjust_addr(env, addr_probe), 656 elems * msize, MMU_DATA_LOAD, mmu_index, 657 true, &host, ra); 658 } 659 660 if (flags & ~TLB_WATCHPOINT) { 661 /* probe every access */ 662 for (i = env->vstart; i < env->vl; i++) { 663 if (!vm && !vext_elem_mask(v0, i)) { 664 continue; 665 } 666 addr_i = adjust_addr(env, base + i * (nf << log2_esz)); 667 if (i == 0) { 668 /* Allow fault on first element. */ 669 probe_pages(env, addr_i, nf << log2_esz, ra, MMU_DATA_LOAD); 670 } else { 671 remain = nf << log2_esz; 672 while (remain > 0) { 673 offset = -(addr_i | TARGET_PAGE_MASK); 674 675 /* Probe nonfault on subsequent elements. */ 676 flags = probe_access_flags(env, addr_i, offset, 677 MMU_DATA_LOAD, mmu_index, true, 678 &host, 0); 679 680 /* 681 * Stop if invalid (unmapped) or mmio (transaction may 682 * fail). Do not stop if watchpoint, as the spec says that 683 * first-fault should continue to access the same 684 * elements regardless of any watchpoint. 685 */ 686 if (flags & ~TLB_WATCHPOINT) { 687 vl = i; 688 goto ProbeSuccess; 689 } 690 if (remain <= offset) { 691 break; 692 } 693 remain -= offset; 694 addr_i = adjust_addr(env, addr_i + offset); 695 } 696 } 697 } 698 } 699 ProbeSuccess: 700 /* load bytes from guest memory */ 701 if (vl != 0) { 702 env->vl = vl; 703 } 704 705 if (env->vstart < env->vl) { 706 if (vm) { 707 /* Load/store elements in the first page */ 708 if (likely(elems)) { 709 vext_page_ldst_us(env, vd, addr, elems, nf, max_elems, 710 log2_esz, true, mmu_index, ldst_tlb, 711 ldst_host, ra); 712 } 713 714 /* Load/store elements in the second page */ 715 if (unlikely(env->vstart < env->vl)) { 716 /* Cross page element */ 717 if (unlikely(page_split % msize)) { 718 for (k = 0; k < nf; k++) { 719 addr = base + ((env->vstart * nf + k) << log2_esz); 720 ldst_tlb(env, adjust_addr(env, addr), 721 env->vstart + k * max_elems, vd, ra); 722 } 723 env->vstart++; 724 } 725 726 addr = base + ((env->vstart * nf) << log2_esz); 727 /* Get number of elements of second page */ 728 elems = env->vl - env->vstart; 729 730 /* Load/store elements in the second page */ 731 vext_page_ldst_us(env, vd, addr, elems, nf, max_elems, 732 log2_esz, true, mmu_index, ldst_tlb, 733 ldst_host, ra); 734 } 735 } else { 736 for (i = env->vstart; i < env->vl; i++) { 737 k = 0; 738 while (k < nf) { 739 if (!vext_elem_mask(v0, i)) { 740 /* set masked-off elements to 1s */ 741 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz, 742 (i + k * max_elems + 1) * esz); 743 k++; 744 continue; 745 } 746 addr = base + ((i * nf + k) << log2_esz); 747 ldst_tlb(env, adjust_addr(env, addr), i + k * max_elems, 748 vd, ra); 749 k++; 750 } 751 } 752 } 753 } 754 env->vstart = 0; 755 756 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems); 757 } 758 759 #define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN_TLB, LOAD_FN_HOST) \ 760 void HELPER(NAME)(void *vd, void *v0, target_ulong 
base, \ 761 CPURISCVState *env, uint32_t desc) \ 762 { \ 763 vext_ldff(vd, v0, base, env, desc, LOAD_FN_TLB, \ 764 LOAD_FN_HOST, ctzl(sizeof(ETYPE)), GETPC()); \ 765 } 766 767 GEN_VEXT_LDFF(vle8ff_v, int8_t, lde_b_tlb, lde_b_host) 768 GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h_tlb, lde_h_host) 769 GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w_tlb, lde_w_host) 770 GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d_tlb, lde_d_host) 771 772 #define DO_SWAP(N, M) (M) 773 #define DO_AND(N, M) (N & M) 774 #define DO_XOR(N, M) (N ^ M) 775 #define DO_OR(N, M) (N | M) 776 #define DO_ADD(N, M) (N + M) 777 778 /* Signed min/max */ 779 #define DO_MAX(N, M) ((N) >= (M) ? (N) : (M)) 780 #define DO_MIN(N, M) ((N) >= (M) ? (M) : (N)) 781 782 /* 783 * load and store whole register instructions 784 */ 785 static inline QEMU_ALWAYS_INLINE void 786 vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, 787 vext_ldst_elem_fn_tlb *ldst_tlb, 788 vext_ldst_elem_fn_host *ldst_host, uint32_t log2_esz, 789 uintptr_t ra, bool is_load) 790 { 791 target_ulong page_split, elems, addr; 792 uint32_t nf = vext_nf(desc); 793 uint32_t vlenb = riscv_cpu_cfg(env)->vlenb; 794 uint32_t max_elems = vlenb >> log2_esz; 795 uint32_t evl = nf * max_elems; 796 uint32_t esz = 1 << log2_esz; 797 int mmu_index = riscv_env_mmu_index(env, false); 798 799 /* Calculate the page range of first page */ 800 addr = base + (env->vstart << log2_esz); 801 page_split = -(addr | TARGET_PAGE_MASK); 802 /* Get number of elements */ 803 elems = page_split / esz; 804 if (unlikely(env->vstart + elems >= evl)) { 805 elems = evl - env->vstart; 806 } 807 808 /* Load/store elements in the first page */ 809 if (likely(elems)) { 810 vext_page_ldst_us(env, vd, addr, elems, 1, max_elems, log2_esz, 811 is_load, mmu_index, ldst_tlb, ldst_host, ra); 812 } 813 814 /* Load/store elements in the second page */ 815 if (unlikely(env->vstart < evl)) { 816 /* Cross page element */ 817 if (unlikely(page_split % esz)) { 818 addr = base + (env->vstart << log2_esz); 819 ldst_tlb(env, adjust_addr(env, addr), env->vstart, vd, ra); 820 env->vstart++; 821 } 822 823 addr = base + (env->vstart << log2_esz); 824 /* Get number of elements of second page */ 825 elems = evl - env->vstart; 826 827 /* Load/store elements in the second page */ 828 vext_page_ldst_us(env, vd, addr, elems, 1, max_elems, log2_esz, 829 is_load, mmu_index, ldst_tlb, ldst_host, ra); 830 } 831 832 env->vstart = 0; 833 } 834 835 #define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN_TLB, LOAD_FN_HOST) \ 836 void HELPER(NAME)(void *vd, target_ulong base, CPURISCVState *env, \ 837 uint32_t desc) \ 838 { \ 839 vext_ldst_whole(vd, base, env, desc, LOAD_FN_TLB, LOAD_FN_HOST, \ 840 ctzl(sizeof(ETYPE)), GETPC(), true); \ 841 } 842 843 GEN_VEXT_LD_WHOLE(vl1re8_v, int8_t, lde_b_tlb, lde_b_host) 844 GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h_tlb, lde_h_host) 845 GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w_tlb, lde_w_host) 846 GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d_tlb, lde_d_host) 847 GEN_VEXT_LD_WHOLE(vl2re8_v, int8_t, lde_b_tlb, lde_b_host) 848 GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h_tlb, lde_h_host) 849 GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w_tlb, lde_w_host) 850 GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d_tlb, lde_d_host) 851 GEN_VEXT_LD_WHOLE(vl4re8_v, int8_t, lde_b_tlb, lde_b_host) 852 GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h_tlb, lde_h_host) 853 GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w_tlb, lde_w_host) 854 GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d_tlb, lde_d_host) 855 GEN_VEXT_LD_WHOLE(vl8re8_v, 
int8_t, lde_b_tlb, lde_b_host) 856 GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h_tlb, lde_h_host) 857 GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w_tlb, lde_w_host) 858 GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d_tlb, lde_d_host) 859 860 #define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN_TLB, STORE_FN_HOST) \ 861 void HELPER(NAME)(void *vd, target_ulong base, CPURISCVState *env, \ 862 uint32_t desc) \ 863 { \ 864 vext_ldst_whole(vd, base, env, desc, STORE_FN_TLB, STORE_FN_HOST, \ 865 ctzl(sizeof(ETYPE)), GETPC(), false); \ 866 } 867 868 GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b_tlb, ste_b_host) 869 GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b_tlb, ste_b_host) 870 GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b_tlb, ste_b_host) 871 GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b_tlb, ste_b_host) 872 873 /* 874 * Vector Integer Arithmetic Instructions 875 */ 876 877 /* (TD, T1, T2, TX1, TX2) */ 878 #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t 879 #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t 880 #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t 881 #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t 882 #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t 883 #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t 884 #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t 885 #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t 886 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 887 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 888 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 889 #define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t 890 #define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t 891 #define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t 892 #define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t 893 #define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t 894 #define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t 895 #define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t 896 #define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t 897 #define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t 898 #define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t 899 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t 900 #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t 901 902 #define DO_SUB(N, M) (N - M) 903 #define DO_RSUB(N, M) (M - N) 904 905 RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD) 906 RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD) 907 RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD) 908 RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD) 909 RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB) 910 RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB) 911 RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB) 912 RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB) 913 914 GEN_VEXT_VV(vadd_vv_b, 1) 915 GEN_VEXT_VV(vadd_vv_h, 2) 916 GEN_VEXT_VV(vadd_vv_w, 4) 917 GEN_VEXT_VV(vadd_vv_d, 8) 918 GEN_VEXT_VV(vsub_vv_b, 1) 919 GEN_VEXT_VV(vsub_vv_h, 2) 920 GEN_VEXT_VV(vsub_vv_w, 4) 921 GEN_VEXT_VV(vsub_vv_d, 8) 922 923 924 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD) 925 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD) 926 RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD) 927 RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD) 928 RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB) 929 RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, 
DO_SUB) 930 RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB) 931 RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB) 932 RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB) 933 RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB) 934 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB) 935 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB) 936 937 GEN_VEXT_VX(vadd_vx_b, 1) 938 GEN_VEXT_VX(vadd_vx_h, 2) 939 GEN_VEXT_VX(vadd_vx_w, 4) 940 GEN_VEXT_VX(vadd_vx_d, 8) 941 GEN_VEXT_VX(vsub_vx_b, 1) 942 GEN_VEXT_VX(vsub_vx_h, 2) 943 GEN_VEXT_VX(vsub_vx_w, 4) 944 GEN_VEXT_VX(vsub_vx_d, 8) 945 GEN_VEXT_VX(vrsub_vx_b, 1) 946 GEN_VEXT_VX(vrsub_vx_h, 2) 947 GEN_VEXT_VX(vrsub_vx_w, 4) 948 GEN_VEXT_VX(vrsub_vx_d, 8) 949 950 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc) 951 { 952 intptr_t oprsz = simd_oprsz(desc); 953 intptr_t i; 954 955 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 956 *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i); 957 } 958 } 959 960 void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc) 961 { 962 intptr_t oprsz = simd_oprsz(desc); 963 intptr_t i; 964 965 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 966 *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i); 967 } 968 } 969 970 void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc) 971 { 972 intptr_t oprsz = simd_oprsz(desc); 973 intptr_t i; 974 975 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 976 *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i); 977 } 978 } 979 980 void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc) 981 { 982 intptr_t oprsz = simd_oprsz(desc); 983 intptr_t i; 984 985 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 986 *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i); 987 } 988 } 989 990 /* Vector Widening Integer Add/Subtract */ 991 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t 992 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t 993 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t 994 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 995 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 996 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 997 #define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t 998 #define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t 999 #define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t 1000 #define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t 1001 #define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t 1002 #define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t 1003 RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD) 1004 RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD) 1005 RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD) 1006 RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB) 1007 RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB) 1008 RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB) 1009 RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD) 1010 RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD) 1011 RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD) 1012 RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB) 1013 RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB) 1014 RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB) 1015 RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD) 1016 RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, 
H4, H2, H2, DO_ADD) 1017 RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD) 1018 RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB) 1019 RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB) 1020 RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB) 1021 RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD) 1022 RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD) 1023 RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD) 1024 RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB) 1025 RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB) 1026 RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB) 1027 GEN_VEXT_VV(vwaddu_vv_b, 2) 1028 GEN_VEXT_VV(vwaddu_vv_h, 4) 1029 GEN_VEXT_VV(vwaddu_vv_w, 8) 1030 GEN_VEXT_VV(vwsubu_vv_b, 2) 1031 GEN_VEXT_VV(vwsubu_vv_h, 4) 1032 GEN_VEXT_VV(vwsubu_vv_w, 8) 1033 GEN_VEXT_VV(vwadd_vv_b, 2) 1034 GEN_VEXT_VV(vwadd_vv_h, 4) 1035 GEN_VEXT_VV(vwadd_vv_w, 8) 1036 GEN_VEXT_VV(vwsub_vv_b, 2) 1037 GEN_VEXT_VV(vwsub_vv_h, 4) 1038 GEN_VEXT_VV(vwsub_vv_w, 8) 1039 GEN_VEXT_VV(vwaddu_wv_b, 2) 1040 GEN_VEXT_VV(vwaddu_wv_h, 4) 1041 GEN_VEXT_VV(vwaddu_wv_w, 8) 1042 GEN_VEXT_VV(vwsubu_wv_b, 2) 1043 GEN_VEXT_VV(vwsubu_wv_h, 4) 1044 GEN_VEXT_VV(vwsubu_wv_w, 8) 1045 GEN_VEXT_VV(vwadd_wv_b, 2) 1046 GEN_VEXT_VV(vwadd_wv_h, 4) 1047 GEN_VEXT_VV(vwadd_wv_w, 8) 1048 GEN_VEXT_VV(vwsub_wv_b, 2) 1049 GEN_VEXT_VV(vwsub_wv_h, 4) 1050 GEN_VEXT_VV(vwsub_wv_w, 8) 1051 1052 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD) 1053 RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD) 1054 RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD) 1055 RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB) 1056 RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB) 1057 RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB) 1058 RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD) 1059 RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD) 1060 RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD) 1061 RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB) 1062 RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB) 1063 RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB) 1064 RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD) 1065 RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD) 1066 RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD) 1067 RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB) 1068 RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB) 1069 RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB) 1070 RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD) 1071 RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD) 1072 RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD) 1073 RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB) 1074 RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB) 1075 RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB) 1076 GEN_VEXT_VX(vwaddu_vx_b, 2) 1077 GEN_VEXT_VX(vwaddu_vx_h, 4) 1078 GEN_VEXT_VX(vwaddu_vx_w, 8) 1079 GEN_VEXT_VX(vwsubu_vx_b, 2) 1080 GEN_VEXT_VX(vwsubu_vx_h, 4) 1081 GEN_VEXT_VX(vwsubu_vx_w, 8) 1082 GEN_VEXT_VX(vwadd_vx_b, 2) 1083 GEN_VEXT_VX(vwadd_vx_h, 4) 1084 GEN_VEXT_VX(vwadd_vx_w, 8) 1085 GEN_VEXT_VX(vwsub_vx_b, 2) 1086 GEN_VEXT_VX(vwsub_vx_h, 4) 1087 GEN_VEXT_VX(vwsub_vx_w, 8) 1088 GEN_VEXT_VX(vwaddu_wx_b, 2) 1089 GEN_VEXT_VX(vwaddu_wx_h, 4) 1090 GEN_VEXT_VX(vwaddu_wx_w, 8) 1091 GEN_VEXT_VX(vwsubu_wx_b, 2) 1092 GEN_VEXT_VX(vwsubu_wx_h, 4) 1093 GEN_VEXT_VX(vwsubu_wx_w, 8) 1094 GEN_VEXT_VX(vwadd_wx_b, 2) 1095 
GEN_VEXT_VX(vwadd_wx_h, 4) 1096 GEN_VEXT_VX(vwadd_wx_w, 8) 1097 GEN_VEXT_VX(vwsub_wx_b, 2) 1098 GEN_VEXT_VX(vwsub_wx_h, 4) 1099 GEN_VEXT_VX(vwsub_wx_w, 8) 1100 1101 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ 1102 #define DO_VADC(N, M, C) (N + M + C) 1103 #define DO_VSBC(N, M, C) (N - M - C) 1104 1105 #define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP) \ 1106 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1107 CPURISCVState *env, uint32_t desc) \ 1108 { \ 1109 uint32_t vl = env->vl; \ 1110 uint32_t esz = sizeof(ETYPE); \ 1111 uint32_t total_elems = \ 1112 vext_get_total_elems(env, desc, esz); \ 1113 uint32_t vta = vext_vta(desc); \ 1114 uint32_t i; \ 1115 \ 1116 VSTART_CHECK_EARLY_EXIT(env, vl); \ 1117 \ 1118 for (i = env->vstart; i < vl; i++) { \ 1119 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1120 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1121 ETYPE carry = vext_elem_mask(v0, i); \ 1122 \ 1123 *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \ 1124 } \ 1125 env->vstart = 0; \ 1126 /* set tail elements to 1s */ \ 1127 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1128 } 1129 1130 GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC) 1131 GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC) 1132 GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC) 1133 GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC) 1134 1135 GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC) 1136 GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC) 1137 GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC) 1138 GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC) 1139 1140 #define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP) \ 1141 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 1142 CPURISCVState *env, uint32_t desc) \ 1143 { \ 1144 uint32_t vl = env->vl; \ 1145 uint32_t esz = sizeof(ETYPE); \ 1146 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 1147 uint32_t vta = vext_vta(desc); \ 1148 uint32_t i; \ 1149 \ 1150 VSTART_CHECK_EARLY_EXIT(env, vl); \ 1151 \ 1152 for (i = env->vstart; i < vl; i++) { \ 1153 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1154 ETYPE carry = vext_elem_mask(v0, i); \ 1155 \ 1156 *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\ 1157 } \ 1158 env->vstart = 0; \ 1159 /* set tail elements to 1s */ \ 1160 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1161 } 1162 1163 GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC) 1164 GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC) 1165 GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC) 1166 GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC) 1167 1168 GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC) 1169 GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC) 1170 GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC) 1171 GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC) 1172 1173 #define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \ 1174 (__typeof(N))(N + M) < N) 1175 #define DO_MSBC(N, M, C) (C ? 
N <= M : N < M) 1176 1177 #define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \ 1178 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1179 CPURISCVState *env, uint32_t desc) \ 1180 { \ 1181 uint32_t vl = env->vl; \ 1182 uint32_t vm = vext_vm(desc); \ 1183 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \ 1184 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 1185 uint32_t i; \ 1186 \ 1187 VSTART_CHECK_EARLY_EXIT(env, vl); \ 1188 \ 1189 for (i = env->vstart; i < vl; i++) { \ 1190 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1191 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1192 ETYPE carry = !vm && vext_elem_mask(v0, i); \ 1193 vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \ 1194 } \ 1195 env->vstart = 0; \ 1196 /* 1197 * mask destination register are always tail-agnostic 1198 * set tail elements to 1s 1199 */ \ 1200 if (vta_all_1s) { \ 1201 for (; i < total_elems; i++) { \ 1202 vext_set_elem_mask(vd, i, 1); \ 1203 } \ 1204 } \ 1205 } 1206 1207 GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC) 1208 GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC) 1209 GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC) 1210 GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC) 1211 1212 GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC) 1213 GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC) 1214 GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC) 1215 GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC) 1216 1217 #define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \ 1218 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1219 void *vs2, CPURISCVState *env, uint32_t desc) \ 1220 { \ 1221 uint32_t vl = env->vl; \ 1222 uint32_t vm = vext_vm(desc); \ 1223 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \ 1224 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 1225 uint32_t i; \ 1226 \ 1227 VSTART_CHECK_EARLY_EXIT(env, vl); \ 1228 \ 1229 for (i = env->vstart; i < vl; i++) { \ 1230 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1231 ETYPE carry = !vm && vext_elem_mask(v0, i); \ 1232 vext_set_elem_mask(vd, i, \ 1233 DO_OP(s2, (ETYPE)(target_long)s1, carry)); \ 1234 } \ 1235 env->vstart = 0; \ 1236 /* 1237 * mask destination register are always tail-agnostic 1238 * set tail elements to 1s 1239 */ \ 1240 if (vta_all_1s) { \ 1241 for (; i < total_elems; i++) { \ 1242 vext_set_elem_mask(vd, i, 1); \ 1243 } \ 1244 } \ 1245 } 1246 1247 GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC) 1248 GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC) 1249 GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC) 1250 GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC) 1251 1252 GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC) 1253 GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC) 1254 GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC) 1255 GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC) 1256 1257 /* Vector Bitwise Logical Instructions */ 1258 RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND) 1259 RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND) 1260 RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND) 1261 RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND) 1262 RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR) 1263 RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR) 1264 RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR) 1265 RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR) 1266 RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR) 1267 RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR) 1268 RVVCALL(OPIVV2, vxor_vv_w, 
OP_SSS_W, H4, H4, H4, DO_XOR) 1269 RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR) 1270 GEN_VEXT_VV(vand_vv_b, 1) 1271 GEN_VEXT_VV(vand_vv_h, 2) 1272 GEN_VEXT_VV(vand_vv_w, 4) 1273 GEN_VEXT_VV(vand_vv_d, 8) 1274 GEN_VEXT_VV(vor_vv_b, 1) 1275 GEN_VEXT_VV(vor_vv_h, 2) 1276 GEN_VEXT_VV(vor_vv_w, 4) 1277 GEN_VEXT_VV(vor_vv_d, 8) 1278 GEN_VEXT_VV(vxor_vv_b, 1) 1279 GEN_VEXT_VV(vxor_vv_h, 2) 1280 GEN_VEXT_VV(vxor_vv_w, 4) 1281 GEN_VEXT_VV(vxor_vv_d, 8) 1282 1283 RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND) 1284 RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND) 1285 RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND) 1286 RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND) 1287 RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR) 1288 RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR) 1289 RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR) 1290 RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR) 1291 RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR) 1292 RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR) 1293 RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR) 1294 RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR) 1295 GEN_VEXT_VX(vand_vx_b, 1) 1296 GEN_VEXT_VX(vand_vx_h, 2) 1297 GEN_VEXT_VX(vand_vx_w, 4) 1298 GEN_VEXT_VX(vand_vx_d, 8) 1299 GEN_VEXT_VX(vor_vx_b, 1) 1300 GEN_VEXT_VX(vor_vx_h, 2) 1301 GEN_VEXT_VX(vor_vx_w, 4) 1302 GEN_VEXT_VX(vor_vx_d, 8) 1303 GEN_VEXT_VX(vxor_vx_b, 1) 1304 GEN_VEXT_VX(vxor_vx_h, 2) 1305 GEN_VEXT_VX(vxor_vx_w, 4) 1306 GEN_VEXT_VX(vxor_vx_d, 8) 1307 1308 /* Vector Single-Width Bit Shift Instructions */ 1309 #define DO_SLL(N, M) (N << (M)) 1310 #define DO_SRL(N, M) (N >> (M)) 1311 1312 /* generate the helpers for shift instructions with two vector operators */ 1313 #define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK) \ 1314 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 1315 void *vs2, CPURISCVState *env, uint32_t desc) \ 1316 { \ 1317 uint32_t vm = vext_vm(desc); \ 1318 uint32_t vl = env->vl; \ 1319 uint32_t esz = sizeof(TS1); \ 1320 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 1321 uint32_t vta = vext_vta(desc); \ 1322 uint32_t vma = vext_vma(desc); \ 1323 uint32_t i; \ 1324 \ 1325 VSTART_CHECK_EARLY_EXIT(env, vl); \ 1326 \ 1327 for (i = env->vstart; i < vl; i++) { \ 1328 if (!vm && !vext_elem_mask(v0, i)) { \ 1329 /* set masked-off elements to 1s */ \ 1330 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 1331 continue; \ 1332 } \ 1333 TS1 s1 = *((TS1 *)vs1 + HS1(i)); \ 1334 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1335 *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \ 1336 } \ 1337 env->vstart = 0; \ 1338 /* set tail elements to 1s */ \ 1339 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1340 } 1341 1342 GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7) 1343 GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf) 1344 GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f) 1345 GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f) 1346 1347 GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1348 GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1349 GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1350 GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1351 1352 GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7) 1353 GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf) 1354 GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, 
DO_SRL, 0x1f) 1355 GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1356 1357 /* 1358 * generate the helpers for shift instructions with one vector and one scalar 1359 */ 1360 #define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \ 1361 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1362 void *vs2, CPURISCVState *env, \ 1363 uint32_t desc) \ 1364 { \ 1365 uint32_t vm = vext_vm(desc); \ 1366 uint32_t vl = env->vl; \ 1367 uint32_t esz = sizeof(TD); \ 1368 uint32_t total_elems = \ 1369 vext_get_total_elems(env, desc, esz); \ 1370 uint32_t vta = vext_vta(desc); \ 1371 uint32_t vma = vext_vma(desc); \ 1372 uint32_t i; \ 1373 \ 1374 VSTART_CHECK_EARLY_EXIT(env, vl); \ 1375 \ 1376 for (i = env->vstart; i < vl; i++) { \ 1377 if (!vm && !vext_elem_mask(v0, i)) { \ 1378 /* set masked-off elements to 1s */ \ 1379 vext_set_elems_1s(vd, vma, i * esz, \ 1380 (i + 1) * esz); \ 1381 continue; \ 1382 } \ 1383 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1384 *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \ 1385 } \ 1386 env->vstart = 0; \ 1387 /* set tail elements to 1s */ \ 1388 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);\ 1389 } 1390 1391 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7) 1392 GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf) 1393 GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f) 1394 GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f) 1395 1396 GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1397 GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1398 GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1399 GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1400 1401 GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7) 1402 GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf) 1403 GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f) 1404 GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1405 1406 /* Vector Narrowing Integer Right Shift Instructions */ 1407 GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1408 GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1409 GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1410 GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf) 1411 GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1412 GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1413 GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1414 GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1415 GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1416 GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf) 1417 GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1418 GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1419 1420 /* Vector Integer Comparison Instructions */ 1421 #define DO_MSEQ(N, M) (N == M) 1422 #define DO_MSNE(N, M) (N != M) 1423 #define DO_MSLT(N, M) (N < M) 1424 #define DO_MSLE(N, M) (N <= M) 1425 #define DO_MSGT(N, M) (N > M) 1426 1427 #define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \ 1428 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1429 CPURISCVState *env, uint32_t desc) \ 1430 { \ 1431 uint32_t vm = vext_vm(desc); \ 1432 uint32_t vl = env->vl; \ 1433 uint32_t total_elems = 
riscv_cpu_cfg(env)->vlenb << 3; \ 1434 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 1435 uint32_t vma = vext_vma(desc); \ 1436 uint32_t i; \ 1437 \ 1438 VSTART_CHECK_EARLY_EXIT(env, vl); \ 1439 \ 1440 for (i = env->vstart; i < vl; i++) { \ 1441 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1442 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1443 if (!vm && !vext_elem_mask(v0, i)) { \ 1444 /* set masked-off elements to 1s */ \ 1445 if (vma) { \ 1446 vext_set_elem_mask(vd, i, 1); \ 1447 } \ 1448 continue; \ 1449 } \ 1450 vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \ 1451 } \ 1452 env->vstart = 0; \ 1453 /* 1454 * mask destination register are always tail-agnostic 1455 * set tail elements to 1s 1456 */ \ 1457 if (vta_all_1s) { \ 1458 for (; i < total_elems; i++) { \ 1459 vext_set_elem_mask(vd, i, 1); \ 1460 } \ 1461 } \ 1462 } 1463 1464 GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ) 1465 GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ) 1466 GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ) 1467 GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ) 1468 1469 GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE) 1470 GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE) 1471 GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE) 1472 GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE) 1473 1474 GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT) 1475 GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT) 1476 GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT) 1477 GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT) 1478 1479 GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT) 1480 GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT) 1481 GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT) 1482 GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT) 1483 1484 GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE) 1485 GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE) 1486 GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE) 1487 GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE) 1488 1489 GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE) 1490 GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE) 1491 GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE) 1492 GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE) 1493 1494 #define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \ 1495 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 1496 CPURISCVState *env, uint32_t desc) \ 1497 { \ 1498 uint32_t vm = vext_vm(desc); \ 1499 uint32_t vl = env->vl; \ 1500 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \ 1501 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 1502 uint32_t vma = vext_vma(desc); \ 1503 uint32_t i; \ 1504 \ 1505 VSTART_CHECK_EARLY_EXIT(env, vl); \ 1506 \ 1507 for (i = env->vstart; i < vl; i++) { \ 1508 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1509 if (!vm && !vext_elem_mask(v0, i)) { \ 1510 /* set masked-off elements to 1s */ \ 1511 if (vma) { \ 1512 vext_set_elem_mask(vd, i, 1); \ 1513 } \ 1514 continue; \ 1515 } \ 1516 vext_set_elem_mask(vd, i, \ 1517 DO_OP(s2, (ETYPE)(target_long)s1)); \ 1518 } \ 1519 env->vstart = 0; \ 1520 /* 1521 * mask destination register are always tail-agnostic 1522 * set tail elements to 1s 1523 */ \ 1524 if (vta_all_1s) { \ 1525 for (; i < total_elems; i++) { \ 1526 vext_set_elem_mask(vd, i, 1); \ 1527 } \ 1528 } \ 1529 } 1530 1531 GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ) 1532 GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ) 1533 GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ) 1534 GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ) 1535 1536 GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE) 
1537 GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE) 1538 GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE) 1539 GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE) 1540 1541 GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT) 1542 GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT) 1543 GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT) 1544 GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT) 1545 1546 GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT) 1547 GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT) 1548 GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT) 1549 GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT) 1550 1551 GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE) 1552 GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE) 1553 GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE) 1554 GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE) 1555 1556 GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE) 1557 GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE) 1558 GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE) 1559 GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE) 1560 1561 GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT) 1562 GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT) 1563 GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT) 1564 GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT) 1565 1566 GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT) 1567 GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT) 1568 GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT) 1569 GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT) 1570 1571 /* Vector Integer Min/Max Instructions */ 1572 RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN) 1573 RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN) 1574 RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN) 1575 RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN) 1576 RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN) 1577 RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN) 1578 RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN) 1579 RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN) 1580 RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX) 1581 RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX) 1582 RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX) 1583 RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX) 1584 RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX) 1585 RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX) 1586 RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX) 1587 RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX) 1588 GEN_VEXT_VV(vminu_vv_b, 1) 1589 GEN_VEXT_VV(vminu_vv_h, 2) 1590 GEN_VEXT_VV(vminu_vv_w, 4) 1591 GEN_VEXT_VV(vminu_vv_d, 8) 1592 GEN_VEXT_VV(vmin_vv_b, 1) 1593 GEN_VEXT_VV(vmin_vv_h, 2) 1594 GEN_VEXT_VV(vmin_vv_w, 4) 1595 GEN_VEXT_VV(vmin_vv_d, 8) 1596 GEN_VEXT_VV(vmaxu_vv_b, 1) 1597 GEN_VEXT_VV(vmaxu_vv_h, 2) 1598 GEN_VEXT_VV(vmaxu_vv_w, 4) 1599 GEN_VEXT_VV(vmaxu_vv_d, 8) 1600 GEN_VEXT_VV(vmax_vv_b, 1) 1601 GEN_VEXT_VV(vmax_vv_h, 2) 1602 GEN_VEXT_VV(vmax_vv_w, 4) 1603 GEN_VEXT_VV(vmax_vv_d, 8) 1604 1605 RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN) 1606 RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN) 1607 RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN) 1608 RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN) 1609 RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN) 1610 RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN) 1611 RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN) 1612 RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, 
H8, DO_MIN) 1613 RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX) 1614 RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX) 1615 RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX) 1616 RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX) 1617 RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX) 1618 RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX) 1619 RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX) 1620 RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX) 1621 GEN_VEXT_VX(vminu_vx_b, 1) 1622 GEN_VEXT_VX(vminu_vx_h, 2) 1623 GEN_VEXT_VX(vminu_vx_w, 4) 1624 GEN_VEXT_VX(vminu_vx_d, 8) 1625 GEN_VEXT_VX(vmin_vx_b, 1) 1626 GEN_VEXT_VX(vmin_vx_h, 2) 1627 GEN_VEXT_VX(vmin_vx_w, 4) 1628 GEN_VEXT_VX(vmin_vx_d, 8) 1629 GEN_VEXT_VX(vmaxu_vx_b, 1) 1630 GEN_VEXT_VX(vmaxu_vx_h, 2) 1631 GEN_VEXT_VX(vmaxu_vx_w, 4) 1632 GEN_VEXT_VX(vmaxu_vx_d, 8) 1633 GEN_VEXT_VX(vmax_vx_b, 1) 1634 GEN_VEXT_VX(vmax_vx_h, 2) 1635 GEN_VEXT_VX(vmax_vx_w, 4) 1636 GEN_VEXT_VX(vmax_vx_d, 8) 1637 1638 /* Vector Single-Width Integer Multiply Instructions */ 1639 #define DO_MUL(N, M) (N * M) 1640 RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL) 1641 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL) 1642 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL) 1643 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL) 1644 GEN_VEXT_VV(vmul_vv_b, 1) 1645 GEN_VEXT_VV(vmul_vv_h, 2) 1646 GEN_VEXT_VV(vmul_vv_w, 4) 1647 GEN_VEXT_VV(vmul_vv_d, 8) 1648 1649 static int8_t do_mulh_b(int8_t s2, int8_t s1) 1650 { 1651 return (int16_t)s2 * (int16_t)s1 >> 8; 1652 } 1653 1654 static int16_t do_mulh_h(int16_t s2, int16_t s1) 1655 { 1656 return (int32_t)s2 * (int32_t)s1 >> 16; 1657 } 1658 1659 static int32_t do_mulh_w(int32_t s2, int32_t s1) 1660 { 1661 return (int64_t)s2 * (int64_t)s1 >> 32; 1662 } 1663 1664 static int64_t do_mulh_d(int64_t s2, int64_t s1) 1665 { 1666 uint64_t hi_64, lo_64; 1667 1668 muls64(&lo_64, &hi_64, s1, s2); 1669 return hi_64; 1670 } 1671 1672 static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1) 1673 { 1674 return (uint16_t)s2 * (uint16_t)s1 >> 8; 1675 } 1676 1677 static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1) 1678 { 1679 return (uint32_t)s2 * (uint32_t)s1 >> 16; 1680 } 1681 1682 static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1) 1683 { 1684 return (uint64_t)s2 * (uint64_t)s1 >> 32; 1685 } 1686 1687 static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1) 1688 { 1689 uint64_t hi_64, lo_64; 1690 1691 mulu64(&lo_64, &hi_64, s2, s1); 1692 return hi_64; 1693 } 1694 1695 static int8_t do_mulhsu_b(int8_t s2, uint8_t s1) 1696 { 1697 return (int16_t)s2 * (uint16_t)s1 >> 8; 1698 } 1699 1700 static int16_t do_mulhsu_h(int16_t s2, uint16_t s1) 1701 { 1702 return (int32_t)s2 * (uint32_t)s1 >> 16; 1703 } 1704 1705 static int32_t do_mulhsu_w(int32_t s2, uint32_t s1) 1706 { 1707 return (int64_t)s2 * (uint64_t)s1 >> 32; 1708 } 1709 1710 /* 1711 * Let A = signed operand, 1712 * B = unsigned operand 1713 * P = mulu64(A, B), unsigned product 1714 * 1715 * LET X = 2 ** 64 - A, 2's complement of A 1716 * SP = signed product 1717 * THEN 1718 * IF A < 0 1719 * SP = -X * B 1720 * = -(2 ** 64 - A) * B 1721 * = A * B - 2 ** 64 * B 1722 * = P - 2 ** 64 * B 1723 * ELSE 1724 * SP = P 1725 * THEN 1726 * HI_P -= (A < 0 ? B : 0) 1727 */ 1728 1729 static int64_t do_mulhsu_d(int64_t s2, uint64_t s1) 1730 { 1731 uint64_t hi_64, lo_64; 1732 1733 mulu64(&lo_64, &hi_64, s2, s1); 1734 1735 hi_64 -= s2 < 0 ? 
s1 : 0; 1736 return hi_64; 1737 } 1738 1739 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b) 1740 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h) 1741 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w) 1742 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d) 1743 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b) 1744 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h) 1745 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w) 1746 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d) 1747 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b) 1748 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h) 1749 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w) 1750 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d) 1751 GEN_VEXT_VV(vmulh_vv_b, 1) 1752 GEN_VEXT_VV(vmulh_vv_h, 2) 1753 GEN_VEXT_VV(vmulh_vv_w, 4) 1754 GEN_VEXT_VV(vmulh_vv_d, 8) 1755 GEN_VEXT_VV(vmulhu_vv_b, 1) 1756 GEN_VEXT_VV(vmulhu_vv_h, 2) 1757 GEN_VEXT_VV(vmulhu_vv_w, 4) 1758 GEN_VEXT_VV(vmulhu_vv_d, 8) 1759 GEN_VEXT_VV(vmulhsu_vv_b, 1) 1760 GEN_VEXT_VV(vmulhsu_vv_h, 2) 1761 GEN_VEXT_VV(vmulhsu_vv_w, 4) 1762 GEN_VEXT_VV(vmulhsu_vv_d, 8) 1763 1764 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL) 1765 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL) 1766 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL) 1767 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL) 1768 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b) 1769 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h) 1770 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w) 1771 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d) 1772 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b) 1773 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h) 1774 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w) 1775 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d) 1776 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b) 1777 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h) 1778 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w) 1779 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d) 1780 GEN_VEXT_VX(vmul_vx_b, 1) 1781 GEN_VEXT_VX(vmul_vx_h, 2) 1782 GEN_VEXT_VX(vmul_vx_w, 4) 1783 GEN_VEXT_VX(vmul_vx_d, 8) 1784 GEN_VEXT_VX(vmulh_vx_b, 1) 1785 GEN_VEXT_VX(vmulh_vx_h, 2) 1786 GEN_VEXT_VX(vmulh_vx_w, 4) 1787 GEN_VEXT_VX(vmulh_vx_d, 8) 1788 GEN_VEXT_VX(vmulhu_vx_b, 1) 1789 GEN_VEXT_VX(vmulhu_vx_h, 2) 1790 GEN_VEXT_VX(vmulhu_vx_w, 4) 1791 GEN_VEXT_VX(vmulhu_vx_d, 8) 1792 GEN_VEXT_VX(vmulhsu_vx_b, 1) 1793 GEN_VEXT_VX(vmulhsu_vx_h, 2) 1794 GEN_VEXT_VX(vmulhsu_vx_w, 4) 1795 GEN_VEXT_VX(vmulhsu_vx_d, 8) 1796 1797 /* Vector Integer Divide Instructions */ 1798 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M) 1799 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M) 1800 #define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : \ 1801 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M) 1802 #define DO_REM(N, M) (unlikely(M == 0) ? N : \ 1803 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 
0 : N % M) 1804 1805 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU) 1806 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU) 1807 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU) 1808 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU) 1809 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV) 1810 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV) 1811 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV) 1812 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV) 1813 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU) 1814 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU) 1815 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU) 1816 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU) 1817 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM) 1818 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM) 1819 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM) 1820 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM) 1821 GEN_VEXT_VV(vdivu_vv_b, 1) 1822 GEN_VEXT_VV(vdivu_vv_h, 2) 1823 GEN_VEXT_VV(vdivu_vv_w, 4) 1824 GEN_VEXT_VV(vdivu_vv_d, 8) 1825 GEN_VEXT_VV(vdiv_vv_b, 1) 1826 GEN_VEXT_VV(vdiv_vv_h, 2) 1827 GEN_VEXT_VV(vdiv_vv_w, 4) 1828 GEN_VEXT_VV(vdiv_vv_d, 8) 1829 GEN_VEXT_VV(vremu_vv_b, 1) 1830 GEN_VEXT_VV(vremu_vv_h, 2) 1831 GEN_VEXT_VV(vremu_vv_w, 4) 1832 GEN_VEXT_VV(vremu_vv_d, 8) 1833 GEN_VEXT_VV(vrem_vv_b, 1) 1834 GEN_VEXT_VV(vrem_vv_h, 2) 1835 GEN_VEXT_VV(vrem_vv_w, 4) 1836 GEN_VEXT_VV(vrem_vv_d, 8) 1837 1838 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU) 1839 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU) 1840 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU) 1841 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU) 1842 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV) 1843 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV) 1844 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV) 1845 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV) 1846 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU) 1847 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU) 1848 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU) 1849 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU) 1850 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) 1851 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM) 1852 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM) 1853 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM) 1854 GEN_VEXT_VX(vdivu_vx_b, 1) 1855 GEN_VEXT_VX(vdivu_vx_h, 2) 1856 GEN_VEXT_VX(vdivu_vx_w, 4) 1857 GEN_VEXT_VX(vdivu_vx_d, 8) 1858 GEN_VEXT_VX(vdiv_vx_b, 1) 1859 GEN_VEXT_VX(vdiv_vx_h, 2) 1860 GEN_VEXT_VX(vdiv_vx_w, 4) 1861 GEN_VEXT_VX(vdiv_vx_d, 8) 1862 GEN_VEXT_VX(vremu_vx_b, 1) 1863 GEN_VEXT_VX(vremu_vx_h, 2) 1864 GEN_VEXT_VX(vremu_vx_w, 4) 1865 GEN_VEXT_VX(vremu_vx_d, 8) 1866 GEN_VEXT_VX(vrem_vx_b, 1) 1867 GEN_VEXT_VX(vrem_vx_h, 2) 1868 GEN_VEXT_VX(vrem_vx_w, 4) 1869 GEN_VEXT_VX(vrem_vx_d, 8) 1870 1871 /* Vector Widening Integer Multiply Instructions */ 1872 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) 1873 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL) 1874 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL) 1875 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL) 1876 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL) 1877 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL) 1878 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) 1879 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, 
DO_MUL) 1880 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) 1881 GEN_VEXT_VV(vwmul_vv_b, 2) 1882 GEN_VEXT_VV(vwmul_vv_h, 4) 1883 GEN_VEXT_VV(vwmul_vv_w, 8) 1884 GEN_VEXT_VV(vwmulu_vv_b, 2) 1885 GEN_VEXT_VV(vwmulu_vv_h, 4) 1886 GEN_VEXT_VV(vwmulu_vv_w, 8) 1887 GEN_VEXT_VV(vwmulsu_vv_b, 2) 1888 GEN_VEXT_VV(vwmulsu_vv_h, 4) 1889 GEN_VEXT_VV(vwmulsu_vv_w, 8) 1890 1891 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) 1892 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) 1893 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL) 1894 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL) 1895 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL) 1896 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL) 1897 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) 1898 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) 1899 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) 1900 GEN_VEXT_VX(vwmul_vx_b, 2) 1901 GEN_VEXT_VX(vwmul_vx_h, 4) 1902 GEN_VEXT_VX(vwmul_vx_w, 8) 1903 GEN_VEXT_VX(vwmulu_vx_b, 2) 1904 GEN_VEXT_VX(vwmulu_vx_h, 4) 1905 GEN_VEXT_VX(vwmulu_vx_w, 8) 1906 GEN_VEXT_VX(vwmulsu_vx_b, 2) 1907 GEN_VEXT_VX(vwmulsu_vx_h, 4) 1908 GEN_VEXT_VX(vwmulsu_vx_w, 8) 1909 1910 /* Vector Single-Width Integer Multiply-Add Instructions */ 1911 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1912 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 1913 { \ 1914 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1915 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1916 TD d = *((TD *)vd + HD(i)); \ 1917 *((TD *)vd + HD(i)) = OP(s2, s1, d); \ 1918 } 1919 1920 #define DO_MACC(N, M, D) (M * N + D) 1921 #define DO_NMSAC(N, M, D) (-(M * N) + D) 1922 #define DO_MADD(N, M, D) (M * D + N) 1923 #define DO_NMSUB(N, M, D) (-(M * D) + N) 1924 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC) 1925 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC) 1926 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC) 1927 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC) 1928 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC) 1929 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC) 1930 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC) 1931 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC) 1932 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD) 1933 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD) 1934 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD) 1935 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD) 1936 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB) 1937 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) 1938 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) 1939 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) 1940 GEN_VEXT_VV(vmacc_vv_b, 1) 1941 GEN_VEXT_VV(vmacc_vv_h, 2) 1942 GEN_VEXT_VV(vmacc_vv_w, 4) 1943 GEN_VEXT_VV(vmacc_vv_d, 8) 1944 GEN_VEXT_VV(vnmsac_vv_b, 1) 1945 GEN_VEXT_VV(vnmsac_vv_h, 2) 1946 GEN_VEXT_VV(vnmsac_vv_w, 4) 1947 GEN_VEXT_VV(vnmsac_vv_d, 8) 1948 GEN_VEXT_VV(vmadd_vv_b, 1) 1949 GEN_VEXT_VV(vmadd_vv_h, 2) 1950 GEN_VEXT_VV(vmadd_vv_w, 4) 1951 GEN_VEXT_VV(vmadd_vv_d, 8) 1952 GEN_VEXT_VV(vnmsub_vv_b, 1) 1953 GEN_VEXT_VV(vnmsub_vv_h, 2) 1954 GEN_VEXT_VV(vnmsub_vv_w, 4) 1955 GEN_VEXT_VV(vnmsub_vv_d, 8) 1956 1957 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 1958 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 1959 { \ 1960 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1961 TD d = *((TD *)vd 
+ HD(i)); \ 1962 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \ 1963 } 1964 1965 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC) 1966 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC) 1967 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC) 1968 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC) 1969 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC) 1970 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC) 1971 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC) 1972 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC) 1973 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD) 1974 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD) 1975 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD) 1976 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD) 1977 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB) 1978 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) 1979 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) 1980 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) 1981 GEN_VEXT_VX(vmacc_vx_b, 1) 1982 GEN_VEXT_VX(vmacc_vx_h, 2) 1983 GEN_VEXT_VX(vmacc_vx_w, 4) 1984 GEN_VEXT_VX(vmacc_vx_d, 8) 1985 GEN_VEXT_VX(vnmsac_vx_b, 1) 1986 GEN_VEXT_VX(vnmsac_vx_h, 2) 1987 GEN_VEXT_VX(vnmsac_vx_w, 4) 1988 GEN_VEXT_VX(vnmsac_vx_d, 8) 1989 GEN_VEXT_VX(vmadd_vx_b, 1) 1990 GEN_VEXT_VX(vmadd_vx_h, 2) 1991 GEN_VEXT_VX(vmadd_vx_w, 4) 1992 GEN_VEXT_VX(vmadd_vx_d, 8) 1993 GEN_VEXT_VX(vnmsub_vx_b, 1) 1994 GEN_VEXT_VX(vnmsub_vx_h, 2) 1995 GEN_VEXT_VX(vnmsub_vx_w, 4) 1996 GEN_VEXT_VX(vnmsub_vx_d, 8) 1997 1998 /* Vector Widening Integer Multiply-Add Instructions */ 1999 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) 2000 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC) 2001 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC) 2002 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC) 2003 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC) 2004 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC) 2005 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) 2006 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) 2007 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) 2008 GEN_VEXT_VV(vwmaccu_vv_b, 2) 2009 GEN_VEXT_VV(vwmaccu_vv_h, 4) 2010 GEN_VEXT_VV(vwmaccu_vv_w, 8) 2011 GEN_VEXT_VV(vwmacc_vv_b, 2) 2012 GEN_VEXT_VV(vwmacc_vv_h, 4) 2013 GEN_VEXT_VV(vwmacc_vv_w, 8) 2014 GEN_VEXT_VV(vwmaccsu_vv_b, 2) 2015 GEN_VEXT_VV(vwmaccsu_vv_h, 4) 2016 GEN_VEXT_VV(vwmaccsu_vv_w, 8) 2017 2018 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) 2019 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) 2020 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC) 2021 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC) 2022 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC) 2023 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC) 2024 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC) 2025 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC) 2026 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC) 2027 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) 2028 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) 2029 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) 2030 GEN_VEXT_VX(vwmaccu_vx_b, 2) 2031 GEN_VEXT_VX(vwmaccu_vx_h, 4) 2032 GEN_VEXT_VX(vwmaccu_vx_w, 8) 2033 GEN_VEXT_VX(vwmacc_vx_b, 2) 2034 GEN_VEXT_VX(vwmacc_vx_h, 4) 2035 GEN_VEXT_VX(vwmacc_vx_w, 8) 2036 GEN_VEXT_VX(vwmaccsu_vx_b, 2) 2037 
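/*
 * Note on operand order (see OPIVV3/OPIVX3 above): the OP callback receives
 * (s2, s1, d), so DO_MACC(N, M, D) = M * N + D accumulates s1 * s2 into the
 * old destination element, while DO_MADD(N, M, D) = M * D + N multiplies the
 * old destination by s1 and adds s2.
 *
 * Worked single-element sketch (comment only) for vwmaccu.vx with SEW=8,
 * where the destination is widened to 2*SEW; vd16, vs2 and i are
 * illustrative names, not part of this file:
 *
 *   uint8_t  s1b = (uint8_t)s1;          // scalar truncated to SEW
 *   uint8_t  s2  = vs2[i];
 *   uint16_t d   = vd16[i];              // 2*SEW accumulator
 *   vd16[i] = (uint16_t)((uint16_t)s1b * s2 + d);
 */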
GEN_VEXT_VX(vwmaccsu_vx_h, 4) 2038 GEN_VEXT_VX(vwmaccsu_vx_w, 8) 2039 GEN_VEXT_VX(vwmaccus_vx_b, 2) 2040 GEN_VEXT_VX(vwmaccus_vx_h, 4) 2041 GEN_VEXT_VX(vwmaccus_vx_w, 8) 2042 2043 /* Vector Integer Merge and Move Instructions */ 2044 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \ 2045 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \ 2046 uint32_t desc) \ 2047 { \ 2048 uint32_t vl = env->vl; \ 2049 uint32_t esz = sizeof(ETYPE); \ 2050 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2051 uint32_t vta = vext_vta(desc); \ 2052 uint32_t i; \ 2053 \ 2054 VSTART_CHECK_EARLY_EXIT(env, vl); \ 2055 \ 2056 for (i = env->vstart; i < vl; i++) { \ 2057 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 2058 *((ETYPE *)vd + H(i)) = s1; \ 2059 } \ 2060 env->vstart = 0; \ 2061 /* set tail elements to 1s */ \ 2062 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2063 } 2064 2065 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1) 2066 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2) 2067 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4) 2068 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8) 2069 2070 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \ 2071 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \ 2072 uint32_t desc) \ 2073 { \ 2074 uint32_t vl = env->vl; \ 2075 uint32_t esz = sizeof(ETYPE); \ 2076 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2077 uint32_t vta = vext_vta(desc); \ 2078 uint32_t i; \ 2079 \ 2080 VSTART_CHECK_EARLY_EXIT(env, vl); \ 2081 \ 2082 for (i = env->vstart; i < vl; i++) { \ 2083 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \ 2084 } \ 2085 env->vstart = 0; \ 2086 /* set tail elements to 1s */ \ 2087 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2088 } 2089 2090 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1) 2091 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2) 2092 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4) 2093 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8) 2094 2095 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \ 2096 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 2097 CPURISCVState *env, uint32_t desc) \ 2098 { \ 2099 uint32_t vl = env->vl; \ 2100 uint32_t esz = sizeof(ETYPE); \ 2101 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2102 uint32_t vta = vext_vta(desc); \ 2103 uint32_t i; \ 2104 \ 2105 VSTART_CHECK_EARLY_EXIT(env, vl); \ 2106 \ 2107 for (i = env->vstart; i < vl; i++) { \ 2108 ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1); \ 2109 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \ 2110 } \ 2111 env->vstart = 0; \ 2112 /* set tail elements to 1s */ \ 2113 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2114 } 2115 2116 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1) 2117 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2) 2118 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4) 2119 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8) 2120 2121 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \ 2122 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2123 void *vs2, CPURISCVState *env, uint32_t desc) \ 2124 { \ 2125 uint32_t vl = env->vl; \ 2126 uint32_t esz = sizeof(ETYPE); \ 2127 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2128 uint32_t vta = vext_vta(desc); \ 2129 uint32_t i; \ 2130 \ 2131 VSTART_CHECK_EARLY_EXIT(env, vl); \ 2132 \ 2133 for (i = env->vstart; i < vl; i++) { \ 2134 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 2135 ETYPE d = (!vext_elem_mask(v0, i) ? 
s2 : \ 2136 (ETYPE)(target_long)s1); \ 2137 *((ETYPE *)vd + H(i)) = d; \ 2138 } \ 2139 env->vstart = 0; \ 2140 /* set tail elements to 1s */ \ 2141 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2142 } 2143 2144 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1) 2145 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2) 2146 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4) 2147 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8) 2148 2149 /* 2150 * Vector Fixed-Point Arithmetic Instructions 2151 */ 2152 2153 /* Vector Single-Width Saturating Add and Subtract */ 2154 2155 /* 2156 * As fixed point instructions probably have round mode and saturation, 2157 * define common macros for fixed point here. 2158 */ 2159 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i, 2160 CPURISCVState *env, int vxrm); 2161 2162 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 2163 static inline void \ 2164 do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 2165 CPURISCVState *env, int vxrm) \ 2166 { \ 2167 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 2168 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2169 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \ 2170 } 2171 2172 static inline void 2173 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2, 2174 CPURISCVState *env, 2175 uint32_t vl, uint32_t vm, int vxrm, 2176 opivv2_rm_fn *fn, uint32_t vma, uint32_t esz) 2177 { 2178 for (uint32_t i = env->vstart; i < vl; i++) { 2179 if (!vm && !vext_elem_mask(v0, i)) { 2180 /* set masked-off elements to 1s */ 2181 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); 2182 continue; 2183 } 2184 fn(vd, vs1, vs2, i, env, vxrm); 2185 } 2186 env->vstart = 0; 2187 } 2188 2189 static inline void 2190 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, 2191 CPURISCVState *env, 2192 uint32_t desc, 2193 opivv2_rm_fn *fn, uint32_t esz) 2194 { 2195 uint32_t vm = vext_vm(desc); 2196 uint32_t vl = env->vl; 2197 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 2198 uint32_t vta = vext_vta(desc); 2199 uint32_t vma = vext_vma(desc); 2200 2201 VSTART_CHECK_EARLY_EXIT(env, vl); 2202 2203 switch (env->vxrm) { 2204 case 0: /* rnu */ 2205 vext_vv_rm_1(vd, v0, vs1, vs2, 2206 env, vl, vm, 0, fn, vma, esz); 2207 break; 2208 case 1: /* rne */ 2209 vext_vv_rm_1(vd, v0, vs1, vs2, 2210 env, vl, vm, 1, fn, vma, esz); 2211 break; 2212 case 2: /* rdn */ 2213 vext_vv_rm_1(vd, v0, vs1, vs2, 2214 env, vl, vm, 2, fn, vma, esz); 2215 break; 2216 default: /* rod */ 2217 vext_vv_rm_1(vd, v0, vs1, vs2, 2218 env, vl, vm, 3, fn, vma, esz); 2219 break; 2220 } 2221 /* set tail elements to 1s */ 2222 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 2223 } 2224 2225 /* generate helpers for fixed point instructions with OPIVV format */ 2226 #define GEN_VEXT_VV_RM(NAME, ESZ) \ 2227 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 2228 CPURISCVState *env, uint32_t desc) \ 2229 { \ 2230 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \ 2231 do_##NAME, ESZ); \ 2232 } 2233 2234 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, 2235 uint8_t b) 2236 { 2237 uint8_t res = a + b; 2238 if (res < a) { 2239 res = UINT8_MAX; 2240 env->vxsat = 0x1; 2241 } 2242 return res; 2243 } 2244 2245 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a, 2246 uint16_t b) 2247 { 2248 uint16_t res = a + b; 2249 if (res < a) { 2250 res = UINT16_MAX; 2251 env->vxsat = 0x1; 2252 } 2253 return res; 2254 } 2255 2256 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a, 2257 uint32_t b) 2258 { 2259 uint32_t res = a + b; 
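    /*
     * Unsigned saturating add: the sum wraps modulo 2^32, so overflow is
     * detected by res < a (equivalently res < b); on overflow the result
     * is clamped to UINT32_MAX and the vxsat flag is raised, as vsaddu
     * requires.
     */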
2260 if (res < a) { 2261 res = UINT32_MAX; 2262 env->vxsat = 0x1; 2263 } 2264 return res; 2265 } 2266 2267 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a, 2268 uint64_t b) 2269 { 2270 uint64_t res = a + b; 2271 if (res < a) { 2272 res = UINT64_MAX; 2273 env->vxsat = 0x1; 2274 } 2275 return res; 2276 } 2277 2278 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8) 2279 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) 2280 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) 2281 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64) 2282 GEN_VEXT_VV_RM(vsaddu_vv_b, 1) 2283 GEN_VEXT_VV_RM(vsaddu_vv_h, 2) 2284 GEN_VEXT_VV_RM(vsaddu_vv_w, 4) 2285 GEN_VEXT_VV_RM(vsaddu_vv_d, 8) 2286 2287 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, 2288 CPURISCVState *env, int vxrm); 2289 2290 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2291 static inline void \ 2292 do_##NAME(void *vd, target_long s1, void *vs2, int i, \ 2293 CPURISCVState *env, int vxrm) \ 2294 { \ 2295 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2296 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \ 2297 } 2298 2299 static inline void 2300 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2, 2301 CPURISCVState *env, 2302 uint32_t vl, uint32_t vm, int vxrm, 2303 opivx2_rm_fn *fn, uint32_t vma, uint32_t esz) 2304 { 2305 for (uint32_t i = env->vstart; i < vl; i++) { 2306 if (!vm && !vext_elem_mask(v0, i)) { 2307 /* set masked-off elements to 1s */ 2308 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); 2309 continue; 2310 } 2311 fn(vd, s1, vs2, i, env, vxrm); 2312 } 2313 env->vstart = 0; 2314 } 2315 2316 static inline void 2317 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, 2318 CPURISCVState *env, 2319 uint32_t desc, 2320 opivx2_rm_fn *fn, uint32_t esz) 2321 { 2322 uint32_t vm = vext_vm(desc); 2323 uint32_t vl = env->vl; 2324 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 2325 uint32_t vta = vext_vta(desc); 2326 uint32_t vma = vext_vma(desc); 2327 2328 VSTART_CHECK_EARLY_EXIT(env, vl); 2329 2330 switch (env->vxrm) { 2331 case 0: /* rnu */ 2332 vext_vx_rm_1(vd, v0, s1, vs2, 2333 env, vl, vm, 0, fn, vma, esz); 2334 break; 2335 case 1: /* rne */ 2336 vext_vx_rm_1(vd, v0, s1, vs2, 2337 env, vl, vm, 1, fn, vma, esz); 2338 break; 2339 case 2: /* rdn */ 2340 vext_vx_rm_1(vd, v0, s1, vs2, 2341 env, vl, vm, 2, fn, vma, esz); 2342 break; 2343 default: /* rod */ 2344 vext_vx_rm_1(vd, v0, s1, vs2, 2345 env, vl, vm, 3, fn, vma, esz); 2346 break; 2347 } 2348 /* set tail elements to 1s */ 2349 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 2350 } 2351 2352 /* generate helpers for fixed point instructions with OPIVX format */ 2353 #define GEN_VEXT_VX_RM(NAME, ESZ) \ 2354 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2355 void *vs2, CPURISCVState *env, \ 2356 uint32_t desc) \ 2357 { \ 2358 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \ 2359 do_##NAME, ESZ); \ 2360 } 2361 2362 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) 2363 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) 2364 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) 2365 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) 2366 GEN_VEXT_VX_RM(vsaddu_vx_b, 1) 2367 GEN_VEXT_VX_RM(vsaddu_vx_h, 2) 2368 GEN_VEXT_VX_RM(vsaddu_vx_w, 4) 2369 GEN_VEXT_VX_RM(vsaddu_vx_d, 8) 2370 2371 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2372 { 2373 int8_t res = a + b; 2374 if ((res ^ a) & (res ^ 
b) & INT8_MIN) { 2375 res = a > 0 ? INT8_MAX : INT8_MIN; 2376 env->vxsat = 0x1; 2377 } 2378 return res; 2379 } 2380 2381 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, 2382 int16_t b) 2383 { 2384 int16_t res = a + b; 2385 if ((res ^ a) & (res ^ b) & INT16_MIN) { 2386 res = a > 0 ? INT16_MAX : INT16_MIN; 2387 env->vxsat = 0x1; 2388 } 2389 return res; 2390 } 2391 2392 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, 2393 int32_t b) 2394 { 2395 int32_t res = a + b; 2396 if ((res ^ a) & (res ^ b) & INT32_MIN) { 2397 res = a > 0 ? INT32_MAX : INT32_MIN; 2398 env->vxsat = 0x1; 2399 } 2400 return res; 2401 } 2402 2403 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, 2404 int64_t b) 2405 { 2406 int64_t res = a + b; 2407 if ((res ^ a) & (res ^ b) & INT64_MIN) { 2408 res = a > 0 ? INT64_MAX : INT64_MIN; 2409 env->vxsat = 0x1; 2410 } 2411 return res; 2412 } 2413 2414 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8) 2415 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) 2416 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) 2417 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) 2418 GEN_VEXT_VV_RM(vsadd_vv_b, 1) 2419 GEN_VEXT_VV_RM(vsadd_vv_h, 2) 2420 GEN_VEXT_VV_RM(vsadd_vv_w, 4) 2421 GEN_VEXT_VV_RM(vsadd_vv_d, 8) 2422 2423 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) 2424 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) 2425 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) 2426 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) 2427 GEN_VEXT_VX_RM(vsadd_vx_b, 1) 2428 GEN_VEXT_VX_RM(vsadd_vx_h, 2) 2429 GEN_VEXT_VX_RM(vsadd_vx_w, 4) 2430 GEN_VEXT_VX_RM(vsadd_vx_d, 8) 2431 2432 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, 2433 uint8_t b) 2434 { 2435 uint8_t res = a - b; 2436 if (res > a) { 2437 res = 0; 2438 env->vxsat = 0x1; 2439 } 2440 return res; 2441 } 2442 2443 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a, 2444 uint16_t b) 2445 { 2446 uint16_t res = a - b; 2447 if (res > a) { 2448 res = 0; 2449 env->vxsat = 0x1; 2450 } 2451 return res; 2452 } 2453 2454 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a, 2455 uint32_t b) 2456 { 2457 uint32_t res = a - b; 2458 if (res > a) { 2459 res = 0; 2460 env->vxsat = 0x1; 2461 } 2462 return res; 2463 } 2464 2465 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a, 2466 uint64_t b) 2467 { 2468 uint64_t res = a - b; 2469 if (res > a) { 2470 res = 0; 2471 env->vxsat = 0x1; 2472 } 2473 return res; 2474 } 2475 2476 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8) 2477 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16) 2478 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) 2479 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64) 2480 GEN_VEXT_VV_RM(vssubu_vv_b, 1) 2481 GEN_VEXT_VV_RM(vssubu_vv_h, 2) 2482 GEN_VEXT_VV_RM(vssubu_vv_w, 4) 2483 GEN_VEXT_VV_RM(vssubu_vv_d, 8) 2484 2485 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) 2486 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) 2487 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) 2488 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) 2489 GEN_VEXT_VX_RM(vssubu_vx_b, 1) 2490 GEN_VEXT_VX_RM(vssubu_vx_h, 2) 2491 GEN_VEXT_VX_RM(vssubu_vx_w, 4) 2492 GEN_VEXT_VX_RM(vssubu_vx_d, 8) 2493 2494 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2495 { 2496 int8_t res = a - b; 2497 
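    /*
     * Signed saturating subtract: overflow is only possible when a and b
     * have opposite signs, and it occurred when the wrapped result's sign
     * differs from a's, i.e. (res ^ a) & (a ^ b) & INT8_MIN is non-zero;
     * the result is then clamped towards a's sign and vxsat is set.
     */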
if ((res ^ a) & (a ^ b) & INT8_MIN) { 2498 res = a >= 0 ? INT8_MAX : INT8_MIN; 2499 env->vxsat = 0x1; 2500 } 2501 return res; 2502 } 2503 2504 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, 2505 int16_t b) 2506 { 2507 int16_t res = a - b; 2508 if ((res ^ a) & (a ^ b) & INT16_MIN) { 2509 res = a >= 0 ? INT16_MAX : INT16_MIN; 2510 env->vxsat = 0x1; 2511 } 2512 return res; 2513 } 2514 2515 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, 2516 int32_t b) 2517 { 2518 int32_t res = a - b; 2519 if ((res ^ a) & (a ^ b) & INT32_MIN) { 2520 res = a >= 0 ? INT32_MAX : INT32_MIN; 2521 env->vxsat = 0x1; 2522 } 2523 return res; 2524 } 2525 2526 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, 2527 int64_t b) 2528 { 2529 int64_t res = a - b; 2530 if ((res ^ a) & (a ^ b) & INT64_MIN) { 2531 res = a >= 0 ? INT64_MAX : INT64_MIN; 2532 env->vxsat = 0x1; 2533 } 2534 return res; 2535 } 2536 2537 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8) 2538 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16) 2539 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32) 2540 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64) 2541 GEN_VEXT_VV_RM(vssub_vv_b, 1) 2542 GEN_VEXT_VV_RM(vssub_vv_h, 2) 2543 GEN_VEXT_VV_RM(vssub_vv_w, 4) 2544 GEN_VEXT_VV_RM(vssub_vv_d, 8) 2545 2546 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8) 2547 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16) 2548 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32) 2549 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64) 2550 GEN_VEXT_VX_RM(vssub_vx_b, 1) 2551 GEN_VEXT_VX_RM(vssub_vx_h, 2) 2552 GEN_VEXT_VX_RM(vssub_vx_w, 4) 2553 GEN_VEXT_VX_RM(vssub_vx_d, 8) 2554 2555 /* Vector Single-Width Averaging Add and Subtract */ 2556 static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift) 2557 { 2558 uint8_t d = extract64(v, shift, 1); 2559 uint8_t d1; 2560 uint64_t D1, D2; 2561 2562 if (shift == 0 || shift > 64) { 2563 return 0; 2564 } 2565 2566 d1 = extract64(v, shift - 1, 1); 2567 D1 = extract64(v, 0, shift); 2568 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */ 2569 return d1; 2570 } else if (vxrm == 1) { /* round-to-nearest-even */ 2571 if (shift > 1) { 2572 D2 = extract64(v, 0, shift - 1); 2573 return d1 & ((D2 != 0) | d); 2574 } else { 2575 return d1 & d; 2576 } 2577 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */ 2578 return !d & (D1 != 0); 2579 } 2580 return 0; /* round-down (truncate) */ 2581 } 2582 2583 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, 2584 int32_t b) 2585 { 2586 int64_t res = (int64_t)a + b; 2587 uint8_t round = get_round(vxrm, res, 1); 2588 2589 return (res >> 1) + round; 2590 } 2591 2592 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, 2593 int64_t b) 2594 { 2595 int64_t res = a + b; 2596 uint8_t round = get_round(vxrm, res, 1); 2597 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN; 2598 2599 /* With signed overflow, bit 64 is inverse of bit 63. 
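     * Equivalently, the true 65-bit sum's sign is bit 63 of res XOR'd with
     * the overflow flag, so flipping bit 63 of the shifted result via
     * '^ over' restores the correctly signed average before the rounding
     * term is added.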
*/ 2600 return ((res >> 1) ^ over) + round; 2601 } 2602 2603 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32) 2604 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) 2605 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) 2606 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) 2607 GEN_VEXT_VV_RM(vaadd_vv_b, 1) 2608 GEN_VEXT_VV_RM(vaadd_vv_h, 2) 2609 GEN_VEXT_VV_RM(vaadd_vv_w, 4) 2610 GEN_VEXT_VV_RM(vaadd_vv_d, 8) 2611 2612 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) 2613 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) 2614 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) 2615 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) 2616 GEN_VEXT_VX_RM(vaadd_vx_b, 1) 2617 GEN_VEXT_VX_RM(vaadd_vx_h, 2) 2618 GEN_VEXT_VX_RM(vaadd_vx_w, 4) 2619 GEN_VEXT_VX_RM(vaadd_vx_d, 8) 2620 2621 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm, 2622 uint32_t a, uint32_t b) 2623 { 2624 uint64_t res = (uint64_t)a + b; 2625 uint8_t round = get_round(vxrm, res, 1); 2626 2627 return (res >> 1) + round; 2628 } 2629 2630 static inline uint64_t aaddu64(CPURISCVState *env, int vxrm, 2631 uint64_t a, uint64_t b) 2632 { 2633 uint64_t res = a + b; 2634 uint8_t round = get_round(vxrm, res, 1); 2635 uint64_t over = (uint64_t)(res < a) << 63; 2636 2637 return ((res >> 1) | over) + round; 2638 } 2639 2640 RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32) 2641 RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32) 2642 RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32) 2643 RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64) 2644 GEN_VEXT_VV_RM(vaaddu_vv_b, 1) 2645 GEN_VEXT_VV_RM(vaaddu_vv_h, 2) 2646 GEN_VEXT_VV_RM(vaaddu_vv_w, 4) 2647 GEN_VEXT_VV_RM(vaaddu_vv_d, 8) 2648 2649 RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32) 2650 RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32) 2651 RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32) 2652 RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64) 2653 GEN_VEXT_VX_RM(vaaddu_vx_b, 1) 2654 GEN_VEXT_VX_RM(vaaddu_vx_h, 2) 2655 GEN_VEXT_VX_RM(vaaddu_vx_w, 4) 2656 GEN_VEXT_VX_RM(vaaddu_vx_d, 8) 2657 2658 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, 2659 int32_t b) 2660 { 2661 int64_t res = (int64_t)a - b; 2662 uint8_t round = get_round(vxrm, res, 1); 2663 2664 return (res >> 1) + round; 2665 } 2666 2667 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, 2668 int64_t b) 2669 { 2670 int64_t res = (int64_t)a - b; 2671 uint8_t round = get_round(vxrm, res, 1); 2672 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN; 2673 2674 /* With signed overflow, bit 64 is inverse of bit 63. 
*/ 2675 return ((res >> 1) ^ over) + round; 2676 } 2677 2678 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32) 2679 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32) 2680 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) 2681 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) 2682 GEN_VEXT_VV_RM(vasub_vv_b, 1) 2683 GEN_VEXT_VV_RM(vasub_vv_h, 2) 2684 GEN_VEXT_VV_RM(vasub_vv_w, 4) 2685 GEN_VEXT_VV_RM(vasub_vv_d, 8) 2686 2687 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) 2688 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) 2689 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) 2690 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) 2691 GEN_VEXT_VX_RM(vasub_vx_b, 1) 2692 GEN_VEXT_VX_RM(vasub_vx_h, 2) 2693 GEN_VEXT_VX_RM(vasub_vx_w, 4) 2694 GEN_VEXT_VX_RM(vasub_vx_d, 8) 2695 2696 static inline uint32_t asubu32(CPURISCVState *env, int vxrm, 2697 uint32_t a, uint32_t b) 2698 { 2699 int64_t res = (int64_t)a - b; 2700 uint8_t round = get_round(vxrm, res, 1); 2701 2702 return (res >> 1) + round; 2703 } 2704 2705 static inline uint64_t asubu64(CPURISCVState *env, int vxrm, 2706 uint64_t a, uint64_t b) 2707 { 2708 uint64_t res = (uint64_t)a - b; 2709 uint8_t round = get_round(vxrm, res, 1); 2710 uint64_t over = (uint64_t)(res > a) << 63; 2711 2712 return ((res >> 1) | over) + round; 2713 } 2714 2715 RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32) 2716 RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32) 2717 RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32) 2718 RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64) 2719 GEN_VEXT_VV_RM(vasubu_vv_b, 1) 2720 GEN_VEXT_VV_RM(vasubu_vv_h, 2) 2721 GEN_VEXT_VV_RM(vasubu_vv_w, 4) 2722 GEN_VEXT_VV_RM(vasubu_vv_d, 8) 2723 2724 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32) 2725 RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32) 2726 RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32) 2727 RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64) 2728 GEN_VEXT_VX_RM(vasubu_vx_b, 1) 2729 GEN_VEXT_VX_RM(vasubu_vx_h, 2) 2730 GEN_VEXT_VX_RM(vasubu_vx_w, 4) 2731 GEN_VEXT_VX_RM(vasubu_vx_d, 8) 2732 2733 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */ 2734 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2735 { 2736 uint8_t round; 2737 int16_t res; 2738 2739 res = (int16_t)a * (int16_t)b; 2740 round = get_round(vxrm, res, 7); 2741 res = (res >> 7) + round; 2742 2743 if (res > INT8_MAX) { 2744 env->vxsat = 0x1; 2745 return INT8_MAX; 2746 } else if (res < INT8_MIN) { 2747 env->vxsat = 0x1; 2748 return INT8_MIN; 2749 } else { 2750 return res; 2751 } 2752 } 2753 2754 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2755 { 2756 uint8_t round; 2757 int32_t res; 2758 2759 res = (int32_t)a * (int32_t)b; 2760 round = get_round(vxrm, res, 15); 2761 res = (res >> 15) + round; 2762 2763 if (res > INT16_MAX) { 2764 env->vxsat = 0x1; 2765 return INT16_MAX; 2766 } else if (res < INT16_MIN) { 2767 env->vxsat = 0x1; 2768 return INT16_MIN; 2769 } else { 2770 return res; 2771 } 2772 } 2773 2774 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2775 { 2776 uint8_t round; 2777 int64_t res; 2778 2779 res = (int64_t)a * (int64_t)b; 2780 round = get_round(vxrm, res, 31); 2781 res = (res >> 31) + round; 2782 2783 if (res > INT32_MAX) { 2784 env->vxsat = 0x1; 2785 return INT32_MAX; 2786 } else if (res < INT32_MIN) { 2787 env->vxsat = 0x1; 
2788 return INT32_MIN; 2789 } else { 2790 return res; 2791 } 2792 } 2793 2794 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2795 { 2796 uint8_t round; 2797 uint64_t hi_64, lo_64; 2798 int64_t res; 2799 2800 if (a == INT64_MIN && b == INT64_MIN) { 2801 env->vxsat = 1; 2802 return INT64_MAX; 2803 } 2804 2805 muls64(&lo_64, &hi_64, a, b); 2806 round = get_round(vxrm, lo_64, 63); 2807 /* 2808 * Cannot overflow, as there are always 2809 * 2 sign bits after multiply. 2810 */ 2811 res = (hi_64 << 1) | (lo_64 >> 63); 2812 if (round) { 2813 if (res == INT64_MAX) { 2814 env->vxsat = 1; 2815 } else { 2816 res += 1; 2817 } 2818 } 2819 return res; 2820 } 2821 2822 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8) 2823 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) 2824 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) 2825 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) 2826 GEN_VEXT_VV_RM(vsmul_vv_b, 1) 2827 GEN_VEXT_VV_RM(vsmul_vv_h, 2) 2828 GEN_VEXT_VV_RM(vsmul_vv_w, 4) 2829 GEN_VEXT_VV_RM(vsmul_vv_d, 8) 2830 2831 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) 2832 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) 2833 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) 2834 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) 2835 GEN_VEXT_VX_RM(vsmul_vx_b, 1) 2836 GEN_VEXT_VX_RM(vsmul_vx_h, 2) 2837 GEN_VEXT_VX_RM(vsmul_vx_w, 4) 2838 GEN_VEXT_VX_RM(vsmul_vx_d, 8) 2839 2840 /* Vector Single-Width Scaling Shift Instructions */ 2841 static inline uint8_t 2842 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2843 { 2844 uint8_t round, shift = b & 0x7; 2845 uint8_t res; 2846 2847 round = get_round(vxrm, a, shift); 2848 res = (a >> shift) + round; 2849 return res; 2850 } 2851 static inline uint16_t 2852 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b) 2853 { 2854 uint8_t round, shift = b & 0xf; 2855 2856 round = get_round(vxrm, a, shift); 2857 return (a >> shift) + round; 2858 } 2859 static inline uint32_t 2860 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) 2861 { 2862 uint8_t round, shift = b & 0x1f; 2863 2864 round = get_round(vxrm, a, shift); 2865 return (a >> shift) + round; 2866 } 2867 static inline uint64_t 2868 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b) 2869 { 2870 uint8_t round, shift = b & 0x3f; 2871 2872 round = get_round(vxrm, a, shift); 2873 return (a >> shift) + round; 2874 } 2875 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8) 2876 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) 2877 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) 2878 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) 2879 GEN_VEXT_VV_RM(vssrl_vv_b, 1) 2880 GEN_VEXT_VV_RM(vssrl_vv_h, 2) 2881 GEN_VEXT_VV_RM(vssrl_vv_w, 4) 2882 GEN_VEXT_VV_RM(vssrl_vv_d, 8) 2883 2884 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) 2885 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) 2886 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) 2887 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) 2888 GEN_VEXT_VX_RM(vssrl_vx_b, 1) 2889 GEN_VEXT_VX_RM(vssrl_vx_h, 2) 2890 GEN_VEXT_VX_RM(vssrl_vx_w, 4) 2891 GEN_VEXT_VX_RM(vssrl_vx_d, 8) 2892 2893 static inline int8_t 2894 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2895 { 2896 uint8_t round, shift = b & 0x7; 2897 2898 round = get_round(vxrm, a, shift); 2899 return (a >> shift) + round; 2900 } 2901 static inline int16_t 2902 
vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2903 { 2904 uint8_t round, shift = b & 0xf; 2905 2906 round = get_round(vxrm, a, shift); 2907 return (a >> shift) + round; 2908 } 2909 static inline int32_t 2910 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2911 { 2912 uint8_t round, shift = b & 0x1f; 2913 2914 round = get_round(vxrm, a, shift); 2915 return (a >> shift) + round; 2916 } 2917 static inline int64_t 2918 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2919 { 2920 uint8_t round, shift = b & 0x3f; 2921 2922 round = get_round(vxrm, a, shift); 2923 return (a >> shift) + round; 2924 } 2925 2926 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) 2927 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) 2928 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) 2929 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) 2930 GEN_VEXT_VV_RM(vssra_vv_b, 1) 2931 GEN_VEXT_VV_RM(vssra_vv_h, 2) 2932 GEN_VEXT_VV_RM(vssra_vv_w, 4) 2933 GEN_VEXT_VV_RM(vssra_vv_d, 8) 2934 2935 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) 2936 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) 2937 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32) 2938 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) 2939 GEN_VEXT_VX_RM(vssra_vx_b, 1) 2940 GEN_VEXT_VX_RM(vssra_vx_h, 2) 2941 GEN_VEXT_VX_RM(vssra_vx_w, 4) 2942 GEN_VEXT_VX_RM(vssra_vx_d, 8) 2943 2944 /* Vector Narrowing Fixed-Point Clip Instructions */ 2945 static inline int8_t 2946 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b) 2947 { 2948 uint8_t round, shift = b & 0xf; 2949 int16_t res; 2950 2951 round = get_round(vxrm, a, shift); 2952 res = (a >> shift) + round; 2953 if (res > INT8_MAX) { 2954 env->vxsat = 0x1; 2955 return INT8_MAX; 2956 } else if (res < INT8_MIN) { 2957 env->vxsat = 0x1; 2958 return INT8_MIN; 2959 } else { 2960 return res; 2961 } 2962 } 2963 2964 static inline int16_t 2965 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b) 2966 { 2967 uint8_t round, shift = b & 0x1f; 2968 int32_t res; 2969 2970 round = get_round(vxrm, a, shift); 2971 res = (a >> shift) + round; 2972 if (res > INT16_MAX) { 2973 env->vxsat = 0x1; 2974 return INT16_MAX; 2975 } else if (res < INT16_MIN) { 2976 env->vxsat = 0x1; 2977 return INT16_MIN; 2978 } else { 2979 return res; 2980 } 2981 } 2982 2983 static inline int32_t 2984 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) 2985 { 2986 uint8_t round, shift = b & 0x3f; 2987 int64_t res; 2988 2989 round = get_round(vxrm, a, shift); 2990 res = (a >> shift) + round; 2991 if (res > INT32_MAX) { 2992 env->vxsat = 0x1; 2993 return INT32_MAX; 2994 } else if (res < INT32_MIN) { 2995 env->vxsat = 0x1; 2996 return INT32_MIN; 2997 } else { 2998 return res; 2999 } 3000 } 3001 3002 RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8) 3003 RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16) 3004 RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32) 3005 GEN_VEXT_VV_RM(vnclip_wv_b, 1) 3006 GEN_VEXT_VV_RM(vnclip_wv_h, 2) 3007 GEN_VEXT_VV_RM(vnclip_wv_w, 4) 3008 3009 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8) 3010 RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16) 3011 RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32) 3012 GEN_VEXT_VX_RM(vnclip_wx_b, 1) 3013 GEN_VEXT_VX_RM(vnclip_wx_h, 2) 3014 GEN_VEXT_VX_RM(vnclip_wx_w, 4) 3015 3016 static inline uint8_t 3017 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b) 3018 { 3019 
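    /*
     * Narrowing fixed-point clip, unsigned: 'a' is a 2*SEW (16-bit) source
     * element and the low 4 bits of 'b' give the shift amount.  The value
     * is shifted right with rounding (per the vxrm mode handled by
     * get_round()) and then saturated to UINT8_MAX, setting vxsat when the
     * clamp is applied.
     */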
uint8_t round, shift = b & 0xf; 3020 uint16_t res; 3021 3022 round = get_round(vxrm, a, shift); 3023 res = (a >> shift) + round; 3024 if (res > UINT8_MAX) { 3025 env->vxsat = 0x1; 3026 return UINT8_MAX; 3027 } else { 3028 return res; 3029 } 3030 } 3031 3032 static inline uint16_t 3033 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b) 3034 { 3035 uint8_t round, shift = b & 0x1f; 3036 uint32_t res; 3037 3038 round = get_round(vxrm, a, shift); 3039 res = (a >> shift) + round; 3040 if (res > UINT16_MAX) { 3041 env->vxsat = 0x1; 3042 return UINT16_MAX; 3043 } else { 3044 return res; 3045 } 3046 } 3047 3048 static inline uint32_t 3049 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b) 3050 { 3051 uint8_t round, shift = b & 0x3f; 3052 uint64_t res; 3053 3054 round = get_round(vxrm, a, shift); 3055 res = (a >> shift) + round; 3056 if (res > UINT32_MAX) { 3057 env->vxsat = 0x1; 3058 return UINT32_MAX; 3059 } else { 3060 return res; 3061 } 3062 } 3063 3064 RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8) 3065 RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16) 3066 RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32) 3067 GEN_VEXT_VV_RM(vnclipu_wv_b, 1) 3068 GEN_VEXT_VV_RM(vnclipu_wv_h, 2) 3069 GEN_VEXT_VV_RM(vnclipu_wv_w, 4) 3070 3071 RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8) 3072 RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16) 3073 RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32) 3074 GEN_VEXT_VX_RM(vnclipu_wx_b, 1) 3075 GEN_VEXT_VX_RM(vnclipu_wx_h, 2) 3076 GEN_VEXT_VX_RM(vnclipu_wx_w, 4) 3077 3078 /* 3079 * Vector Float Point Arithmetic Instructions 3080 */ 3081 /* Vector Single-Width Floating-Point Add/Subtract Instructions */ 3082 #define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3083 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3084 CPURISCVState *env) \ 3085 { \ 3086 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3087 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3088 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \ 3089 } 3090 3091 #define GEN_VEXT_VV_ENV(NAME, ESZ) \ 3092 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 3093 void *vs2, CPURISCVState *env, \ 3094 uint32_t desc) \ 3095 { \ 3096 uint32_t vm = vext_vm(desc); \ 3097 uint32_t vl = env->vl; \ 3098 uint32_t total_elems = \ 3099 vext_get_total_elems(env, desc, ESZ); \ 3100 uint32_t vta = vext_vta(desc); \ 3101 uint32_t vma = vext_vma(desc); \ 3102 uint32_t i; \ 3103 \ 3104 VSTART_CHECK_EARLY_EXIT(env, vl); \ 3105 \ 3106 for (i = env->vstart; i < vl; i++) { \ 3107 if (!vm && !vext_elem_mask(v0, i)) { \ 3108 /* set masked-off elements to 1s */ \ 3109 vext_set_elems_1s(vd, vma, i * ESZ, \ 3110 (i + 1) * ESZ); \ 3111 continue; \ 3112 } \ 3113 do_##NAME(vd, vs1, vs2, i, env); \ 3114 } \ 3115 env->vstart = 0; \ 3116 /* set tail elements to 1s */ \ 3117 vext_set_elems_1s(vd, vta, vl * ESZ, \ 3118 total_elems * ESZ); \ 3119 } 3120 3121 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add) 3122 RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add) 3123 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add) 3124 GEN_VEXT_VV_ENV(vfadd_vv_h, 2) 3125 GEN_VEXT_VV_ENV(vfadd_vv_w, 4) 3126 GEN_VEXT_VV_ENV(vfadd_vv_d, 8) 3127 3128 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3129 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3130 CPURISCVState *env) \ 3131 { \ 3132 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3133 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, 
&env->fp_status);\ 3134 } 3135 3136 #define GEN_VEXT_VF(NAME, ESZ) \ 3137 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ 3138 void *vs2, CPURISCVState *env, \ 3139 uint32_t desc) \ 3140 { \ 3141 uint32_t vm = vext_vm(desc); \ 3142 uint32_t vl = env->vl; \ 3143 uint32_t total_elems = \ 3144 vext_get_total_elems(env, desc, ESZ); \ 3145 uint32_t vta = vext_vta(desc); \ 3146 uint32_t vma = vext_vma(desc); \ 3147 uint32_t i; \ 3148 \ 3149 VSTART_CHECK_EARLY_EXIT(env, vl); \ 3150 \ 3151 for (i = env->vstart; i < vl; i++) { \ 3152 if (!vm && !vext_elem_mask(v0, i)) { \ 3153 /* set masked-off elements to 1s */ \ 3154 vext_set_elems_1s(vd, vma, i * ESZ, \ 3155 (i + 1) * ESZ); \ 3156 continue; \ 3157 } \ 3158 do_##NAME(vd, s1, vs2, i, env); \ 3159 } \ 3160 env->vstart = 0; \ 3161 /* set tail elements to 1s */ \ 3162 vext_set_elems_1s(vd, vta, vl * ESZ, \ 3163 total_elems * ESZ); \ 3164 } 3165 3166 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) 3167 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) 3168 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) 3169 GEN_VEXT_VF(vfadd_vf_h, 2) 3170 GEN_VEXT_VF(vfadd_vf_w, 4) 3171 GEN_VEXT_VF(vfadd_vf_d, 8) 3172 3173 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) 3174 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) 3175 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) 3176 GEN_VEXT_VV_ENV(vfsub_vv_h, 2) 3177 GEN_VEXT_VV_ENV(vfsub_vv_w, 4) 3178 GEN_VEXT_VV_ENV(vfsub_vv_d, 8) 3179 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) 3180 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) 3181 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) 3182 GEN_VEXT_VF(vfsub_vf_h, 2) 3183 GEN_VEXT_VF(vfsub_vf_w, 4) 3184 GEN_VEXT_VF(vfsub_vf_d, 8) 3185 3186 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) 3187 { 3188 return float16_sub(b, a, s); 3189 } 3190 3191 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s) 3192 { 3193 return float32_sub(b, a, s); 3194 } 3195 3196 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s) 3197 { 3198 return float64_sub(b, a, s); 3199 } 3200 3201 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) 3202 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) 3203 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) 3204 GEN_VEXT_VF(vfrsub_vf_h, 2) 3205 GEN_VEXT_VF(vfrsub_vf_w, 4) 3206 GEN_VEXT_VF(vfrsub_vf_d, 8) 3207 3208 /* Vector Widening Floating-Point Add/Subtract Instructions */ 3209 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) 3210 { 3211 return float32_add(float16_to_float32(a, true, s), 3212 float16_to_float32(b, true, s), s); 3213 } 3214 3215 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) 3216 { 3217 return float64_add(float32_to_float64(a, s), 3218 float32_to_float64(b, s), s); 3219 3220 } 3221 3222 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) 3223 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) 3224 GEN_VEXT_VV_ENV(vfwadd_vv_h, 4) 3225 GEN_VEXT_VV_ENV(vfwadd_vv_w, 8) 3226 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) 3227 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) 3228 GEN_VEXT_VF(vfwadd_vf_h, 4) 3229 GEN_VEXT_VF(vfwadd_vf_w, 8) 3230 3231 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) 3232 { 3233 return float32_sub(float16_to_float32(a, true, s), 3234 float16_to_float32(b, true, s), s); 3235 } 3236 3237 static uint64_t vfwsub32(uint32_t a, 
uint32_t b, float_status *s) 3238 { 3239 return float64_sub(float32_to_float64(a, s), 3240 float32_to_float64(b, s), s); 3241 3242 } 3243 3244 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) 3245 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) 3246 GEN_VEXT_VV_ENV(vfwsub_vv_h, 4) 3247 GEN_VEXT_VV_ENV(vfwsub_vv_w, 8) 3248 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16) 3249 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) 3250 GEN_VEXT_VF(vfwsub_vf_h, 4) 3251 GEN_VEXT_VF(vfwsub_vf_w, 8) 3252 3253 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) 3254 { 3255 return float32_add(a, float16_to_float32(b, true, s), s); 3256 } 3257 3258 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s) 3259 { 3260 return float64_add(a, float32_to_float64(b, s), s); 3261 } 3262 3263 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) 3264 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) 3265 GEN_VEXT_VV_ENV(vfwadd_wv_h, 4) 3266 GEN_VEXT_VV_ENV(vfwadd_wv_w, 8) 3267 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) 3268 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) 3269 GEN_VEXT_VF(vfwadd_wf_h, 4) 3270 GEN_VEXT_VF(vfwadd_wf_w, 8) 3271 3272 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) 3273 { 3274 return float32_sub(a, float16_to_float32(b, true, s), s); 3275 } 3276 3277 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s) 3278 { 3279 return float64_sub(a, float32_to_float64(b, s), s); 3280 } 3281 3282 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) 3283 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) 3284 GEN_VEXT_VV_ENV(vfwsub_wv_h, 4) 3285 GEN_VEXT_VV_ENV(vfwsub_wv_w, 8) 3286 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) 3287 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) 3288 GEN_VEXT_VF(vfwsub_wf_h, 4) 3289 GEN_VEXT_VF(vfwsub_wf_w, 8) 3290 3291 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */ 3292 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) 3293 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) 3294 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) 3295 GEN_VEXT_VV_ENV(vfmul_vv_h, 2) 3296 GEN_VEXT_VV_ENV(vfmul_vv_w, 4) 3297 GEN_VEXT_VV_ENV(vfmul_vv_d, 8) 3298 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) 3299 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) 3300 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) 3301 GEN_VEXT_VF(vfmul_vf_h, 2) 3302 GEN_VEXT_VF(vfmul_vf_w, 4) 3303 GEN_VEXT_VF(vfmul_vf_d, 8) 3304 3305 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) 3306 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) 3307 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) 3308 GEN_VEXT_VV_ENV(vfdiv_vv_h, 2) 3309 GEN_VEXT_VV_ENV(vfdiv_vv_w, 4) 3310 GEN_VEXT_VV_ENV(vfdiv_vv_d, 8) 3311 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) 3312 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) 3313 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) 3314 GEN_VEXT_VF(vfdiv_vf_h, 2) 3315 GEN_VEXT_VF(vfdiv_vf_w, 4) 3316 GEN_VEXT_VF(vfdiv_vf_d, 8) 3317 3318 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) 3319 { 3320 return float16_div(b, a, s); 3321 } 3322 3323 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s) 3324 { 3325 return float32_div(b, a, s); 3326 } 3327 3328 static uint64_t 
float64_rdiv(uint64_t a, uint64_t b, float_status *s) 3329 { 3330 return float64_div(b, a, s); 3331 } 3332 3333 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) 3334 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) 3335 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) 3336 GEN_VEXT_VF(vfrdiv_vf_h, 2) 3337 GEN_VEXT_VF(vfrdiv_vf_w, 4) 3338 GEN_VEXT_VF(vfrdiv_vf_d, 8) 3339 3340 /* Vector Widening Floating-Point Multiply */ 3341 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) 3342 { 3343 return float32_mul(float16_to_float32(a, true, s), 3344 float16_to_float32(b, true, s), s); 3345 } 3346 3347 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) 3348 { 3349 return float64_mul(float32_to_float64(a, s), 3350 float32_to_float64(b, s), s); 3351 3352 } 3353 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) 3354 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32) 3355 GEN_VEXT_VV_ENV(vfwmul_vv_h, 4) 3356 GEN_VEXT_VV_ENV(vfwmul_vv_w, 8) 3357 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) 3358 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) 3359 GEN_VEXT_VF(vfwmul_vf_h, 4) 3360 GEN_VEXT_VF(vfwmul_vf_w, 8) 3361 3362 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ 3363 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3364 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3365 CPURISCVState *env) \ 3366 { \ 3367 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3368 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3369 TD d = *((TD *)vd + HD(i)); \ 3370 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \ 3371 } 3372 3373 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3374 { 3375 return float16_muladd(a, b, d, 0, s); 3376 } 3377 3378 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3379 { 3380 return float32_muladd(a, b, d, 0, s); 3381 } 3382 3383 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3384 { 3385 return float64_muladd(a, b, d, 0, s); 3386 } 3387 3388 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) 3389 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) 3390 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) 3391 GEN_VEXT_VV_ENV(vfmacc_vv_h, 2) 3392 GEN_VEXT_VV_ENV(vfmacc_vv_w, 4) 3393 GEN_VEXT_VV_ENV(vfmacc_vv_d, 8) 3394 3395 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3396 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3397 CPURISCVState *env) \ 3398 { \ 3399 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3400 TD d = *((TD *)vd + HD(i)); \ 3401 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\ 3402 } 3403 3404 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) 3405 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) 3406 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) 3407 GEN_VEXT_VF(vfmacc_vf_h, 2) 3408 GEN_VEXT_VF(vfmacc_vf_w, 4) 3409 GEN_VEXT_VF(vfmacc_vf_d, 8) 3410 3411 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3412 { 3413 return float16_muladd(a, b, d, float_muladd_negate_c | 3414 float_muladd_negate_product, s); 3415 } 3416 3417 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3418 { 3419 return float32_muladd(a, b, d, float_muladd_negate_c | 3420 float_muladd_negate_product, s); 3421 } 3422 3423 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3424 { 3425 return float64_muladd(a, b, d, 
float_muladd_negate_c | 3426 float_muladd_negate_product, s); 3427 } 3428 3429 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16) 3430 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) 3431 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) 3432 GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2) 3433 GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4) 3434 GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8) 3435 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) 3436 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) 3437 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) 3438 GEN_VEXT_VF(vfnmacc_vf_h, 2) 3439 GEN_VEXT_VF(vfnmacc_vf_w, 4) 3440 GEN_VEXT_VF(vfnmacc_vf_d, 8) 3441 3442 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3443 { 3444 return float16_muladd(a, b, d, float_muladd_negate_c, s); 3445 } 3446 3447 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3448 { 3449 return float32_muladd(a, b, d, float_muladd_negate_c, s); 3450 } 3451 3452 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3453 { 3454 return float64_muladd(a, b, d, float_muladd_negate_c, s); 3455 } 3456 3457 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) 3458 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) 3459 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) 3460 GEN_VEXT_VV_ENV(vfmsac_vv_h, 2) 3461 GEN_VEXT_VV_ENV(vfmsac_vv_w, 4) 3462 GEN_VEXT_VV_ENV(vfmsac_vv_d, 8) 3463 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) 3464 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) 3465 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) 3466 GEN_VEXT_VF(vfmsac_vf_h, 2) 3467 GEN_VEXT_VF(vfmsac_vf_w, 4) 3468 GEN_VEXT_VF(vfmsac_vf_d, 8) 3469 3470 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3471 { 3472 return float16_muladd(a, b, d, float_muladd_negate_product, s); 3473 } 3474 3475 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3476 { 3477 return float32_muladd(a, b, d, float_muladd_negate_product, s); 3478 } 3479 3480 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3481 { 3482 return float64_muladd(a, b, d, float_muladd_negate_product, s); 3483 } 3484 3485 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) 3486 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) 3487 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) 3488 GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2) 3489 GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4) 3490 GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8) 3491 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) 3492 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) 3493 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64) 3494 GEN_VEXT_VF(vfnmsac_vf_h, 2) 3495 GEN_VEXT_VF(vfnmsac_vf_w, 4) 3496 GEN_VEXT_VF(vfnmsac_vf_d, 8) 3497 3498 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3499 { 3500 return float16_muladd(d, b, a, 0, s); 3501 } 3502 3503 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3504 { 3505 return float32_muladd(d, b, a, 0, s); 3506 } 3507 3508 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3509 { 3510 return float64_muladd(d, b, a, 0, s); 3511 } 3512 3513 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) 3514 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) 3515 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) 3516 GEN_VEXT_VV_ENV(vfmadd_vv_h, 
2) 3517 GEN_VEXT_VV_ENV(vfmadd_vv_w, 4) 3518 GEN_VEXT_VV_ENV(vfmadd_vv_d, 8) 3519 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16) 3520 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) 3521 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) 3522 GEN_VEXT_VF(vfmadd_vf_h, 2) 3523 GEN_VEXT_VF(vfmadd_vf_w, 4) 3524 GEN_VEXT_VF(vfmadd_vf_d, 8) 3525 3526 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3527 { 3528 return float16_muladd(d, b, a, float_muladd_negate_c | 3529 float_muladd_negate_product, s); 3530 } 3531 3532 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3533 { 3534 return float32_muladd(d, b, a, float_muladd_negate_c | 3535 float_muladd_negate_product, s); 3536 } 3537 3538 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3539 { 3540 return float64_muladd(d, b, a, float_muladd_negate_c | 3541 float_muladd_negate_product, s); 3542 } 3543 3544 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) 3545 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) 3546 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) 3547 GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2) 3548 GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4) 3549 GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8) 3550 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) 3551 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) 3552 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) 3553 GEN_VEXT_VF(vfnmadd_vf_h, 2) 3554 GEN_VEXT_VF(vfnmadd_vf_w, 4) 3555 GEN_VEXT_VF(vfnmadd_vf_d, 8) 3556 3557 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3558 { 3559 return float16_muladd(d, b, a, float_muladd_negate_c, s); 3560 } 3561 3562 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3563 { 3564 return float32_muladd(d, b, a, float_muladd_negate_c, s); 3565 } 3566 3567 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3568 { 3569 return float64_muladd(d, b, a, float_muladd_negate_c, s); 3570 } 3571 3572 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) 3573 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) 3574 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) 3575 GEN_VEXT_VV_ENV(vfmsub_vv_h, 2) 3576 GEN_VEXT_VV_ENV(vfmsub_vv_w, 4) 3577 GEN_VEXT_VV_ENV(vfmsub_vv_d, 8) 3578 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) 3579 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) 3580 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) 3581 GEN_VEXT_VF(vfmsub_vf_h, 2) 3582 GEN_VEXT_VF(vfmsub_vf_w, 4) 3583 GEN_VEXT_VF(vfmsub_vf_d, 8) 3584 3585 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3586 { 3587 return float16_muladd(d, b, a, float_muladd_negate_product, s); 3588 } 3589 3590 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3591 { 3592 return float32_muladd(d, b, a, float_muladd_negate_product, s); 3593 } 3594 3595 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3596 { 3597 return float64_muladd(d, b, a, float_muladd_negate_product, s); 3598 } 3599 3600 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) 3601 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) 3602 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) 3603 GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2) 3604 GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4) 3605 GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8) 3606 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) 
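/*
 * Illustrative sketch only (hypothetical example_* names, not used by the
 * helpers in this file): the single-width FMA flavours differ only in
 * which operand supplies the addend and in which terms are negated.
 * Spelled out directly with softfloat for float32:
 */
static inline float32 example_vfmacc_f32(float32 vs1, float32 vs2,
                                         float32 vd, float_status *s)
{
    /* vfmacc.vv: vd[i] = +(vs1[i] * vs2[i]) + vd[i] */
    return float32_muladd(vs1, vs2, vd, 0, s);
}

static inline float32 example_vfmsac_f32(float32 vs1, float32 vs2,
                                         float32 vd, float_status *s)
{
    /* vfmsac.vv: vd[i] = +(vs1[i] * vs2[i]) - vd[i] */
    return float32_muladd(vs1, vs2, vd, float_muladd_negate_c, s);
}

static inline float32 example_vfmadd_f32(float32 vs1, float32 vs2,
                                         float32 vd, float_status *s)
{
    /* vfmadd.vv: vd[i] = +(vs1[i] * vd[i]) + vs2[i] */
    return float32_muladd(vs1, vd, vs2, 0, s);
}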
3607 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) 3608 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64) 3609 GEN_VEXT_VF(vfnmsub_vf_h, 2) 3610 GEN_VEXT_VF(vfnmsub_vf_w, 4) 3611 GEN_VEXT_VF(vfnmsub_vf_d, 8) 3612 3613 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */ 3614 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3615 { 3616 return float32_muladd(float16_to_float32(a, true, s), 3617 float16_to_float32(b, true, s), d, 0, s); 3618 } 3619 3620 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3621 { 3622 return float64_muladd(float32_to_float64(a, s), 3623 float32_to_float64(b, s), d, 0, s); 3624 } 3625 3626 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) 3627 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32) 3628 GEN_VEXT_VV_ENV(vfwmacc_vv_h, 4) 3629 GEN_VEXT_VV_ENV(vfwmacc_vv_w, 8) 3630 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16) 3631 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32) 3632 GEN_VEXT_VF(vfwmacc_vf_h, 4) 3633 GEN_VEXT_VF(vfwmacc_vf_w, 8) 3634 3635 static uint32_t fwmaccbf16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3636 { 3637 return float32_muladd(bfloat16_to_float32(a, s), 3638 bfloat16_to_float32(b, s), d, 0, s); 3639 } 3640 3641 RVVCALL(OPFVV3, vfwmaccbf16_vv, WOP_UUU_H, H4, H2, H2, fwmaccbf16) 3642 GEN_VEXT_VV_ENV(vfwmaccbf16_vv, 4) 3643 RVVCALL(OPFVF3, vfwmaccbf16_vf, WOP_UUU_H, H4, H2, fwmaccbf16) 3644 GEN_VEXT_VF(vfwmaccbf16_vf, 4) 3645 3646 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3647 { 3648 return float32_muladd(float16_to_float32(a, true, s), 3649 float16_to_float32(b, true, s), d, 3650 float_muladd_negate_c | float_muladd_negate_product, 3651 s); 3652 } 3653 3654 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3655 { 3656 return float64_muladd(float32_to_float64(a, s), float32_to_float64(b, s), 3657 d, float_muladd_negate_c | 3658 float_muladd_negate_product, s); 3659 } 3660 3661 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) 3662 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) 3663 GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 4) 3664 GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 8) 3665 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) 3666 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) 3667 GEN_VEXT_VF(vfwnmacc_vf_h, 4) 3668 GEN_VEXT_VF(vfwnmacc_vf_w, 8) 3669 3670 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3671 { 3672 return float32_muladd(float16_to_float32(a, true, s), 3673 float16_to_float32(b, true, s), d, 3674 float_muladd_negate_c, s); 3675 } 3676 3677 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3678 { 3679 return float64_muladd(float32_to_float64(a, s), 3680 float32_to_float64(b, s), d, 3681 float_muladd_negate_c, s); 3682 } 3683 3684 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) 3685 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) 3686 GEN_VEXT_VV_ENV(vfwmsac_vv_h, 4) 3687 GEN_VEXT_VV_ENV(vfwmsac_vv_w, 8) 3688 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) 3689 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) 3690 GEN_VEXT_VF(vfwmsac_vf_h, 4) 3691 GEN_VEXT_VF(vfwmsac_vf_w, 8) 3692 3693 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3694 { 3695 return float32_muladd(float16_to_float32(a, true, s), 3696 float16_to_float32(b, true, s), d, 3697 
float_muladd_negate_product, s); 3698 } 3699 3700 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3701 { 3702 return float64_muladd(float32_to_float64(a, s), 3703 float32_to_float64(b, s), d, 3704 float_muladd_negate_product, s); 3705 } 3706 3707 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) 3708 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) 3709 GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 4) 3710 GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 8) 3711 RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) 3712 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) 3713 GEN_VEXT_VF(vfwnmsac_vf_h, 4) 3714 GEN_VEXT_VF(vfwnmsac_vf_w, 8) 3715 3716 /* Vector Floating-Point Square-Root Instruction */ 3717 #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 3718 static void do_##NAME(void *vd, void *vs2, int i, \ 3719 CPURISCVState *env) \ 3720 { \ 3721 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3722 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ 3723 } 3724 3725 #define GEN_VEXT_V_ENV(NAME, ESZ) \ 3726 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 3727 CPURISCVState *env, uint32_t desc) \ 3728 { \ 3729 uint32_t vm = vext_vm(desc); \ 3730 uint32_t vl = env->vl; \ 3731 uint32_t total_elems = \ 3732 vext_get_total_elems(env, desc, ESZ); \ 3733 uint32_t vta = vext_vta(desc); \ 3734 uint32_t vma = vext_vma(desc); \ 3735 uint32_t i; \ 3736 \ 3737 VSTART_CHECK_EARLY_EXIT(env, vl); \ 3738 \ 3739 if (vl == 0) { \ 3740 return; \ 3741 } \ 3742 for (i = env->vstart; i < vl; i++) { \ 3743 if (!vm && !vext_elem_mask(v0, i)) { \ 3744 /* set masked-off elements to 1s */ \ 3745 vext_set_elems_1s(vd, vma, i * ESZ, \ 3746 (i + 1) * ESZ); \ 3747 continue; \ 3748 } \ 3749 do_##NAME(vd, vs2, i, env); \ 3750 } \ 3751 env->vstart = 0; \ 3752 vext_set_elems_1s(vd, vta, vl * ESZ, \ 3753 total_elems * ESZ); \ 3754 } 3755 3756 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) 3757 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) 3758 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) 3759 GEN_VEXT_V_ENV(vfsqrt_v_h, 2) 3760 GEN_VEXT_V_ENV(vfsqrt_v_w, 4) 3761 GEN_VEXT_V_ENV(vfsqrt_v_d, 8) 3762 3763 /* 3764 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction 3765 * 3766 * Adapted from riscv-v-spec recip.c: 3767 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3768 */ 3769 static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size) 3770 { 3771 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3772 uint64_t exp = extract64(f, frac_size, exp_size); 3773 uint64_t frac = extract64(f, 0, frac_size); 3774 3775 const uint8_t lookup_table[] = { 3776 52, 51, 50, 48, 47, 46, 44, 43, 3777 42, 41, 40, 39, 38, 36, 35, 34, 3778 33, 32, 31, 30, 30, 29, 28, 27, 3779 26, 25, 24, 23, 23, 22, 21, 20, 3780 19, 19, 18, 17, 16, 16, 15, 14, 3781 14, 13, 12, 12, 11, 10, 10, 9, 3782 9, 8, 7, 7, 6, 6, 5, 4, 3783 4, 3, 3, 2, 2, 1, 1, 0, 3784 127, 125, 123, 121, 119, 118, 116, 114, 3785 113, 111, 109, 108, 106, 105, 103, 102, 3786 100, 99, 97, 96, 95, 93, 92, 91, 3787 90, 88, 87, 86, 85, 84, 83, 82, 3788 80, 79, 78, 77, 76, 75, 74, 73, 3789 72, 71, 70, 70, 69, 68, 67, 66, 3790 65, 64, 63, 63, 62, 61, 60, 59, 3791 59, 58, 57, 56, 56, 55, 54, 53 3792 }; 3793 const int precision = 7; 3794 3795 if (exp == 0 && frac != 0) { /* subnormal */ 3796 /* Normalize the subnormal. 
*/ 3797 while (extract64(frac, frac_size - 1, 1) == 0) { 3798 exp--; 3799 frac <<= 1; 3800 } 3801 3802 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3803 } 3804 3805 int idx = ((exp & 1) << (precision - 1)) | 3806 (frac >> (frac_size - precision + 1)); 3807 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3808 (frac_size - precision); 3809 uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2; 3810 3811 uint64_t val = 0; 3812 val = deposit64(val, 0, frac_size, out_frac); 3813 val = deposit64(val, frac_size, exp_size, out_exp); 3814 val = deposit64(val, frac_size + exp_size, 1, sign); 3815 return val; 3816 } 3817 3818 static float16 frsqrt7_h(float16 f, float_status *s) 3819 { 3820 int exp_size = 5, frac_size = 10; 3821 bool sign = float16_is_neg(f); 3822 3823 /* 3824 * frsqrt7(sNaN) = canonical NaN 3825 * frsqrt7(-inf) = canonical NaN 3826 * frsqrt7(-normal) = canonical NaN 3827 * frsqrt7(-subnormal) = canonical NaN 3828 */ 3829 if (float16_is_signaling_nan(f, s) || 3830 (float16_is_infinity(f) && sign) || 3831 (float16_is_normal(f) && sign) || 3832 (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) { 3833 s->float_exception_flags |= float_flag_invalid; 3834 return float16_default_nan(s); 3835 } 3836 3837 /* frsqrt7(qNaN) = canonical NaN */ 3838 if (float16_is_quiet_nan(f, s)) { 3839 return float16_default_nan(s); 3840 } 3841 3842 /* frsqrt7(+-0) = +-inf */ 3843 if (float16_is_zero(f)) { 3844 s->float_exception_flags |= float_flag_divbyzero; 3845 return float16_set_sign(float16_infinity, sign); 3846 } 3847 3848 /* frsqrt7(+inf) = +0 */ 3849 if (float16_is_infinity(f) && !sign) { 3850 return float16_set_sign(float16_zero, sign); 3851 } 3852 3853 /* +normal, +subnormal */ 3854 uint64_t val = frsqrt7(f, exp_size, frac_size); 3855 return make_float16(val); 3856 } 3857 3858 static float32 frsqrt7_s(float32 f, float_status *s) 3859 { 3860 int exp_size = 8, frac_size = 23; 3861 bool sign = float32_is_neg(f); 3862 3863 /* 3864 * frsqrt7(sNaN) = canonical NaN 3865 * frsqrt7(-inf) = canonical NaN 3866 * frsqrt7(-normal) = canonical NaN 3867 * frsqrt7(-subnormal) = canonical NaN 3868 */ 3869 if (float32_is_signaling_nan(f, s) || 3870 (float32_is_infinity(f) && sign) || 3871 (float32_is_normal(f) && sign) || 3872 (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) { 3873 s->float_exception_flags |= float_flag_invalid; 3874 return float32_default_nan(s); 3875 } 3876 3877 /* frsqrt7(qNaN) = canonical NaN */ 3878 if (float32_is_quiet_nan(f, s)) { 3879 return float32_default_nan(s); 3880 } 3881 3882 /* frsqrt7(+-0) = +-inf */ 3883 if (float32_is_zero(f)) { 3884 s->float_exception_flags |= float_flag_divbyzero; 3885 return float32_set_sign(float32_infinity, sign); 3886 } 3887 3888 /* frsqrt7(+inf) = +0 */ 3889 if (float32_is_infinity(f) && !sign) { 3890 return float32_set_sign(float32_zero, sign); 3891 } 3892 3893 /* +normal, +subnormal */ 3894 uint64_t val = frsqrt7(f, exp_size, frac_size); 3895 return make_float32(val); 3896 } 3897 3898 static float64 frsqrt7_d(float64 f, float_status *s) 3899 { 3900 int exp_size = 11, frac_size = 52; 3901 bool sign = float64_is_neg(f); 3902 3903 /* 3904 * frsqrt7(sNaN) = canonical NaN 3905 * frsqrt7(-inf) = canonical NaN 3906 * frsqrt7(-normal) = canonical NaN 3907 * frsqrt7(-subnormal) = canonical NaN 3908 */ 3909 if (float64_is_signaling_nan(f, s) || 3910 (float64_is_infinity(f) && sign) || 3911 (float64_is_normal(f) && sign) || 3912 (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) { 3913 
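/*
 * A negative non-zero input (or an sNaN) has no real square root:
 * raise the invalid flag and return the canonical NaN, exactly as the
 * float16/float32 variants above do.
 */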
s->float_exception_flags |= float_flag_invalid; 3914 return float64_default_nan(s); 3915 } 3916 3917 /* frsqrt7(qNaN) = canonical NaN */ 3918 if (float64_is_quiet_nan(f, s)) { 3919 return float64_default_nan(s); 3920 } 3921 3922 /* frsqrt7(+-0) = +-inf */ 3923 if (float64_is_zero(f)) { 3924 s->float_exception_flags |= float_flag_divbyzero; 3925 return float64_set_sign(float64_infinity, sign); 3926 } 3927 3928 /* frsqrt7(+inf) = +0 */ 3929 if (float64_is_infinity(f) && !sign) { 3930 return float64_set_sign(float64_zero, sign); 3931 } 3932 3933 /* +normal, +subnormal */ 3934 uint64_t val = frsqrt7(f, exp_size, frac_size); 3935 return make_float64(val); 3936 } 3937 3938 RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h) 3939 RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s) 3940 RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d) 3941 GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2) 3942 GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4) 3943 GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8) 3944 3945 /* 3946 * Vector Floating-Point Reciprocal Estimate Instruction 3947 * 3948 * Adapted from riscv-v-spec recip.c: 3949 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3950 */ 3951 static uint64_t frec7(uint64_t f, int exp_size, int frac_size, 3952 float_status *s) 3953 { 3954 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3955 uint64_t exp = extract64(f, frac_size, exp_size); 3956 uint64_t frac = extract64(f, 0, frac_size); 3957 3958 const uint8_t lookup_table[] = { 3959 127, 125, 123, 121, 119, 117, 116, 114, 3960 112, 110, 109, 107, 105, 104, 102, 100, 3961 99, 97, 96, 94, 93, 91, 90, 88, 3962 87, 85, 84, 83, 81, 80, 79, 77, 3963 76, 75, 74, 72, 71, 70, 69, 68, 3964 66, 65, 64, 63, 62, 61, 60, 59, 3965 58, 57, 56, 55, 54, 53, 52, 51, 3966 50, 49, 48, 47, 46, 45, 44, 43, 3967 42, 41, 40, 40, 39, 38, 37, 36, 3968 35, 35, 34, 33, 32, 31, 31, 30, 3969 29, 28, 28, 27, 26, 25, 25, 24, 3970 23, 23, 22, 21, 21, 20, 19, 19, 3971 18, 17, 17, 16, 15, 15, 14, 14, 3972 13, 12, 12, 11, 11, 10, 9, 9, 3973 8, 8, 7, 7, 6, 5, 5, 4, 3974 4, 3, 3, 2, 2, 1, 1, 0 3975 }; 3976 const int precision = 7; 3977 3978 if (exp == 0 && frac != 0) { /* subnormal */ 3979 /* Normalize the subnormal. */ 3980 while (extract64(frac, frac_size - 1, 1) == 0) { 3981 exp--; 3982 frac <<= 1; 3983 } 3984 3985 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3986 3987 if (exp != 0 && exp != UINT64_MAX) { 3988 /* 3989 * Overflow to inf or max value of same sign, 3990 * depending on sign and rounding mode. 3991 */ 3992 s->float_exception_flags |= (float_flag_inexact | 3993 float_flag_overflow); 3994 3995 if ((s->float_rounding_mode == float_round_to_zero) || 3996 ((s->float_rounding_mode == float_round_down) && !sign) || 3997 ((s->float_rounding_mode == float_round_up) && sign)) { 3998 /* Return greatest/negative finite value. */ 3999 return (sign << (exp_size + frac_size)) | 4000 (MAKE_64BIT_MASK(frac_size, exp_size) - 1); 4001 } else { 4002 /* Return +-inf. */ 4003 return (sign << (exp_size + frac_size)) | 4004 MAKE_64BIT_MASK(frac_size, exp_size); 4005 } 4006 } 4007 } 4008 4009 int idx = frac >> (frac_size - precision); 4010 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 4011 (frac_size - precision); 4012 uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp; 4013 4014 if (out_exp == 0 || out_exp == UINT64_MAX) { 4015 /* 4016 * The result is subnormal, but don't raise the underflow exception, 4017 * because there's no additional loss of precision. 
4018 */ 4019 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1); 4020 if (out_exp == UINT64_MAX) { 4021 out_frac >>= 1; 4022 out_exp = 0; 4023 } 4024 } 4025 4026 uint64_t val = 0; 4027 val = deposit64(val, 0, frac_size, out_frac); 4028 val = deposit64(val, frac_size, exp_size, out_exp); 4029 val = deposit64(val, frac_size + exp_size, 1, sign); 4030 return val; 4031 } 4032 4033 static float16 frec7_h(float16 f, float_status *s) 4034 { 4035 int exp_size = 5, frac_size = 10; 4036 bool sign = float16_is_neg(f); 4037 4038 /* frec7(+-inf) = +-0 */ 4039 if (float16_is_infinity(f)) { 4040 return float16_set_sign(float16_zero, sign); 4041 } 4042 4043 /* frec7(+-0) = +-inf */ 4044 if (float16_is_zero(f)) { 4045 s->float_exception_flags |= float_flag_divbyzero; 4046 return float16_set_sign(float16_infinity, sign); 4047 } 4048 4049 /* frec7(sNaN) = canonical NaN */ 4050 if (float16_is_signaling_nan(f, s)) { 4051 s->float_exception_flags |= float_flag_invalid; 4052 return float16_default_nan(s); 4053 } 4054 4055 /* frec7(qNaN) = canonical NaN */ 4056 if (float16_is_quiet_nan(f, s)) { 4057 return float16_default_nan(s); 4058 } 4059 4060 /* +-normal, +-subnormal */ 4061 uint64_t val = frec7(f, exp_size, frac_size, s); 4062 return make_float16(val); 4063 } 4064 4065 static float32 frec7_s(float32 f, float_status *s) 4066 { 4067 int exp_size = 8, frac_size = 23; 4068 bool sign = float32_is_neg(f); 4069 4070 /* frec7(+-inf) = +-0 */ 4071 if (float32_is_infinity(f)) { 4072 return float32_set_sign(float32_zero, sign); 4073 } 4074 4075 /* frec7(+-0) = +-inf */ 4076 if (float32_is_zero(f)) { 4077 s->float_exception_flags |= float_flag_divbyzero; 4078 return float32_set_sign(float32_infinity, sign); 4079 } 4080 4081 /* frec7(sNaN) = canonical NaN */ 4082 if (float32_is_signaling_nan(f, s)) { 4083 s->float_exception_flags |= float_flag_invalid; 4084 return float32_default_nan(s); 4085 } 4086 4087 /* frec7(qNaN) = canonical NaN */ 4088 if (float32_is_quiet_nan(f, s)) { 4089 return float32_default_nan(s); 4090 } 4091 4092 /* +-normal, +-subnormal */ 4093 uint64_t val = frec7(f, exp_size, frac_size, s); 4094 return make_float32(val); 4095 } 4096 4097 static float64 frec7_d(float64 f, float_status *s) 4098 { 4099 int exp_size = 11, frac_size = 52; 4100 bool sign = float64_is_neg(f); 4101 4102 /* frec7(+-inf) = +-0 */ 4103 if (float64_is_infinity(f)) { 4104 return float64_set_sign(float64_zero, sign); 4105 } 4106 4107 /* frec7(+-0) = +-inf */ 4108 if (float64_is_zero(f)) { 4109 s->float_exception_flags |= float_flag_divbyzero; 4110 return float64_set_sign(float64_infinity, sign); 4111 } 4112 4113 /* frec7(sNaN) = canonical NaN */ 4114 if (float64_is_signaling_nan(f, s)) { 4115 s->float_exception_flags |= float_flag_invalid; 4116 return float64_default_nan(s); 4117 } 4118 4119 /* frec7(qNaN) = canonical NaN */ 4120 if (float64_is_quiet_nan(f, s)) { 4121 return float64_default_nan(s); 4122 } 4123 4124 /* +-normal, +-subnormal */ 4125 uint64_t val = frec7(f, exp_size, frac_size, s); 4126 return make_float64(val); 4127 } 4128 4129 RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h) 4130 RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s) 4131 RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d) 4132 GEN_VEXT_V_ENV(vfrec7_v_h, 2) 4133 GEN_VEXT_V_ENV(vfrec7_v_w, 4) 4134 GEN_VEXT_V_ENV(vfrec7_v_d, 8) 4135 4136 /* Vector Floating-Point MIN/MAX Instructions */ 4137 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number) 4138 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, 
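/*
 * The softfloat *_minimum_number/*_maximum_number operations implement
 * IEEE 754-2019 minimumNumber/maximumNumber as required for vfmin/vfmax:
 * when exactly one operand is a NaN, the numeric operand is returned
 * (an sNaN operand still raises the invalid flag).
 */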
float32_minimum_number) 4139 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number) 4140 GEN_VEXT_VV_ENV(vfmin_vv_h, 2) 4141 GEN_VEXT_VV_ENV(vfmin_vv_w, 4) 4142 GEN_VEXT_VV_ENV(vfmin_vv_d, 8) 4143 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number) 4144 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number) 4145 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number) 4146 GEN_VEXT_VF(vfmin_vf_h, 2) 4147 GEN_VEXT_VF(vfmin_vf_w, 4) 4148 GEN_VEXT_VF(vfmin_vf_d, 8) 4149 4150 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number) 4151 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number) 4152 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number) 4153 GEN_VEXT_VV_ENV(vfmax_vv_h, 2) 4154 GEN_VEXT_VV_ENV(vfmax_vv_w, 4) 4155 GEN_VEXT_VV_ENV(vfmax_vv_d, 8) 4156 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number) 4157 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number) 4158 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number) 4159 GEN_VEXT_VF(vfmax_vf_h, 2) 4160 GEN_VEXT_VF(vfmax_vf_w, 4) 4161 GEN_VEXT_VF(vfmax_vf_d, 8) 4162 4163 /* Vector Floating-Point Sign-Injection Instructions */ 4164 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) 4165 { 4166 return deposit64(b, 0, 15, a); 4167 } 4168 4169 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s) 4170 { 4171 return deposit64(b, 0, 31, a); 4172 } 4173 4174 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) 4175 { 4176 return deposit64(b, 0, 63, a); 4177 } 4178 4179 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) 4180 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) 4181 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) 4182 GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2) 4183 GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4) 4184 GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8) 4185 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) 4186 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) 4187 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64) 4188 GEN_VEXT_VF(vfsgnj_vf_h, 2) 4189 GEN_VEXT_VF(vfsgnj_vf_w, 4) 4190 GEN_VEXT_VF(vfsgnj_vf_d, 8) 4191 4192 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) 4193 { 4194 return deposit64(~b, 0, 15, a); 4195 } 4196 4197 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s) 4198 { 4199 return deposit64(~b, 0, 31, a); 4200 } 4201 4202 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s) 4203 { 4204 return deposit64(~b, 0, 63, a); 4205 } 4206 4207 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) 4208 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) 4209 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) 4210 GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2) 4211 GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4) 4212 GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8) 4213 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) 4214 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) 4215 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) 4216 GEN_VEXT_VF(vfsgnjn_vf_h, 2) 4217 GEN_VEXT_VF(vfsgnjn_vf_w, 4) 4218 GEN_VEXT_VF(vfsgnjn_vf_d, 8) 4219 4220 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) 4221 { 4222 return deposit64(b ^ a, 0, 15, a); 4223 } 4224 4225 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s) 4226 { 4227 return deposit64(b ^ a, 0, 31, a); 4228 } 4229 4230 static uint64_t fsgnjx64(uint64_t a, uint64_t b, 
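/*
 * In the OPFVV2/OPFVF2 expansion the first argument is the vs2 element
 * and the second is the vs1 element (or rs1), so fsgnjx keeps the
 * magnitude bits of vs2 and XORs the two sign bits.  For float32,
 * fsgnjx32(0xbf800000, 0x40000000), i.e. vs2 = -1.0 and vs1 = +2.0,
 * yields 0xbf800000 (-1.0): the magnitude of vs2 with sign 1 ^ 0.
 */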
float_status *s) 4231 { 4232 return deposit64(b ^ a, 0, 63, a); 4233 } 4234 4235 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) 4236 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) 4237 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) 4238 GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2) 4239 GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4) 4240 GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8) 4241 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) 4242 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) 4243 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) 4244 GEN_VEXT_VF(vfsgnjx_vf_h, 2) 4245 GEN_VEXT_VF(vfsgnjx_vf_w, 4) 4246 GEN_VEXT_VF(vfsgnjx_vf_d, 8) 4247 4248 /* Vector Floating-Point Compare Instructions */ 4249 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ 4250 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4251 CPURISCVState *env, uint32_t desc) \ 4252 { \ 4253 uint32_t vm = vext_vm(desc); \ 4254 uint32_t vl = env->vl; \ 4255 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \ 4256 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 4257 uint32_t vma = vext_vma(desc); \ 4258 uint32_t i; \ 4259 \ 4260 VSTART_CHECK_EARLY_EXIT(env, vl); \ 4261 \ 4262 for (i = env->vstart; i < vl; i++) { \ 4263 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 4264 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4265 if (!vm && !vext_elem_mask(v0, i)) { \ 4266 /* set masked-off elements to 1s */ \ 4267 if (vma) { \ 4268 vext_set_elem_mask(vd, i, 1); \ 4269 } \ 4270 continue; \ 4271 } \ 4272 vext_set_elem_mask(vd, i, \ 4273 DO_OP(s2, s1, &env->fp_status)); \ 4274 } \ 4275 env->vstart = 0; \ 4276 /* 4277 * mask destination register are always tail-agnostic 4278 * set tail elements to 1s 4279 */ \ 4280 if (vta_all_1s) { \ 4281 for (; i < total_elems; i++) { \ 4282 vext_set_elem_mask(vd, i, 1); \ 4283 } \ 4284 } \ 4285 } 4286 4287 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet) 4288 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet) 4289 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet) 4290 4291 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \ 4292 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4293 CPURISCVState *env, uint32_t desc) \ 4294 { \ 4295 uint32_t vm = vext_vm(desc); \ 4296 uint32_t vl = env->vl; \ 4297 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \ 4298 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 4299 uint32_t vma = vext_vma(desc); \ 4300 uint32_t i; \ 4301 \ 4302 VSTART_CHECK_EARLY_EXIT(env, vl); \ 4303 \ 4304 for (i = env->vstart; i < vl; i++) { \ 4305 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4306 if (!vm && !vext_elem_mask(v0, i)) { \ 4307 /* set masked-off elements to 1s */ \ 4308 if (vma) { \ 4309 vext_set_elem_mask(vd, i, 1); \ 4310 } \ 4311 continue; \ 4312 } \ 4313 vext_set_elem_mask(vd, i, \ 4314 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \ 4315 } \ 4316 env->vstart = 0; \ 4317 /* 4318 * mask destination register are always tail-agnostic 4319 * set tail elements to 1s 4320 */ \ 4321 if (vta_all_1s) { \ 4322 for (; i < total_elems; i++) { \ 4323 vext_set_elem_mask(vd, i, 1); \ 4324 } \ 4325 } \ 4326 } 4327 4328 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet) 4329 GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet) 4330 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet) 4331 4332 static bool vmfne16(uint16_t a, uint16_t b, float_status *s) 4333 { 4334 FloatRelation compare = float16_compare_quiet(a, b, s); 4335 return compare != float_relation_equal; 4336 } 4337 4338 static 
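/*
 * vmfne uses the quiet compare: unordered inputs (either operand NaN)
 * are "not equal", so the mask bit is set, and a qNaN operand does not
 * raise the invalid flag (an sNaN still does).
 */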
bool vmfne32(uint32_t a, uint32_t b, float_status *s) 4339 { 4340 FloatRelation compare = float32_compare_quiet(a, b, s); 4341 return compare != float_relation_equal; 4342 } 4343 4344 static bool vmfne64(uint64_t a, uint64_t b, float_status *s) 4345 { 4346 FloatRelation compare = float64_compare_quiet(a, b, s); 4347 return compare != float_relation_equal; 4348 } 4349 4350 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16) 4351 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32) 4352 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64) 4353 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16) 4354 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32) 4355 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64) 4356 4357 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt) 4358 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt) 4359 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt) 4360 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt) 4361 GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt) 4362 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt) 4363 4364 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le) 4365 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le) 4366 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le) 4367 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le) 4368 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le) 4369 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le) 4370 4371 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s) 4372 { 4373 FloatRelation compare = float16_compare(a, b, s); 4374 return compare == float_relation_greater; 4375 } 4376 4377 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s) 4378 { 4379 FloatRelation compare = float32_compare(a, b, s); 4380 return compare == float_relation_greater; 4381 } 4382 4383 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s) 4384 { 4385 FloatRelation compare = float64_compare(a, b, s); 4386 return compare == float_relation_greater; 4387 } 4388 4389 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16) 4390 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32) 4391 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64) 4392 4393 static bool vmfge16(uint16_t a, uint16_t b, float_status *s) 4394 { 4395 FloatRelation compare = float16_compare(a, b, s); 4396 return compare == float_relation_greater || 4397 compare == float_relation_equal; 4398 } 4399 4400 static bool vmfge32(uint32_t a, uint32_t b, float_status *s) 4401 { 4402 FloatRelation compare = float32_compare(a, b, s); 4403 return compare == float_relation_greater || 4404 compare == float_relation_equal; 4405 } 4406 4407 static bool vmfge64(uint64_t a, uint64_t b, float_status *s) 4408 { 4409 FloatRelation compare = float64_compare(a, b, s); 4410 return compare == float_relation_greater || 4411 compare == float_relation_equal; 4412 } 4413 4414 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16) 4415 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32) 4416 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64) 4417 4418 /* Vector Floating-Point Classify Instruction */ 4419 target_ulong fclass_h(uint64_t frs1) 4420 { 4421 float16 f = frs1; 4422 bool sign = float16_is_neg(f); 4423 4424 if (float16_is_infinity(f)) { 4425 return sign ? 1 << 0 : 1 << 7; 4426 } else if (float16_is_zero(f)) { 4427 return sign ? 1 << 3 : 1 << 4; 4428 } else if (float16_is_zero_or_denormal(f)) { 4429 return sign ? 
1 << 2 : 1 << 5; 4430 } else if (float16_is_any_nan(f)) { 4431 float_status s = { }; /* for snan_bit_is_one */ 4432 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4433 } else { 4434 return sign ? 1 << 1 : 1 << 6; 4435 } 4436 } 4437 4438 target_ulong fclass_s(uint64_t frs1) 4439 { 4440 float32 f = frs1; 4441 bool sign = float32_is_neg(f); 4442 4443 if (float32_is_infinity(f)) { 4444 return sign ? 1 << 0 : 1 << 7; 4445 } else if (float32_is_zero(f)) { 4446 return sign ? 1 << 3 : 1 << 4; 4447 } else if (float32_is_zero_or_denormal(f)) { 4448 return sign ? 1 << 2 : 1 << 5; 4449 } else if (float32_is_any_nan(f)) { 4450 float_status s = { }; /* for snan_bit_is_one */ 4451 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4452 } else { 4453 return sign ? 1 << 1 : 1 << 6; 4454 } 4455 } 4456 4457 target_ulong fclass_d(uint64_t frs1) 4458 { 4459 float64 f = frs1; 4460 bool sign = float64_is_neg(f); 4461 4462 if (float64_is_infinity(f)) { 4463 return sign ? 1 << 0 : 1 << 7; 4464 } else if (float64_is_zero(f)) { 4465 return sign ? 1 << 3 : 1 << 4; 4466 } else if (float64_is_zero_or_denormal(f)) { 4467 return sign ? 1 << 2 : 1 << 5; 4468 } else if (float64_is_any_nan(f)) { 4469 float_status s = { }; /* for snan_bit_is_one */ 4470 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4471 } else { 4472 return sign ? 1 << 1 : 1 << 6; 4473 } 4474 } 4475 4476 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) 4477 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) 4478 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) 4479 GEN_VEXT_V(vfclass_v_h, 2) 4480 GEN_VEXT_V(vfclass_v_w, 4) 4481 GEN_VEXT_V(vfclass_v_d, 8) 4482 4483 /* Vector Floating-Point Merge Instruction */ 4484 4485 #define GEN_VFMERGE_VF(NAME, ETYPE, H) \ 4486 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4487 CPURISCVState *env, uint32_t desc) \ 4488 { \ 4489 uint32_t vm = vext_vm(desc); \ 4490 uint32_t vl = env->vl; \ 4491 uint32_t esz = sizeof(ETYPE); \ 4492 uint32_t total_elems = \ 4493 vext_get_total_elems(env, desc, esz); \ 4494 uint32_t vta = vext_vta(desc); \ 4495 uint32_t i; \ 4496 \ 4497 VSTART_CHECK_EARLY_EXIT(env, vl); \ 4498 \ 4499 for (i = env->vstart; i < vl; i++) { \ 4500 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4501 *((ETYPE *)vd + H(i)) = \ 4502 (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \ 4503 } \ 4504 env->vstart = 0; \ 4505 /* set tail elements to 1s */ \ 4506 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4507 } 4508 4509 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2) 4510 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4) 4511 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8) 4512 4513 /* Single-Width Floating-Point/Integer Type-Convert Instructions */ 4514 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ 4515 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) 4516 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) 4517 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) 4518 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2) 4519 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4) 4520 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8) 4521 4522 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. 
*/ 4523 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) 4524 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) 4525 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) 4526 GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2) 4527 GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4) 4528 GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8) 4529 4530 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ 4531 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) 4532 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) 4533 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) 4534 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2) 4535 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4) 4536 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8) 4537 4538 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */ 4539 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) 4540 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) 4541 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) 4542 GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2) 4543 GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4) 4544 GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8) 4545 4546 /* Widening Floating-Point/Integer Type-Convert Instructions */ 4547 /* (TD, T2, TX2) */ 4548 #define WOP_UU_B uint16_t, uint8_t, uint8_t 4549 #define WOP_UU_H uint32_t, uint16_t, uint16_t 4550 #define WOP_UU_W uint64_t, uint32_t, uint32_t 4551 /* 4552 * vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer. 4553 */ 4554 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) 4555 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) 4556 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 4) 4557 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 8) 4558 4559 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */ 4560 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) 4561 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) 4562 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 4) 4563 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 8) 4564 4565 /* 4566 * vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float. 4567 */ 4568 RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16) 4569 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) 4570 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) 4571 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 2) 4572 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 4) 4573 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 8) 4574 4575 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ 4576 RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16) 4577 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) 4578 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) 4579 GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 2) 4580 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 4) 4581 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 8) 4582 4583 /* 4584 * vfwcvt.f.f.v vd, vs2, vm # Convert single-width float to double-width float. 
4585 */ 4586 static uint32_t vfwcvtffv16(uint16_t a, float_status *s) 4587 { 4588 return float16_to_float32(a, true, s); 4589 } 4590 4591 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16) 4592 RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64) 4593 GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 4) 4594 GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 8) 4595 4596 RVVCALL(OPFVV1, vfwcvtbf16_f_f_v, WOP_UU_H, H4, H2, bfloat16_to_float32) 4597 GEN_VEXT_V_ENV(vfwcvtbf16_f_f_v, 4) 4598 4599 /* Narrowing Floating-Point/Integer Type-Convert Instructions */ 4600 /* (TD, T2, TX2) */ 4601 #define NOP_UU_B uint8_t, uint16_t, uint32_t 4602 #define NOP_UU_H uint16_t, uint32_t, uint32_t 4603 #define NOP_UU_W uint32_t, uint64_t, uint64_t 4604 /* vfncvt.xu.f.v vd, vs2, vm # Convert double-width float to unsigned integer. */ 4605 RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8) 4606 RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16) 4607 RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32) 4608 GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1) 4609 GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2) 4610 GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4) 4611 4612 /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */ 4613 RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8) 4614 RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16) 4615 RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32) 4616 GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1) 4617 GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2) 4618 GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4) 4619 4620 /* 4621 * vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float. 4622 */ 4623 RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16) 4624 RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32) 4625 GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2) 4626 GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4) 4627 4628 /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */ 4629 RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16) 4630 RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32) 4631 GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2) 4632 GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4) 4633 4634 /* vfncvt.f.f.v vd, vs2, vm # Convert double-width float to single-width float.
*/ 4635 static uint16_t vfncvtffv16(uint32_t a, float_status *s) 4636 { 4637 return float32_to_float16(a, true, s); 4638 } 4639 4640 RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16) 4641 RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32) 4642 GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2) 4643 GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4) 4644 4645 RVVCALL(OPFVV1, vfncvtbf16_f_f_w, NOP_UU_H, H2, H4, float32_to_bfloat16) 4646 GEN_VEXT_V_ENV(vfncvtbf16_f_f_w, 2) 4647 4648 /* 4649 * Vector Reduction Operations 4650 */ 4651 /* Vector Single-Width Integer Reduction Instructions */ 4652 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \ 4653 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4654 void *vs2, CPURISCVState *env, \ 4655 uint32_t desc) \ 4656 { \ 4657 uint32_t vm = vext_vm(desc); \ 4658 uint32_t vl = env->vl; \ 4659 uint32_t esz = sizeof(TD); \ 4660 uint32_t vlenb = simd_maxsz(desc); \ 4661 uint32_t vta = vext_vta(desc); \ 4662 uint32_t i; \ 4663 TD s1 = *((TD *)vs1 + HD(0)); \ 4664 \ 4665 VSTART_CHECK_EARLY_EXIT(env, vl); \ 4666 \ 4667 for (i = env->vstart; i < vl; i++) { \ 4668 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4669 if (!vm && !vext_elem_mask(v0, i)) { \ 4670 continue; \ 4671 } \ 4672 s1 = OP(s1, (TD)s2); \ 4673 } \ 4674 if (vl > 0) { \ 4675 *((TD *)vd + HD(0)) = s1; \ 4676 } \ 4677 env->vstart = 0; \ 4678 /* set tail elements to 1s */ \ 4679 vext_set_elems_1s(vd, vta, esz, vlenb); \ 4680 } 4681 4682 /* vd[0] = sum(vs1[0], vs2[*]) */ 4683 GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD) 4684 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD) 4685 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD) 4686 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD) 4687 4688 /* vd[0] = maxu(vs1[0], vs2[*]) */ 4689 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX) 4690 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX) 4691 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX) 4692 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX) 4693 4694 /* vd[0] = max(vs1[0], vs2[*]) */ 4695 GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX) 4696 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX) 4697 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX) 4698 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX) 4699 4700 /* vd[0] = minu(vs1[0], vs2[*]) */ 4701 GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN) 4702 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN) 4703 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN) 4704 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN) 4705 4706 /* vd[0] = min(vs1[0], vs2[*]) */ 4707 GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN) 4708 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN) 4709 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN) 4710 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN) 4711 4712 /* vd[0] = and(vs1[0], vs2[*]) */ 4713 GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND) 4714 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND) 4715 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND) 4716 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND) 4717 4718 /* vd[0] = or(vs1[0], vs2[*]) */ 4719 GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR) 4720 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR) 4721 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR) 4722 
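/*
 * Illustrative sketch only (hypothetical name, not used by the helpers in
 * this file): for an unmasked vredsum.vs with SEW=32, the loop above is
 * equivalent to folding vs2[0..vl-1] into an accumulator seeded from
 * vs1[0]; when vl == 0 the real helper leaves vd[0] untouched.
 */
static inline int32_t example_unmasked_vredsum_w(const int32_t *vs2,
                                                 int32_t vs1_0, uint32_t vl)
{
    int32_t acc = vs1_0;            /* s1 = vs1[0]                  */
    uint32_t i;

    for (i = 0; i < vl; i++) {      /* vm = 1: every element active */
        acc += vs2[i];              /* s1 = DO_ADD(s1, vs2[i])      */
    }
    return acc;                     /* written to vd[0] when vl > 0 */
}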
GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR) 4723 4724 /* vd[0] = xor(vs1[0], vs2[*]) */ 4725 GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR) 4726 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR) 4727 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR) 4728 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR) 4729 4730 /* Vector Widening Integer Reduction Instructions */ 4731 /* signed sum reduction into double-width accumulator */ 4732 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD) 4733 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD) 4734 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD) 4735 4736 /* Unsigned sum reduction into double-width accumulator */ 4737 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD) 4738 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD) 4739 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD) 4740 4741 /* Vector Single-Width Floating-Point Reduction Instructions */ 4742 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \ 4743 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4744 void *vs2, CPURISCVState *env, \ 4745 uint32_t desc) \ 4746 { \ 4747 uint32_t vm = vext_vm(desc); \ 4748 uint32_t vl = env->vl; \ 4749 uint32_t esz = sizeof(TD); \ 4750 uint32_t vlenb = simd_maxsz(desc); \ 4751 uint32_t vta = vext_vta(desc); \ 4752 uint32_t i; \ 4753 TD s1 = *((TD *)vs1 + HD(0)); \ 4754 \ 4755 VSTART_CHECK_EARLY_EXIT(env, vl); \ 4756 \ 4757 for (i = env->vstart; i < vl; i++) { \ 4758 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4759 if (!vm && !vext_elem_mask(v0, i)) { \ 4760 continue; \ 4761 } \ 4762 s1 = OP(s1, (TD)s2, &env->fp_status); \ 4763 } \ 4764 if (vl > 0) { \ 4765 *((TD *)vd + HD(0)) = s1; \ 4766 } \ 4767 env->vstart = 0; \ 4768 /* set tail elements to 1s */ \ 4769 vext_set_elems_1s(vd, vta, esz, vlenb); \ 4770 } 4771 4772 /* Unordered sum */ 4773 GEN_VEXT_FRED(vfredusum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 4774 GEN_VEXT_FRED(vfredusum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4775 GEN_VEXT_FRED(vfredusum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4776 4777 /* Ordered sum */ 4778 GEN_VEXT_FRED(vfredosum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 4779 GEN_VEXT_FRED(vfredosum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4780 GEN_VEXT_FRED(vfredosum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4781 4782 /* Maximum value */ 4783 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, 4784 float16_maximum_number) 4785 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, 4786 float32_maximum_number) 4787 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, 4788 float64_maximum_number) 4789 4790 /* Minimum value */ 4791 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, 4792 float16_minimum_number) 4793 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, 4794 float32_minimum_number) 4795 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, 4796 float64_minimum_number) 4797 4798 /* Vector Widening Floating-Point Add Instructions */ 4799 static uint32_t fwadd16(uint32_t a, uint16_t b, float_status *s) 4800 { 4801 return float32_add(a, float16_to_float32(b, true, s), s); 4802 } 4803 4804 static uint64_t fwadd32(uint64_t a, uint32_t b, float_status *s) 4805 { 4806 return float64_add(a, float32_to_float64(b, s), s); 4807 } 4808 4809 /* Vector Widening Floating-Point Reduction Instructions */ 4810 /* Ordered/unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */ 4811 GEN_VEXT_FRED(vfwredusum_vs_h, 
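/*
 * Both the ordered (vfredosum/vfwredosum) and unordered
 * (vfredusum/vfwredusum) sums reuse the same strictly sequential loop:
 * the spec allows the unordered form to be evaluated in any order, and
 * the element-order association used here is one valid choice.
 */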
uint32_t, uint16_t, H4, H2, fwadd16) 4812 GEN_VEXT_FRED(vfwredusum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32) 4813 GEN_VEXT_FRED(vfwredosum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16) 4814 GEN_VEXT_FRED(vfwredosum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32) 4815 4816 /* 4817 * Vector Mask Operations 4818 */ 4819 /* Vector Mask-Register Logical Instructions */ 4820 #define GEN_VEXT_MASK_VV(NAME, OP) \ 4821 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4822 void *vs2, CPURISCVState *env, \ 4823 uint32_t desc) \ 4824 { \ 4825 uint32_t vl = env->vl; \ 4826 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;\ 4827 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 4828 uint32_t i; \ 4829 int a, b; \ 4830 \ 4831 VSTART_CHECK_EARLY_EXIT(env, vl); \ 4832 \ 4833 for (i = env->vstart; i < vl; i++) { \ 4834 a = vext_elem_mask(vs1, i); \ 4835 b = vext_elem_mask(vs2, i); \ 4836 vext_set_elem_mask(vd, i, OP(b, a)); \ 4837 } \ 4838 env->vstart = 0; \ 4839 /* 4840 * mask destination register are always tail-agnostic 4841 * set tail elements to 1s 4842 */ \ 4843 if (vta_all_1s) { \ 4844 for (; i < total_elems; i++) { \ 4845 vext_set_elem_mask(vd, i, 1); \ 4846 } \ 4847 } \ 4848 } 4849 4850 #define DO_NAND(N, M) (!(N & M)) 4851 #define DO_ANDNOT(N, M) (N & !M) 4852 #define DO_NOR(N, M) (!(N | M)) 4853 #define DO_ORNOT(N, M) (N | !M) 4854 #define DO_XNOR(N, M) (!(N ^ M)) 4855 4856 GEN_VEXT_MASK_VV(vmand_mm, DO_AND) 4857 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND) 4858 GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT) 4859 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR) 4860 GEN_VEXT_MASK_VV(vmor_mm, DO_OR) 4861 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR) 4862 GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT) 4863 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR) 4864 4865 /* Vector count population in mask vcpop */ 4866 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env, 4867 uint32_t desc) 4868 { 4869 target_ulong cnt = 0; 4870 uint32_t vm = vext_vm(desc); 4871 uint32_t vl = env->vl; 4872 int i; 4873 4874 for (i = env->vstart; i < vl; i++) { 4875 if (vm || vext_elem_mask(v0, i)) { 4876 if (vext_elem_mask(vs2, i)) { 4877 cnt++; 4878 } 4879 } 4880 } 4881 env->vstart = 0; 4882 return cnt; 4883 } 4884 4885 /* vfirst find-first-set mask bit */ 4886 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env, 4887 uint32_t desc) 4888 { 4889 uint32_t vm = vext_vm(desc); 4890 uint32_t vl = env->vl; 4891 int i; 4892 4893 for (i = env->vstart; i < vl; i++) { 4894 if (vm || vext_elem_mask(v0, i)) { 4895 if (vext_elem_mask(vs2, i)) { 4896 return i; 4897 } 4898 } 4899 } 4900 env->vstart = 0; 4901 return -1LL; 4902 } 4903 4904 enum set_mask_type { 4905 ONLY_FIRST = 1, 4906 INCLUDE_FIRST, 4907 BEFORE_FIRST, 4908 }; 4909 4910 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, 4911 uint32_t desc, enum set_mask_type type) 4912 { 4913 uint32_t vm = vext_vm(desc); 4914 uint32_t vl = env->vl; 4915 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; 4916 uint32_t vta_all_1s = vext_vta_all_1s(desc); 4917 uint32_t vma = vext_vma(desc); 4918 int i; 4919 bool first_mask_bit = false; 4920 4921 VSTART_CHECK_EARLY_EXIT(env, vl); 4922 4923 for (i = env->vstart; i < vl; i++) { 4924 if (!vm && !vext_elem_mask(v0, i)) { 4925 /* set masked-off elements to 1s */ 4926 if (vma) { 4927 vext_set_elem_mask(vd, i, 1); 4928 } 4929 continue; 4930 } 4931 /* write a zero to all following active elements */ 4932 if (first_mask_bit) { 4933 vext_set_elem_mask(vd, i, 0); 4934 continue; 4935 } 4936 if (vext_elem_mask(vs2, i)) { 4937 first_mask_bit = true; 4938 if (type 
enum set_mask_type {
    ONLY_FIRST = 1,
    INCLUDE_FIRST,
    BEFORE_FIRST,
};

static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
                   uint32_t desc, enum set_mask_type type)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;
    uint32_t vta_all_1s = vext_vta_all_1s(desc);
    uint32_t vma = vext_vma(desc);
    int i;
    bool first_mask_bit = false;

    VSTART_CHECK_EARLY_EXIT(env, vl);

    for (i = env->vstart; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            /* set masked-off elements to 1s */
            if (vma) {
                vext_set_elem_mask(vd, i, 1);
            }
            continue;
        }
        /* write a zero to all following active elements */
        if (first_mask_bit) {
            vext_set_elem_mask(vd, i, 0);
            continue;
        }
        if (vext_elem_mask(vs2, i)) {
            first_mask_bit = true;
            if (type == BEFORE_FIRST) {
                vext_set_elem_mask(vd, i, 0);
            } else {
                vext_set_elem_mask(vd, i, 1);
            }
        } else {
            if (type == ONLY_FIRST) {
                vext_set_elem_mask(vd, i, 0);
            } else {
                vext_set_elem_mask(vd, i, 1);
            }
        }
    }
    env->vstart = 0;
    /*
     * mask destination registers are always tail-agnostic
     * set tail elements to 1s
     */
    if (vta_all_1s) {
        for (; i < total_elems; i++) {
            vext_set_elem_mask(vd, i, 1);
        }
    }
}

void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
}

void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
}

void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
}
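
/*
 * Worked example (illustrative only, hypothetical inputs): with vm = 1,
 * vl = 8 and vs2 = 0b01001000 (first set bit at element 3), the three
 * helpers above produce:
 *
 *   vmsbf.m: 0b00000111   (set-before-first)
 *   vmsif.m: 0b00001111   (set-including-first)
 *   vmsof.m: 0b00001000   (set-only-first)
 *
 * matching the BEFORE_FIRST / INCLUDE_FIRST / ONLY_FIRST cases of vmsetm().
 */
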
/* Vector Iota Instruction */
#define GEN_VEXT_VIOTA_M(NAME, ETYPE, H)                                  \
void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env,      \
                  uint32_t desc)                                          \
{                                                                         \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint32_t sum = 0;                                                     \
    int i;                                                                \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = sum;                                      \
        if (vext_elem_mask(vs2, i)) {                                     \
            sum++;                                                        \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1)
GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)

/* Vector Element Index Instruction */
#define GEN_VEXT_VID_V(NAME, ETYPE, H)                                    \
void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc)  \
{                                                                         \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    int i;                                                                \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = i;                                        \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_VID_V(vid_v_b, uint8_t, H1)
GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)
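
/*
 * Worked example (illustrative only, hypothetical inputs): with vm = 1,
 * vl = 8 and vs2 = 0b01001000, viota.m writes the running count of set
 * vs2 bits below each element, vd = {0, 0, 0, 0, 1, 1, 1, 2}, while
 * vid.v simply writes each element's index, vd = {0, 1, 2, ..., 7}.
 */
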
/*
 * Vector Permutation Instructions
 */

/* Vector Slide Instructions */
#define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H)                              \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    target_ulong offset = s1, i_min, i;                                   \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    i_min = MAX(env->vstart, offset);                                     \
    for (i = i_min; i < vl; i++) {                                        \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset));          \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)

#define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H)                            \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    target_ulong i_max, i_min, i;                                         \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    i_min = MIN(s1 < vlmax ? vlmax - s1 : 0, vl);                         \
    i_max = MAX(i_min, env->vstart);                                      \
    for (i = env->vstart; i < i_max; ++i) {                               \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1));              \
    }                                                                     \
                                                                          \
    for (i = i_max; i < vl; ++i) {                                        \
        if (vm || vext_elem_mask(v0, i)) {                                \
            *((ETYPE *)vd + H(i)) = 0;                                    \
        }                                                                 \
    }                                                                     \
                                                                          \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)

#define GEN_VEXT_VSLIE1UP(BITWIDTH, H)                                    \
static void vslide1up_##BITWIDTH(void *vd, void *v0, uint64_t s1,         \
                                 void *vs2, CPURISCVState *env,           \
                                 uint32_t desc)                           \
{                                                                         \
    typedef uint##BITWIDTH##_t ETYPE;                                     \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint32_t i;                                                           \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        if (i == 0) {                                                     \
            *((ETYPE *)vd + H(i)) = s1;                                   \
        } else {                                                          \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_VSLIE1UP(8, H1)
GEN_VEXT_VSLIE1UP(16, H2)
GEN_VEXT_VSLIE1UP(32, H4)
GEN_VEXT_VSLIE1UP(64, H8)

#define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH)                             \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc);                     \
}

/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)

#define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H)                                 \
static void vslide1down_##BITWIDTH(void *vd, void *v0, uint64_t s1,       \
                                   void *vs2, CPURISCVState *env,         \
                                   uint32_t desc)                         \
{                                                                         \
    typedef uint##BITWIDTH##_t ETYPE;                                     \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint32_t i;                                                           \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        if (i == vl - 1) {                                                \
            *((ETYPE *)vd + H(i)) = s1;                                   \
        } else {                                                          \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_VSLIDE1DOWN(8, H1)
GEN_VEXT_VSLIDE1DOWN(16, H2)
GEN_VEXT_VSLIDE1DOWN(32, H4)
GEN_VEXT_VSLIDE1DOWN(64, H8)

#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH)                           \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc);                   \
}

/* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)
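
/*
 * Worked example (illustrative only, hypothetical inputs, assuming
 * VLMAX = vl and vm = 1): with vl = 4 and vs2 = {10, 11, 12, 13}:
 *
 *   vslideup.vx   (rs1 = 2): vd = {vd[0], vd[1], 10, 11}  (low elements kept)
 *   vslidedown.vx (rs1 = 2): vd = {12, 13, 0, 0}          (slid-past elements
 *                                                          read as zero)
 *   vslide1up.vx:            vd = {x[rs1], 10, 11, 12}
 *   vslide1down.vx:          vd = {11, 12, 13, x[rs1]}
 */
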
/* Vector Floating-Point Slide Instructions */
#define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH)                            \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,             \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc);                     \
}

/* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)

#define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH)                          \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,             \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc);                   \
}

/* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)

/* Vector Register Gather Instruction */
#define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2)                    \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2)));             \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(TS2);                                           \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint64_t index;                                                       \
    uint32_t i;                                                           \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        index = *((TS1 *)vs1 + HS1(i));                                   \
        if (index >= vlmax) {                                             \
            *((TS2 *)vd + HS2(i)) = 0;                                    \
        } else {                                                          \
            *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1)
GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)

GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)

#define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H)                              \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint64_t index = s1;                                                  \
    uint32_t i;                                                           \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        if (index >= vlmax) {                                             \
            *((ETYPE *)vd + H(i)) = 0;                                    \
        } else {                                                          \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[x[rs1]] */
GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1)
GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)
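
/*
 * Worked example (illustrative only, hypothetical inputs, assuming
 * VLMAX = vl and vm = 1): with vl = 4, vs2 = {10, 11, 12, 13} and
 * vs1 = {3, 0, 5, 1}, vrgather.vv gives vd = {13, 10, 0, 11}; index 5
 * is >= VLMAX, so that element reads as zero. vrgather.vx with
 * x[rs1] = 1 splats vs2[1], giving vd = {11, 11, 11, 11}.
 */
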
/* Vector Compress Instruction */
#define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H)                             \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t num = 0, i;                                                  \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vext_elem_mask(vs1, i)) {                                    \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i));                 \
        num++;                                                            \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, num * esz, total_elems * esz);             \
}

/* Compress into vd elements of vs2 where vs1 is enabled */
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)
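
/*
 * Worked example (illustrative only, hypothetical inputs): with vl = 8,
 * vs2 = {10, 11, 12, 13, 14, 15, 16, 17} and vs1 mask = 0b10010010
 * (elements 1, 4 and 7 selected), vcompress.vm packs the selected
 * elements at the bottom of vd, vd = {11, 14, 17, ...}; the remaining
 * destination elements are treated as tail elements and handled by vta
 * via vext_set_elems_1s() starting at num * esz.
 */
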
/* Vector Whole Register Move */
void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
{
    /* EEW = SEW */
    uint32_t maxsz = simd_maxsz(desc);
    uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
    uint32_t startb = env->vstart * sewb;
    uint32_t i = startb;

    if (startb >= maxsz) {
        env->vstart = 0;
        return;
    }

    if (HOST_BIG_ENDIAN && i % 8 != 0) {
        uint32_t j = ROUND_UP(i, 8);
        memcpy((uint8_t *)vd + H1(j - 1),
               (uint8_t *)vs2 + H1(j - 1),
               j - i);
        i = j;
    }

    memcpy((uint8_t *)vd + H1(i),
           (uint8_t *)vs2 + H1(i),
           maxsz - i);

    env->vstart = 0;
}

/* Vector Integer Extension */
#define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1)                     \
void HELPER(NAME)(void *vd, void *v0, void *vs2,                          \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vl = env->vl;                                                \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint32_t i;                                                           \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i));                \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1)
GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1)
GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1)

GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1)
GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1)
GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1)
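
/*
 * Worked example (illustrative only, hypothetical inputs): with vl = 2
 * and vs2 = {0x80, 0x7f} viewed as bytes, vzext.vf2 produces the
 * uint16_t elements {0x0080, 0x007f} while vsext.vf2 produces
 * {0xff80, 0x007f}, i.e. zero- versus sign-extension of each source
 * element to the destination SEW.
 */
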