/*
 * RISC-V Vector Extension Helpers for QEMU.
 *
 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "qemu/bitops.h"
#include "cpu.h"
#include "exec/memop.h"
#include "exec/exec-all.h"
#include "exec/cpu_ldst.h"
#include "exec/page-protection.h"
#include "exec/helper-proto.h"
#include "exec/tlb-flags.h"
#include "fpu/softfloat.h"
#include "tcg/tcg-gvec-desc.h"
#include "internals.h"
#include "vector_internals.h"
#include <math.h>

target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
                            target_ulong s2)
{
    int vlmax, vl;
    RISCVCPU *cpu = env_archcpu(env);
    uint64_t vlmul = FIELD_EX64(s2, VTYPE, VLMUL);
    uint8_t vsew = FIELD_EX64(s2, VTYPE, VSEW);
    uint16_t sew = 8 << vsew;
    uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
    int xlen = riscv_cpu_xlen(env);
    bool vill = (s2 >> (xlen - 1)) & 0x1;
    target_ulong reserved = s2 &
                            MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
                                            xlen - 1 - R_VTYPE_RESERVED_SHIFT);
    uint16_t vlen = cpu->cfg.vlenb << 3;
    int8_t lmul;

    if (vlmul & 4) {
        /*
         * Fractional LMUL, check:
         *
         * VLEN * LMUL >= SEW
         * VLEN >> (8 - lmul) >= sew
         * (vlenb << 3) >> (8 - lmul) >= sew
         */
        if (vlmul == 4 || (vlen >> (8 - vlmul)) < sew) {
            vill = true;
        }
    }

    if ((sew > cpu->cfg.elen) || vill || (ediv != 0) || (reserved != 0)) {
        /* only set vill bit. */
        env->vill = 1;
        env->vtype = 0;
        env->vl = 0;
        env->vstart = 0;
        return 0;
    }

    /* lmul encoded as in DisasContext::lmul */
    lmul = sextract32(FIELD_EX64(s2, VTYPE, VLMUL), 0, 3);
    vlmax = vext_get_vlmax(cpu->cfg.vlenb, vsew, lmul);
    if (s1 <= vlmax) {
        vl = s1;
    } else if (s1 < 2 * vlmax && cpu->cfg.rvv_vl_half_avl) {
        vl = (s1 + 1) >> 1;
    } else {
        vl = vlmax;
    }
    env->vl = vl;
    env->vtype = s2;
    env->vstart = 0;
    env->vill = 0;
    return vl;
}

/*
 * Get the maximum number of elements that can be operated on.
 *
 * log2_esz: log2 of element size in bytes.
 */
static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz)
{
    /*
     * As simd_desc supports at most 2048 bytes, the max vlen is 1024 bits.
     * So vlen in bytes (vlenb) is encoded as maxsz.
     */
    uint32_t vlenb = simd_maxsz(desc);

    /* Return VLMAX */
    int scale = vext_lmul(desc) - log2_esz;
    return scale < 0 ? vlenb >> -scale : vlenb << scale;
}

/*
 * This function checks the watchpoint before the real load operation.
 *
 * In system mode, the TLB API probe_access is enough for the watchpoint check.
 * In user mode, there is no watchpoint support now.
 *
 * It will trigger an exception if there is no mapping in the TLB
 * and the page table walk can't fill the TLB entry. Then the guest
 * software can return here after processing the exception, or never return.
 */
static void probe_pages(CPURISCVState *env, target_ulong addr,
                        target_ulong len, uintptr_t ra,
                        MMUAccessType access_type)
{
    target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
    target_ulong curlen = MIN(pagelen, len);
    int mmu_index = riscv_env_mmu_index(env, false);

    probe_access(env, adjust_addr(env, addr), curlen, access_type,
                 mmu_index, ra);
    if (len > curlen) {
        addr += curlen;
        curlen = len - curlen;
        probe_access(env, adjust_addr(env, addr), curlen, access_type,
                     mmu_index, ra);
    }
}

static inline void vext_set_elem_mask(void *v0, int index,
                                      uint8_t value)
{
    int idx = index / 64;
    int pos = index % 64;
    uint64_t old = ((uint64_t *)v0)[idx];
    ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
}

/* element operations for load and store */
typedef void vext_ldst_elem_fn_tlb(CPURISCVState *env, abi_ptr addr,
                                   uint32_t idx, void *vd, uintptr_t retaddr);
typedef void vext_ldst_elem_fn_host(void *vd, uint32_t idx, void *host);

#define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF)             \
static inline QEMU_ALWAYS_INLINE                            \
void NAME##_tlb(CPURISCVState *env, abi_ptr addr,           \
                uint32_t idx, void *vd, uintptr_t retaddr)  \
{                                                           \
    ETYPE *cur = ((ETYPE *)vd + H(idx));                    \
    *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr);       \
}                                                           \
                                                            \
static inline QEMU_ALWAYS_INLINE                            \
void NAME##_host(void *vd, uint32_t idx, void *host)        \
{                                                           \
    ETYPE *cur = ((ETYPE *)vd + H(idx));                    \
    *cur = (ETYPE)LDSUF##_p(host);                          \
}

GEN_VEXT_LD_ELEM(lde_b, uint8_t,  H1, ldub)
GEN_VEXT_LD_ELEM(lde_h, uint16_t, H2, lduw)
GEN_VEXT_LD_ELEM(lde_w, uint32_t, H4, ldl)
GEN_VEXT_LD_ELEM(lde_d, uint64_t, H8, ldq)

#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF)             \
static inline QEMU_ALWAYS_INLINE                            \
void NAME##_tlb(CPURISCVState *env, abi_ptr addr,           \
                uint32_t idx, void *vd, uintptr_t retaddr)  \
{                                                           \
    ETYPE data = *((ETYPE *)vd + H(idx));                   \
    cpu_##STSUF##_data_ra(env, addr, data, retaddr);        \
}                                                           \
                                                            \
static inline QEMU_ALWAYS_INLINE                            \
void NAME##_host(void *vd, uint32_t idx, void *host)        \
{                                                           \
    ETYPE data = *((ETYPE *)vd + H(idx));                   \
    STSUF##_p(host, data);                                  \
}

GEN_VEXT_ST_ELEM(ste_b, uint8_t,  H1, stb)
GEN_VEXT_ST_ELEM(ste_h, uint16_t, H2, stw)
GEN_VEXT_ST_ELEM(ste_w, uint32_t, H4, stl)
GEN_VEXT_ST_ELEM(ste_d, uint64_t, H8, stq)

static inline QEMU_ALWAYS_INLINE void
vext_continuous_ldst_tlb(CPURISCVState *env, vext_ldst_elem_fn_tlb *ldst_tlb,
                         void *vd, uint32_t evl, target_ulong addr,
                         uint32_t reg_start, uintptr_t ra, uint32_t esz,
                         bool is_load)
{
    uint32_t i;
    for (i = env->vstart; i < evl; env->vstart = ++i, addr += esz) {
        ldst_tlb(env, adjust_addr(env, addr), i, vd, ra);
    }
}

static inline QEMU_ALWAYS_INLINE void
vext_continuous_ldst_host(CPURISCVState *env, vext_ldst_elem_fn_host *ldst_host,
                          void *vd, uint32_t evl, uint32_t reg_start, void *host,
                          uint32_t esz, bool is_load)
{
#if HOST_BIG_ENDIAN
    for (; reg_start < evl; reg_start++, host += esz) {
        ldst_host(vd, reg_start, host);
    }
#else
    if (esz == 1) {
        uint32_t byte_offset = reg_start * esz;
        uint32_t size = (evl - reg_start) * esz;

        if (is_load) {
            memcpy(vd + byte_offset, host, size);
        } else {
            memcpy(host, vd + byte_offset, size);
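            /*
             * This byte-wise memcpy fast path is only taken for esz == 1,
             * where the element layout in the register file matches guest
             * memory byte for byte (H1 is the identity on little-endian
             * hosts), so no per-element byte swapping or index adjustment
             * is needed.  Wider elements go through ldst_host per element
             * below.
             */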
223 } 224 } else { 225 for (; reg_start < evl; reg_start++, host += esz) { 226 ldst_host(vd, reg_start, host); 227 } 228 } 229 #endif 230 } 231 232 static void vext_set_tail_elems_1s(target_ulong vl, void *vd, 233 uint32_t desc, uint32_t nf, 234 uint32_t esz, uint32_t max_elems) 235 { 236 uint32_t vta = vext_vta(desc); 237 int k; 238 239 if (vta == 0) { 240 return; 241 } 242 243 for (k = 0; k < nf; ++k) { 244 vext_set_elems_1s(vd, vta, (k * max_elems + vl) * esz, 245 (k * max_elems + max_elems) * esz); 246 } 247 } 248 249 /* 250 * stride: access vector element from strided memory 251 */ 252 static void 253 vext_ldst_stride(void *vd, void *v0, target_ulong base, target_ulong stride, 254 CPURISCVState *env, uint32_t desc, uint32_t vm, 255 vext_ldst_elem_fn_tlb *ldst_elem, uint32_t log2_esz, 256 uintptr_t ra) 257 { 258 uint32_t i, k; 259 uint32_t nf = vext_nf(desc); 260 uint32_t max_elems = vext_max_elems(desc, log2_esz); 261 uint32_t esz = 1 << log2_esz; 262 uint32_t vma = vext_vma(desc); 263 264 VSTART_CHECK_EARLY_EXIT(env, env->vl); 265 266 for (i = env->vstart; i < env->vl; env->vstart = ++i) { 267 k = 0; 268 while (k < nf) { 269 if (!vm && !vext_elem_mask(v0, i)) { 270 /* set masked-off elements to 1s */ 271 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz, 272 (i + k * max_elems + 1) * esz); 273 k++; 274 continue; 275 } 276 target_ulong addr = base + stride * i + (k << log2_esz); 277 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 278 k++; 279 } 280 } 281 env->vstart = 0; 282 283 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems); 284 } 285 286 #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \ 287 void HELPER(NAME)(void *vd, void * v0, target_ulong base, \ 288 target_ulong stride, CPURISCVState *env, \ 289 uint32_t desc) \ 290 { \ 291 uint32_t vm = vext_vm(desc); \ 292 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \ 293 ctzl(sizeof(ETYPE)), GETPC()); \ 294 } 295 296 GEN_VEXT_LD_STRIDE(vlse8_v, int8_t, lde_b_tlb) 297 GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h_tlb) 298 GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w_tlb) 299 GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d_tlb) 300 301 #define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN) \ 302 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 303 target_ulong stride, CPURISCVState *env, \ 304 uint32_t desc) \ 305 { \ 306 uint32_t vm = vext_vm(desc); \ 307 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \ 308 ctzl(sizeof(ETYPE)), GETPC()); \ 309 } 310 311 GEN_VEXT_ST_STRIDE(vsse8_v, int8_t, ste_b_tlb) 312 GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h_tlb) 313 GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w_tlb) 314 GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d_tlb) 315 316 /* 317 * unit-stride: access elements stored contiguously in memory 318 */ 319 320 /* unmasked unit-stride load and store operation */ 321 static inline QEMU_ALWAYS_INLINE void 322 vext_page_ldst_us(CPURISCVState *env, void *vd, target_ulong addr, 323 uint32_t elems, uint32_t nf, uint32_t max_elems, 324 uint32_t log2_esz, bool is_load, int mmu_index, 325 vext_ldst_elem_fn_tlb *ldst_tlb, 326 vext_ldst_elem_fn_host *ldst_host, uintptr_t ra) 327 { 328 void *host; 329 int i, k, flags; 330 uint32_t esz = 1 << log2_esz; 331 uint32_t size = (elems * nf) << log2_esz; 332 uint32_t evl = env->vstart + elems; 333 MMUAccessType access_type = is_load ? MMU_DATA_LOAD : MMU_DATA_STORE; 334 335 /* Check page permission/pmp/watchpoint/etc. 
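     * A zero flags result means the whole range is ordinary RAM with a
     * valid host pointer, so the host fast path below can be used; any
     * other flag falls back to the per-element TLB accessors.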
*/ 336 flags = probe_access_flags(env, adjust_addr(env, addr), size, access_type, 337 mmu_index, true, &host, ra); 338 339 if (flags == 0) { 340 if (nf == 1) { 341 vext_continuous_ldst_host(env, ldst_host, vd, evl, env->vstart, 342 host, esz, is_load); 343 } else { 344 for (i = env->vstart; i < evl; ++i) { 345 k = 0; 346 while (k < nf) { 347 ldst_host(vd, i + k * max_elems, host); 348 host += esz; 349 k++; 350 } 351 } 352 } 353 env->vstart += elems; 354 } else { 355 if (nf == 1) { 356 vext_continuous_ldst_tlb(env, ldst_tlb, vd, evl, addr, env->vstart, 357 ra, esz, is_load); 358 } else { 359 /* load bytes from guest memory */ 360 for (i = env->vstart; i < evl; env->vstart = ++i) { 361 k = 0; 362 while (k < nf) { 363 ldst_tlb(env, adjust_addr(env, addr), i + k * max_elems, 364 vd, ra); 365 addr += esz; 366 k++; 367 } 368 } 369 } 370 } 371 } 372 373 static inline QEMU_ALWAYS_INLINE void 374 vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, 375 vext_ldst_elem_fn_tlb *ldst_tlb, 376 vext_ldst_elem_fn_host *ldst_host, uint32_t log2_esz, 377 uint32_t evl, uintptr_t ra, bool is_load) 378 { 379 uint32_t k; 380 target_ulong page_split, elems, addr; 381 uint32_t nf = vext_nf(desc); 382 uint32_t max_elems = vext_max_elems(desc, log2_esz); 383 uint32_t esz = 1 << log2_esz; 384 uint32_t msize = nf * esz; 385 int mmu_index = riscv_env_mmu_index(env, false); 386 387 VSTART_CHECK_EARLY_EXIT(env, evl); 388 389 #if defined(CONFIG_USER_ONLY) 390 /* 391 * For data sizes <= 6 bytes we get better performance by simply calling 392 * vext_continuous_ldst_tlb 393 */ 394 if (nf == 1 && (evl << log2_esz) <= 6) { 395 addr = base + (env->vstart << log2_esz); 396 vext_continuous_ldst_tlb(env, ldst_tlb, vd, evl, addr, env->vstart, ra, 397 esz, is_load); 398 399 env->vstart = 0; 400 vext_set_tail_elems_1s(evl, vd, desc, nf, esz, max_elems); 401 return; 402 } 403 #endif 404 405 /* Calculate the page range of first page */ 406 addr = base + ((env->vstart * nf) << log2_esz); 407 page_split = -(addr | TARGET_PAGE_MASK); 408 /* Get number of elements */ 409 elems = page_split / msize; 410 if (unlikely(env->vstart + elems >= evl)) { 411 elems = evl - env->vstart; 412 } 413 414 /* Load/store elements in the first page */ 415 if (likely(elems)) { 416 vext_page_ldst_us(env, vd, addr, elems, nf, max_elems, log2_esz, 417 is_load, mmu_index, ldst_tlb, ldst_host, ra); 418 } 419 420 /* Load/store elements in the second page */ 421 if (unlikely(env->vstart < evl)) { 422 /* Cross page element */ 423 if (unlikely(page_split % msize)) { 424 for (k = 0; k < nf; k++) { 425 addr = base + ((env->vstart * nf + k) << log2_esz); 426 ldst_tlb(env, adjust_addr(env, addr), 427 env->vstart + k * max_elems, vd, ra); 428 } 429 env->vstart++; 430 } 431 432 addr = base + ((env->vstart * nf) << log2_esz); 433 /* Get number of elements of second page */ 434 elems = evl - env->vstart; 435 436 /* Load/store elements in the second page */ 437 vext_page_ldst_us(env, vd, addr, elems, nf, max_elems, log2_esz, 438 is_load, mmu_index, ldst_tlb, ldst_host, ra); 439 } 440 441 env->vstart = 0; 442 vext_set_tail_elems_1s(evl, vd, desc, nf, esz, max_elems); 443 } 444 445 /* 446 * masked unit-stride load and store operation will be a special case of 447 * stride, stride = NF * sizeof (ETYPE) 448 */ 449 450 #define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN_TLB, LOAD_FN_HOST) \ 451 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ 452 CPURISCVState *env, uint32_t desc) \ 453 { \ 454 uint32_t stride = vext_nf(desc) << 
ctzl(sizeof(ETYPE)); \ 455 vext_ldst_stride(vd, v0, base, stride, env, desc, false, \ 456 LOAD_FN_TLB, ctzl(sizeof(ETYPE)), GETPC()); \ 457 } \ 458 \ 459 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 460 CPURISCVState *env, uint32_t desc) \ 461 { \ 462 vext_ldst_us(vd, base, env, desc, LOAD_FN_TLB, LOAD_FN_HOST, \ 463 ctzl(sizeof(ETYPE)), env->vl, GETPC(), true); \ 464 } 465 466 GEN_VEXT_LD_US(vle8_v, int8_t, lde_b_tlb, lde_b_host) 467 GEN_VEXT_LD_US(vle16_v, int16_t, lde_h_tlb, lde_h_host) 468 GEN_VEXT_LD_US(vle32_v, int32_t, lde_w_tlb, lde_w_host) 469 GEN_VEXT_LD_US(vle64_v, int64_t, lde_d_tlb, lde_d_host) 470 471 #define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN_TLB, STORE_FN_HOST) \ 472 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ 473 CPURISCVState *env, uint32_t desc) \ 474 { \ 475 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \ 476 vext_ldst_stride(vd, v0, base, stride, env, desc, false, \ 477 STORE_FN_TLB, ctzl(sizeof(ETYPE)), GETPC()); \ 478 } \ 479 \ 480 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 481 CPURISCVState *env, uint32_t desc) \ 482 { \ 483 vext_ldst_us(vd, base, env, desc, STORE_FN_TLB, STORE_FN_HOST, \ 484 ctzl(sizeof(ETYPE)), env->vl, GETPC(), false); \ 485 } 486 487 GEN_VEXT_ST_US(vse8_v, int8_t, ste_b_tlb, ste_b_host) 488 GEN_VEXT_ST_US(vse16_v, int16_t, ste_h_tlb, ste_h_host) 489 GEN_VEXT_ST_US(vse32_v, int32_t, ste_w_tlb, ste_w_host) 490 GEN_VEXT_ST_US(vse64_v, int64_t, ste_d_tlb, ste_d_host) 491 492 /* 493 * unit stride mask load and store, EEW = 1 494 */ 495 void HELPER(vlm_v)(void *vd, void *v0, target_ulong base, 496 CPURISCVState *env, uint32_t desc) 497 { 498 /* evl = ceil(vl/8) */ 499 uint8_t evl = (env->vl + 7) >> 3; 500 vext_ldst_us(vd, base, env, desc, lde_b_tlb, lde_b_host, 501 0, evl, GETPC(), true); 502 } 503 504 void HELPER(vsm_v)(void *vd, void *v0, target_ulong base, 505 CPURISCVState *env, uint32_t desc) 506 { 507 /* evl = ceil(vl/8) */ 508 uint8_t evl = (env->vl + 7) >> 3; 509 vext_ldst_us(vd, base, env, desc, ste_b_tlb, ste_b_host, 510 0, evl, GETPC(), false); 511 } 512 513 /* 514 * index: access vector element from indexed memory 515 */ 516 typedef target_ulong vext_get_index_addr(target_ulong base, 517 uint32_t idx, void *vs2); 518 519 #define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \ 520 static target_ulong NAME(target_ulong base, \ 521 uint32_t idx, void *vs2) \ 522 { \ 523 return (base + *((ETYPE *)vs2 + H(idx))); \ 524 } 525 526 GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t, H1) 527 GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2) 528 GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4) 529 GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8) 530 531 static inline void 532 vext_ldst_index(void *vd, void *v0, target_ulong base, 533 void *vs2, CPURISCVState *env, uint32_t desc, 534 vext_get_index_addr get_index_addr, 535 vext_ldst_elem_fn_tlb *ldst_elem, 536 uint32_t log2_esz, uintptr_t ra) 537 { 538 uint32_t i, k; 539 uint32_t nf = vext_nf(desc); 540 uint32_t vm = vext_vm(desc); 541 uint32_t max_elems = vext_max_elems(desc, log2_esz); 542 uint32_t esz = 1 << log2_esz; 543 uint32_t vma = vext_vma(desc); 544 545 VSTART_CHECK_EARLY_EXIT(env, env->vl); 546 547 /* load bytes from guest memory */ 548 for (i = env->vstart; i < env->vl; env->vstart = ++i) { 549 k = 0; 550 while (k < nf) { 551 if (!vm && !vext_elem_mask(v0, i)) { 552 /* set masked-off elements to 1s */ 553 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz, 554 (i + k * max_elems + 1) * esz); 555 k++; 556 continue; 557 } 558 abi_ptr addr = 
get_index_addr(base, i, vs2) + (k << log2_esz); 559 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 560 k++; 561 } 562 } 563 env->vstart = 0; 564 565 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems); 566 } 567 568 #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \ 569 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 570 void *vs2, CPURISCVState *env, uint32_t desc) \ 571 { \ 572 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \ 573 LOAD_FN, ctzl(sizeof(ETYPE)), GETPC()); \ 574 } 575 576 GEN_VEXT_LD_INDEX(vlxei8_8_v, int8_t, idx_b, lde_b_tlb) 577 GEN_VEXT_LD_INDEX(vlxei8_16_v, int16_t, idx_b, lde_h_tlb) 578 GEN_VEXT_LD_INDEX(vlxei8_32_v, int32_t, idx_b, lde_w_tlb) 579 GEN_VEXT_LD_INDEX(vlxei8_64_v, int64_t, idx_b, lde_d_tlb) 580 GEN_VEXT_LD_INDEX(vlxei16_8_v, int8_t, idx_h, lde_b_tlb) 581 GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h_tlb) 582 GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w_tlb) 583 GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d_tlb) 584 GEN_VEXT_LD_INDEX(vlxei32_8_v, int8_t, idx_w, lde_b_tlb) 585 GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h_tlb) 586 GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w_tlb) 587 GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d_tlb) 588 GEN_VEXT_LD_INDEX(vlxei64_8_v, int8_t, idx_d, lde_b_tlb) 589 GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h_tlb) 590 GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w_tlb) 591 GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d_tlb) 592 593 #define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN) \ 594 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 595 void *vs2, CPURISCVState *env, uint32_t desc) \ 596 { \ 597 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \ 598 STORE_FN, ctzl(sizeof(ETYPE)), \ 599 GETPC()); \ 600 } 601 602 GEN_VEXT_ST_INDEX(vsxei8_8_v, int8_t, idx_b, ste_b_tlb) 603 GEN_VEXT_ST_INDEX(vsxei8_16_v, int16_t, idx_b, ste_h_tlb) 604 GEN_VEXT_ST_INDEX(vsxei8_32_v, int32_t, idx_b, ste_w_tlb) 605 GEN_VEXT_ST_INDEX(vsxei8_64_v, int64_t, idx_b, ste_d_tlb) 606 GEN_VEXT_ST_INDEX(vsxei16_8_v, int8_t, idx_h, ste_b_tlb) 607 GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h_tlb) 608 GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w_tlb) 609 GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d_tlb) 610 GEN_VEXT_ST_INDEX(vsxei32_8_v, int8_t, idx_w, ste_b_tlb) 611 GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h_tlb) 612 GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w_tlb) 613 GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d_tlb) 614 GEN_VEXT_ST_INDEX(vsxei64_8_v, int8_t, idx_d, ste_b_tlb) 615 GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h_tlb) 616 GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w_tlb) 617 GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d_tlb) 618 619 /* 620 * unit-stride fault-only-fisrt load instructions 621 */ 622 static inline void 623 vext_ldff(void *vd, void *v0, target_ulong base, CPURISCVState *env, 624 uint32_t desc, vext_ldst_elem_fn_tlb *ldst_tlb, 625 vext_ldst_elem_fn_host *ldst_host, uint32_t log2_esz, uintptr_t ra) 626 { 627 uint32_t i, k, vl = 0; 628 uint32_t nf = vext_nf(desc); 629 uint32_t vm = vext_vm(desc); 630 uint32_t max_elems = vext_max_elems(desc, log2_esz); 631 uint32_t esz = 1 << log2_esz; 632 uint32_t msize = nf * esz; 633 uint32_t vma = vext_vma(desc); 634 target_ulong addr, addr_probe, addr_i, offset, remain, page_split, elems; 635 int mmu_index = riscv_env_mmu_index(env, false); 636 int flags; 637 
void *host; 638 639 VSTART_CHECK_EARLY_EXIT(env, env->vl); 640 641 addr = base + ((env->vstart * nf) << log2_esz); 642 page_split = -(addr | TARGET_PAGE_MASK); 643 /* Get number of elements */ 644 elems = page_split / msize; 645 if (unlikely(env->vstart + elems >= env->vl)) { 646 elems = env->vl - env->vstart; 647 } 648 649 /* Check page permission/pmp/watchpoint/etc. */ 650 flags = probe_access_flags(env, adjust_addr(env, addr), elems * msize, 651 MMU_DATA_LOAD, mmu_index, true, &host, ra); 652 653 /* If we are crossing a page check also the second page. */ 654 if (env->vl > elems) { 655 addr_probe = addr + (elems << log2_esz); 656 flags |= probe_access_flags(env, adjust_addr(env, addr_probe), 657 elems * msize, MMU_DATA_LOAD, mmu_index, 658 true, &host, ra); 659 } 660 661 if (flags & ~TLB_WATCHPOINT) { 662 /* probe every access */ 663 for (i = env->vstart; i < env->vl; i++) { 664 if (!vm && !vext_elem_mask(v0, i)) { 665 continue; 666 } 667 addr_i = adjust_addr(env, base + i * (nf << log2_esz)); 668 if (i == 0) { 669 /* Allow fault on first element. */ 670 probe_pages(env, addr_i, nf << log2_esz, ra, MMU_DATA_LOAD); 671 } else { 672 remain = nf << log2_esz; 673 while (remain > 0) { 674 offset = -(addr_i | TARGET_PAGE_MASK); 675 676 /* Probe nonfault on subsequent elements. */ 677 flags = probe_access_flags(env, addr_i, offset, 678 MMU_DATA_LOAD, mmu_index, true, 679 &host, 0); 680 681 /* 682 * Stop if invalid (unmapped) or mmio (transaction may 683 * fail). Do not stop if watchpoint, as the spec says that 684 * first-fault should continue to access the same 685 * elements regardless of any watchpoint. 686 */ 687 if (flags & ~TLB_WATCHPOINT) { 688 vl = i; 689 goto ProbeSuccess; 690 } 691 if (remain <= offset) { 692 break; 693 } 694 remain -= offset; 695 addr_i = adjust_addr(env, addr_i + offset); 696 } 697 } 698 } 699 } 700 ProbeSuccess: 701 /* load bytes from guest memory */ 702 if (vl != 0) { 703 env->vl = vl; 704 } 705 706 if (env->vstart < env->vl) { 707 if (vm) { 708 /* Load/store elements in the first page */ 709 if (likely(elems)) { 710 vext_page_ldst_us(env, vd, addr, elems, nf, max_elems, 711 log2_esz, true, mmu_index, ldst_tlb, 712 ldst_host, ra); 713 } 714 715 /* Load/store elements in the second page */ 716 if (unlikely(env->vstart < env->vl)) { 717 /* Cross page element */ 718 if (unlikely(page_split % msize)) { 719 for (k = 0; k < nf; k++) { 720 addr = base + ((env->vstart * nf + k) << log2_esz); 721 ldst_tlb(env, adjust_addr(env, addr), 722 env->vstart + k * max_elems, vd, ra); 723 } 724 env->vstart++; 725 } 726 727 addr = base + ((env->vstart * nf) << log2_esz); 728 /* Get number of elements of second page */ 729 elems = env->vl - env->vstart; 730 731 /* Load/store elements in the second page */ 732 vext_page_ldst_us(env, vd, addr, elems, nf, max_elems, 733 log2_esz, true, mmu_index, ldst_tlb, 734 ldst_host, ra); 735 } 736 } else { 737 for (i = env->vstart; i < env->vl; i++) { 738 k = 0; 739 while (k < nf) { 740 if (!vext_elem_mask(v0, i)) { 741 /* set masked-off elements to 1s */ 742 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz, 743 (i + k * max_elems + 1) * esz); 744 k++; 745 continue; 746 } 747 addr = base + ((i * nf + k) << log2_esz); 748 ldst_tlb(env, adjust_addr(env, addr), i + k * max_elems, 749 vd, ra); 750 k++; 751 } 752 } 753 } 754 } 755 env->vstart = 0; 756 757 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems); 758 } 759 760 #define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN_TLB, LOAD_FN_HOST) \ 761 void HELPER(NAME)(void *vd, void *v0, target_ulong 
base, \ 762 CPURISCVState *env, uint32_t desc) \ 763 { \ 764 vext_ldff(vd, v0, base, env, desc, LOAD_FN_TLB, \ 765 LOAD_FN_HOST, ctzl(sizeof(ETYPE)), GETPC()); \ 766 } 767 768 GEN_VEXT_LDFF(vle8ff_v, int8_t, lde_b_tlb, lde_b_host) 769 GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h_tlb, lde_h_host) 770 GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w_tlb, lde_w_host) 771 GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d_tlb, lde_d_host) 772 773 #define DO_SWAP(N, M) (M) 774 #define DO_AND(N, M) (N & M) 775 #define DO_XOR(N, M) (N ^ M) 776 #define DO_OR(N, M) (N | M) 777 #define DO_ADD(N, M) (N + M) 778 779 /* Signed min/max */ 780 #define DO_MAX(N, M) ((N) >= (M) ? (N) : (M)) 781 #define DO_MIN(N, M) ((N) >= (M) ? (M) : (N)) 782 783 /* 784 * load and store whole register instructions 785 */ 786 static inline QEMU_ALWAYS_INLINE void 787 vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, 788 vext_ldst_elem_fn_tlb *ldst_tlb, 789 vext_ldst_elem_fn_host *ldst_host, uint32_t log2_esz, 790 uintptr_t ra, bool is_load) 791 { 792 target_ulong page_split, elems, addr; 793 uint32_t nf = vext_nf(desc); 794 uint32_t vlenb = riscv_cpu_cfg(env)->vlenb; 795 uint32_t max_elems = vlenb >> log2_esz; 796 uint32_t evl = nf * max_elems; 797 uint32_t esz = 1 << log2_esz; 798 int mmu_index = riscv_env_mmu_index(env, false); 799 800 /* Calculate the page range of first page */ 801 addr = base + (env->vstart << log2_esz); 802 page_split = -(addr | TARGET_PAGE_MASK); 803 /* Get number of elements */ 804 elems = page_split / esz; 805 if (unlikely(env->vstart + elems >= evl)) { 806 elems = evl - env->vstart; 807 } 808 809 /* Load/store elements in the first page */ 810 if (likely(elems)) { 811 vext_page_ldst_us(env, vd, addr, elems, 1, max_elems, log2_esz, 812 is_load, mmu_index, ldst_tlb, ldst_host, ra); 813 } 814 815 /* Load/store elements in the second page */ 816 if (unlikely(env->vstart < evl)) { 817 /* Cross page element */ 818 if (unlikely(page_split % esz)) { 819 addr = base + (env->vstart << log2_esz); 820 ldst_tlb(env, adjust_addr(env, addr), env->vstart, vd, ra); 821 env->vstart++; 822 } 823 824 addr = base + (env->vstart << log2_esz); 825 /* Get number of elements of second page */ 826 elems = evl - env->vstart; 827 828 /* Load/store elements in the second page */ 829 vext_page_ldst_us(env, vd, addr, elems, 1, max_elems, log2_esz, 830 is_load, mmu_index, ldst_tlb, ldst_host, ra); 831 } 832 833 env->vstart = 0; 834 } 835 836 #define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN_TLB, LOAD_FN_HOST) \ 837 void HELPER(NAME)(void *vd, target_ulong base, CPURISCVState *env, \ 838 uint32_t desc) \ 839 { \ 840 vext_ldst_whole(vd, base, env, desc, LOAD_FN_TLB, LOAD_FN_HOST, \ 841 ctzl(sizeof(ETYPE)), GETPC(), true); \ 842 } 843 844 GEN_VEXT_LD_WHOLE(vl1re8_v, int8_t, lde_b_tlb, lde_b_host) 845 GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h_tlb, lde_h_host) 846 GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w_tlb, lde_w_host) 847 GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d_tlb, lde_d_host) 848 GEN_VEXT_LD_WHOLE(vl2re8_v, int8_t, lde_b_tlb, lde_b_host) 849 GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h_tlb, lde_h_host) 850 GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w_tlb, lde_w_host) 851 GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d_tlb, lde_d_host) 852 GEN_VEXT_LD_WHOLE(vl4re8_v, int8_t, lde_b_tlb, lde_b_host) 853 GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h_tlb, lde_h_host) 854 GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w_tlb, lde_w_host) 855 GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d_tlb, lde_d_host) 856 GEN_VEXT_LD_WHOLE(vl8re8_v, 
int8_t, lde_b_tlb, lde_b_host) 857 GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h_tlb, lde_h_host) 858 GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w_tlb, lde_w_host) 859 GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d_tlb, lde_d_host) 860 861 #define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN_TLB, STORE_FN_HOST) \ 862 void HELPER(NAME)(void *vd, target_ulong base, CPURISCVState *env, \ 863 uint32_t desc) \ 864 { \ 865 vext_ldst_whole(vd, base, env, desc, STORE_FN_TLB, STORE_FN_HOST, \ 866 ctzl(sizeof(ETYPE)), GETPC(), false); \ 867 } 868 869 GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b_tlb, ste_b_host) 870 GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b_tlb, ste_b_host) 871 GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b_tlb, ste_b_host) 872 GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b_tlb, ste_b_host) 873 874 /* 875 * Vector Integer Arithmetic Instructions 876 */ 877 878 /* (TD, T1, T2, TX1, TX2) */ 879 #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t 880 #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t 881 #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t 882 #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t 883 #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t 884 #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t 885 #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t 886 #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t 887 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 888 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 889 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 890 #define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t 891 #define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t 892 #define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t 893 #define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t 894 #define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t 895 #define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t 896 #define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t 897 #define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t 898 #define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t 899 #define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t 900 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t 901 #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t 902 903 #define DO_SUB(N, M) (N - M) 904 #define DO_RSUB(N, M) (M - N) 905 906 RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD) 907 RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD) 908 RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD) 909 RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD) 910 RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB) 911 RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB) 912 RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB) 913 RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB) 914 915 GEN_VEXT_VV(vadd_vv_b, 1) 916 GEN_VEXT_VV(vadd_vv_h, 2) 917 GEN_VEXT_VV(vadd_vv_w, 4) 918 GEN_VEXT_VV(vadd_vv_d, 8) 919 GEN_VEXT_VV(vsub_vv_b, 1) 920 GEN_VEXT_VV(vsub_vv_h, 2) 921 GEN_VEXT_VV(vsub_vv_w, 4) 922 GEN_VEXT_VV(vsub_vv_d, 8) 923 924 925 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD) 926 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD) 927 RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD) 928 RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD) 929 RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB) 930 RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, 
DO_SUB) 931 RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB) 932 RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB) 933 RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB) 934 RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB) 935 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB) 936 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB) 937 938 GEN_VEXT_VX(vadd_vx_b, 1) 939 GEN_VEXT_VX(vadd_vx_h, 2) 940 GEN_VEXT_VX(vadd_vx_w, 4) 941 GEN_VEXT_VX(vadd_vx_d, 8) 942 GEN_VEXT_VX(vsub_vx_b, 1) 943 GEN_VEXT_VX(vsub_vx_h, 2) 944 GEN_VEXT_VX(vsub_vx_w, 4) 945 GEN_VEXT_VX(vsub_vx_d, 8) 946 GEN_VEXT_VX(vrsub_vx_b, 1) 947 GEN_VEXT_VX(vrsub_vx_h, 2) 948 GEN_VEXT_VX(vrsub_vx_w, 4) 949 GEN_VEXT_VX(vrsub_vx_d, 8) 950 951 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc) 952 { 953 intptr_t oprsz = simd_oprsz(desc); 954 intptr_t i; 955 956 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 957 *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i); 958 } 959 } 960 961 void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc) 962 { 963 intptr_t oprsz = simd_oprsz(desc); 964 intptr_t i; 965 966 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 967 *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i); 968 } 969 } 970 971 void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc) 972 { 973 intptr_t oprsz = simd_oprsz(desc); 974 intptr_t i; 975 976 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 977 *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i); 978 } 979 } 980 981 void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc) 982 { 983 intptr_t oprsz = simd_oprsz(desc); 984 intptr_t i; 985 986 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 987 *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i); 988 } 989 } 990 991 /* Vector Widening Integer Add/Subtract */ 992 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t 993 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t 994 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t 995 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 996 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 997 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 998 #define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t 999 #define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t 1000 #define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t 1001 #define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t 1002 #define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t 1003 #define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t 1004 RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD) 1005 RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD) 1006 RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD) 1007 RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB) 1008 RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB) 1009 RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB) 1010 RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD) 1011 RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD) 1012 RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD) 1013 RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB) 1014 RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB) 1015 RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB) 1016 RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD) 1017 RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, 
H4, H2, H2, DO_ADD) 1018 RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD) 1019 RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB) 1020 RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB) 1021 RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB) 1022 RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD) 1023 RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD) 1024 RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD) 1025 RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB) 1026 RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB) 1027 RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB) 1028 GEN_VEXT_VV(vwaddu_vv_b, 2) 1029 GEN_VEXT_VV(vwaddu_vv_h, 4) 1030 GEN_VEXT_VV(vwaddu_vv_w, 8) 1031 GEN_VEXT_VV(vwsubu_vv_b, 2) 1032 GEN_VEXT_VV(vwsubu_vv_h, 4) 1033 GEN_VEXT_VV(vwsubu_vv_w, 8) 1034 GEN_VEXT_VV(vwadd_vv_b, 2) 1035 GEN_VEXT_VV(vwadd_vv_h, 4) 1036 GEN_VEXT_VV(vwadd_vv_w, 8) 1037 GEN_VEXT_VV(vwsub_vv_b, 2) 1038 GEN_VEXT_VV(vwsub_vv_h, 4) 1039 GEN_VEXT_VV(vwsub_vv_w, 8) 1040 GEN_VEXT_VV(vwaddu_wv_b, 2) 1041 GEN_VEXT_VV(vwaddu_wv_h, 4) 1042 GEN_VEXT_VV(vwaddu_wv_w, 8) 1043 GEN_VEXT_VV(vwsubu_wv_b, 2) 1044 GEN_VEXT_VV(vwsubu_wv_h, 4) 1045 GEN_VEXT_VV(vwsubu_wv_w, 8) 1046 GEN_VEXT_VV(vwadd_wv_b, 2) 1047 GEN_VEXT_VV(vwadd_wv_h, 4) 1048 GEN_VEXT_VV(vwadd_wv_w, 8) 1049 GEN_VEXT_VV(vwsub_wv_b, 2) 1050 GEN_VEXT_VV(vwsub_wv_h, 4) 1051 GEN_VEXT_VV(vwsub_wv_w, 8) 1052 1053 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD) 1054 RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD) 1055 RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD) 1056 RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB) 1057 RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB) 1058 RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB) 1059 RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD) 1060 RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD) 1061 RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD) 1062 RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB) 1063 RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB) 1064 RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB) 1065 RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD) 1066 RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD) 1067 RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD) 1068 RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB) 1069 RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB) 1070 RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB) 1071 RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD) 1072 RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD) 1073 RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD) 1074 RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB) 1075 RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB) 1076 RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB) 1077 GEN_VEXT_VX(vwaddu_vx_b, 2) 1078 GEN_VEXT_VX(vwaddu_vx_h, 4) 1079 GEN_VEXT_VX(vwaddu_vx_w, 8) 1080 GEN_VEXT_VX(vwsubu_vx_b, 2) 1081 GEN_VEXT_VX(vwsubu_vx_h, 4) 1082 GEN_VEXT_VX(vwsubu_vx_w, 8) 1083 GEN_VEXT_VX(vwadd_vx_b, 2) 1084 GEN_VEXT_VX(vwadd_vx_h, 4) 1085 GEN_VEXT_VX(vwadd_vx_w, 8) 1086 GEN_VEXT_VX(vwsub_vx_b, 2) 1087 GEN_VEXT_VX(vwsub_vx_h, 4) 1088 GEN_VEXT_VX(vwsub_vx_w, 8) 1089 GEN_VEXT_VX(vwaddu_wx_b, 2) 1090 GEN_VEXT_VX(vwaddu_wx_h, 4) 1091 GEN_VEXT_VX(vwaddu_wx_w, 8) 1092 GEN_VEXT_VX(vwsubu_wx_b, 2) 1093 GEN_VEXT_VX(vwsubu_wx_h, 4) 1094 GEN_VEXT_VX(vwsubu_wx_w, 8) 1095 GEN_VEXT_VX(vwadd_wx_b, 2) 1096 
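/*
 * Illustrative sketch (hypothetical reference, not used by the helpers
 * above): the WOP_* type lists encode the widening semantics -- each
 * SEW-wide source is sign- or zero-extended to 2*SEW before the add or
 * subtract, so a byte-element vwadd.vx on the low SEW bits of the scalar
 * behaves like the function below.
 */
static inline int16_t vwadd_vx_b_ref(int8_t s2, int8_t rs1)
{
    /* widen first, then add: the sum cannot wrap at 8 bits */
    return (int16_t)s2 + (int16_t)rs1;
}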
GEN_VEXT_VX(vwadd_wx_h, 4) 1097 GEN_VEXT_VX(vwadd_wx_w, 8) 1098 GEN_VEXT_VX(vwsub_wx_b, 2) 1099 GEN_VEXT_VX(vwsub_wx_h, 4) 1100 GEN_VEXT_VX(vwsub_wx_w, 8) 1101 1102 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ 1103 #define DO_VADC(N, M, C) (N + M + C) 1104 #define DO_VSBC(N, M, C) (N - M - C) 1105 1106 #define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP) \ 1107 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1108 CPURISCVState *env, uint32_t desc) \ 1109 { \ 1110 uint32_t vl = env->vl; \ 1111 uint32_t esz = sizeof(ETYPE); \ 1112 uint32_t total_elems = \ 1113 vext_get_total_elems(env, desc, esz); \ 1114 uint32_t vta = vext_vta(desc); \ 1115 uint32_t i; \ 1116 \ 1117 VSTART_CHECK_EARLY_EXIT(env, vl); \ 1118 \ 1119 for (i = env->vstart; i < vl; i++) { \ 1120 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1121 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1122 ETYPE carry = vext_elem_mask(v0, i); \ 1123 \ 1124 *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \ 1125 } \ 1126 env->vstart = 0; \ 1127 /* set tail elements to 1s */ \ 1128 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1129 } 1130 1131 GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC) 1132 GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC) 1133 GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC) 1134 GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC) 1135 1136 GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC) 1137 GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC) 1138 GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC) 1139 GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC) 1140 1141 #define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP) \ 1142 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 1143 CPURISCVState *env, uint32_t desc) \ 1144 { \ 1145 uint32_t vl = env->vl; \ 1146 uint32_t esz = sizeof(ETYPE); \ 1147 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 1148 uint32_t vta = vext_vta(desc); \ 1149 uint32_t i; \ 1150 \ 1151 VSTART_CHECK_EARLY_EXIT(env, vl); \ 1152 \ 1153 for (i = env->vstart; i < vl; i++) { \ 1154 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1155 ETYPE carry = vext_elem_mask(v0, i); \ 1156 \ 1157 *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\ 1158 } \ 1159 env->vstart = 0; \ 1160 /* set tail elements to 1s */ \ 1161 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1162 } 1163 1164 GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC) 1165 GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC) 1166 GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC) 1167 GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC) 1168 1169 GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC) 1170 GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC) 1171 GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC) 1172 GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC) 1173 1174 #define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \ 1175 (__typeof(N))(N + M) < N) 1176 #define DO_MSBC(N, M, C) (C ? 
N <= M : N < M) 1177 1178 #define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \ 1179 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1180 CPURISCVState *env, uint32_t desc) \ 1181 { \ 1182 uint32_t vl = env->vl; \ 1183 uint32_t vm = vext_vm(desc); \ 1184 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \ 1185 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 1186 uint32_t i; \ 1187 \ 1188 VSTART_CHECK_EARLY_EXIT(env, vl); \ 1189 \ 1190 for (i = env->vstart; i < vl; i++) { \ 1191 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1192 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1193 ETYPE carry = !vm && vext_elem_mask(v0, i); \ 1194 vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \ 1195 } \ 1196 env->vstart = 0; \ 1197 /* 1198 * mask destination register are always tail-agnostic 1199 * set tail elements to 1s 1200 */ \ 1201 if (vta_all_1s) { \ 1202 for (; i < total_elems; i++) { \ 1203 vext_set_elem_mask(vd, i, 1); \ 1204 } \ 1205 } \ 1206 } 1207 1208 GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC) 1209 GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC) 1210 GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC) 1211 GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC) 1212 1213 GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC) 1214 GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC) 1215 GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC) 1216 GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC) 1217 1218 #define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \ 1219 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1220 void *vs2, CPURISCVState *env, uint32_t desc) \ 1221 { \ 1222 uint32_t vl = env->vl; \ 1223 uint32_t vm = vext_vm(desc); \ 1224 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \ 1225 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 1226 uint32_t i; \ 1227 \ 1228 VSTART_CHECK_EARLY_EXIT(env, vl); \ 1229 \ 1230 for (i = env->vstart; i < vl; i++) { \ 1231 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1232 ETYPE carry = !vm && vext_elem_mask(v0, i); \ 1233 vext_set_elem_mask(vd, i, \ 1234 DO_OP(s2, (ETYPE)(target_long)s1, carry)); \ 1235 } \ 1236 env->vstart = 0; \ 1237 /* 1238 * mask destination register are always tail-agnostic 1239 * set tail elements to 1s 1240 */ \ 1241 if (vta_all_1s) { \ 1242 for (; i < total_elems; i++) { \ 1243 vext_set_elem_mask(vd, i, 1); \ 1244 } \ 1245 } \ 1246 } 1247 1248 GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC) 1249 GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC) 1250 GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC) 1251 GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC) 1252 1253 GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC) 1254 GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC) 1255 GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC) 1256 GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC) 1257 1258 /* Vector Bitwise Logical Instructions */ 1259 RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND) 1260 RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND) 1261 RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND) 1262 RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND) 1263 RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR) 1264 RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR) 1265 RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR) 1266 RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR) 1267 RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR) 1268 RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR) 1269 RVVCALL(OPIVV2, vxor_vv_w, 
OP_SSS_W, H4, H4, H4, DO_XOR) 1270 RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR) 1271 GEN_VEXT_VV(vand_vv_b, 1) 1272 GEN_VEXT_VV(vand_vv_h, 2) 1273 GEN_VEXT_VV(vand_vv_w, 4) 1274 GEN_VEXT_VV(vand_vv_d, 8) 1275 GEN_VEXT_VV(vor_vv_b, 1) 1276 GEN_VEXT_VV(vor_vv_h, 2) 1277 GEN_VEXT_VV(vor_vv_w, 4) 1278 GEN_VEXT_VV(vor_vv_d, 8) 1279 GEN_VEXT_VV(vxor_vv_b, 1) 1280 GEN_VEXT_VV(vxor_vv_h, 2) 1281 GEN_VEXT_VV(vxor_vv_w, 4) 1282 GEN_VEXT_VV(vxor_vv_d, 8) 1283 1284 RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND) 1285 RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND) 1286 RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND) 1287 RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND) 1288 RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR) 1289 RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR) 1290 RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR) 1291 RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR) 1292 RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR) 1293 RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR) 1294 RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR) 1295 RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR) 1296 GEN_VEXT_VX(vand_vx_b, 1) 1297 GEN_VEXT_VX(vand_vx_h, 2) 1298 GEN_VEXT_VX(vand_vx_w, 4) 1299 GEN_VEXT_VX(vand_vx_d, 8) 1300 GEN_VEXT_VX(vor_vx_b, 1) 1301 GEN_VEXT_VX(vor_vx_h, 2) 1302 GEN_VEXT_VX(vor_vx_w, 4) 1303 GEN_VEXT_VX(vor_vx_d, 8) 1304 GEN_VEXT_VX(vxor_vx_b, 1) 1305 GEN_VEXT_VX(vxor_vx_h, 2) 1306 GEN_VEXT_VX(vxor_vx_w, 4) 1307 GEN_VEXT_VX(vxor_vx_d, 8) 1308 1309 /* Vector Single-Width Bit Shift Instructions */ 1310 #define DO_SLL(N, M) (N << (M)) 1311 #define DO_SRL(N, M) (N >> (M)) 1312 1313 /* generate the helpers for shift instructions with two vector operators */ 1314 #define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK) \ 1315 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 1316 void *vs2, CPURISCVState *env, uint32_t desc) \ 1317 { \ 1318 uint32_t vm = vext_vm(desc); \ 1319 uint32_t vl = env->vl; \ 1320 uint32_t esz = sizeof(TS1); \ 1321 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 1322 uint32_t vta = vext_vta(desc); \ 1323 uint32_t vma = vext_vma(desc); \ 1324 uint32_t i; \ 1325 \ 1326 VSTART_CHECK_EARLY_EXIT(env, vl); \ 1327 \ 1328 for (i = env->vstart; i < vl; i++) { \ 1329 if (!vm && !vext_elem_mask(v0, i)) { \ 1330 /* set masked-off elements to 1s */ \ 1331 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 1332 continue; \ 1333 } \ 1334 TS1 s1 = *((TS1 *)vs1 + HS1(i)); \ 1335 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1336 *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \ 1337 } \ 1338 env->vstart = 0; \ 1339 /* set tail elements to 1s */ \ 1340 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1341 } 1342 1343 GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7) 1344 GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf) 1345 GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f) 1346 GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f) 1347 1348 GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1349 GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1350 GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1351 GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1352 1353 GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7) 1354 GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf) 1355 GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, 
DO_SRL, 0x1f) 1356 GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1357 1358 /* 1359 * generate the helpers for shift instructions with one vector and one scalar 1360 */ 1361 #define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \ 1362 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1363 void *vs2, CPURISCVState *env, \ 1364 uint32_t desc) \ 1365 { \ 1366 uint32_t vm = vext_vm(desc); \ 1367 uint32_t vl = env->vl; \ 1368 uint32_t esz = sizeof(TD); \ 1369 uint32_t total_elems = \ 1370 vext_get_total_elems(env, desc, esz); \ 1371 uint32_t vta = vext_vta(desc); \ 1372 uint32_t vma = vext_vma(desc); \ 1373 uint32_t i; \ 1374 \ 1375 VSTART_CHECK_EARLY_EXIT(env, vl); \ 1376 \ 1377 for (i = env->vstart; i < vl; i++) { \ 1378 if (!vm && !vext_elem_mask(v0, i)) { \ 1379 /* set masked-off elements to 1s */ \ 1380 vext_set_elems_1s(vd, vma, i * esz, \ 1381 (i + 1) * esz); \ 1382 continue; \ 1383 } \ 1384 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1385 *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \ 1386 } \ 1387 env->vstart = 0; \ 1388 /* set tail elements to 1s */ \ 1389 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);\ 1390 } 1391 1392 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7) 1393 GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf) 1394 GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f) 1395 GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f) 1396 1397 GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1398 GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1399 GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1400 GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1401 1402 GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7) 1403 GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf) 1404 GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f) 1405 GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1406 1407 /* Vector Narrowing Integer Right Shift Instructions */ 1408 GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1409 GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1410 GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1411 GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf) 1412 GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1413 GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1414 GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1415 GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1416 GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1417 GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf) 1418 GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1419 GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1420 1421 /* Vector Integer Comparison Instructions */ 1422 #define DO_MSEQ(N, M) (N == M) 1423 #define DO_MSNE(N, M) (N != M) 1424 #define DO_MSLT(N, M) (N < M) 1425 #define DO_MSLE(N, M) (N <= M) 1426 #define DO_MSGT(N, M) (N > M) 1427 1428 #define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \ 1429 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1430 CPURISCVState *env, uint32_t desc) \ 1431 { \ 1432 uint32_t vm = vext_vm(desc); \ 1433 uint32_t vl = env->vl; \ 1434 uint32_t total_elems = 
riscv_cpu_cfg(env)->vlenb << 3; \ 1435 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 1436 uint32_t vma = vext_vma(desc); \ 1437 uint32_t i; \ 1438 \ 1439 VSTART_CHECK_EARLY_EXIT(env, vl); \ 1440 \ 1441 for (i = env->vstart; i < vl; i++) { \ 1442 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1443 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1444 if (!vm && !vext_elem_mask(v0, i)) { \ 1445 /* set masked-off elements to 1s */ \ 1446 if (vma) { \ 1447 vext_set_elem_mask(vd, i, 1); \ 1448 } \ 1449 continue; \ 1450 } \ 1451 vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \ 1452 } \ 1453 env->vstart = 0; \ 1454 /* 1455 * mask destination register are always tail-agnostic 1456 * set tail elements to 1s 1457 */ \ 1458 if (vta_all_1s) { \ 1459 for (; i < total_elems; i++) { \ 1460 vext_set_elem_mask(vd, i, 1); \ 1461 } \ 1462 } \ 1463 } 1464 1465 GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ) 1466 GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ) 1467 GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ) 1468 GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ) 1469 1470 GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE) 1471 GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE) 1472 GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE) 1473 GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE) 1474 1475 GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT) 1476 GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT) 1477 GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT) 1478 GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT) 1479 1480 GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT) 1481 GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT) 1482 GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT) 1483 GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT) 1484 1485 GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE) 1486 GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE) 1487 GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE) 1488 GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE) 1489 1490 GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE) 1491 GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE) 1492 GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE) 1493 GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE) 1494 1495 #define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \ 1496 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 1497 CPURISCVState *env, uint32_t desc) \ 1498 { \ 1499 uint32_t vm = vext_vm(desc); \ 1500 uint32_t vl = env->vl; \ 1501 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \ 1502 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 1503 uint32_t vma = vext_vma(desc); \ 1504 uint32_t i; \ 1505 \ 1506 VSTART_CHECK_EARLY_EXIT(env, vl); \ 1507 \ 1508 for (i = env->vstart; i < vl; i++) { \ 1509 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1510 if (!vm && !vext_elem_mask(v0, i)) { \ 1511 /* set masked-off elements to 1s */ \ 1512 if (vma) { \ 1513 vext_set_elem_mask(vd, i, 1); \ 1514 } \ 1515 continue; \ 1516 } \ 1517 vext_set_elem_mask(vd, i, \ 1518 DO_OP(s2, (ETYPE)(target_long)s1)); \ 1519 } \ 1520 env->vstart = 0; \ 1521 /* 1522 * mask destination register are always tail-agnostic 1523 * set tail elements to 1s 1524 */ \ 1525 if (vta_all_1s) { \ 1526 for (; i < total_elems; i++) { \ 1527 vext_set_elem_mask(vd, i, 1); \ 1528 } \ 1529 } \ 1530 } 1531 1532 GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ) 1533 GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ) 1534 GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ) 1535 GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ) 1536 1537 GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE) 
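/*
 * Illustrative sketch (hypothetical helper, mirroring vext_set_elem_mask
 * above): each comparison produces a single mask bit per element, stored
 * at bit (i % 64) of 64-bit word (i / 64) of the destination mask
 * register, which is what the deposit64() call implements.
 */
static inline void set_mask_bit_ref(uint64_t *mask, uint32_t i, bool value)
{
    uint64_t bit = UINT64_C(1) << (i % 64);

    mask[i / 64] = (mask[i / 64] & ~bit) | (value ? bit : 0);
}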
1538 GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE) 1539 GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE) 1540 GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE) 1541 1542 GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT) 1543 GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT) 1544 GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT) 1545 GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT) 1546 1547 GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT) 1548 GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT) 1549 GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT) 1550 GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT) 1551 1552 GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE) 1553 GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE) 1554 GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE) 1555 GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE) 1556 1557 GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE) 1558 GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE) 1559 GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE) 1560 GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE) 1561 1562 GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT) 1563 GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT) 1564 GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT) 1565 GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT) 1566 1567 GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT) 1568 GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT) 1569 GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT) 1570 GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT) 1571 1572 /* Vector Integer Min/Max Instructions */ 1573 RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN) 1574 RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN) 1575 RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN) 1576 RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN) 1577 RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN) 1578 RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN) 1579 RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN) 1580 RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN) 1581 RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX) 1582 RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX) 1583 RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX) 1584 RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX) 1585 RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX) 1586 RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX) 1587 RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX) 1588 RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX) 1589 GEN_VEXT_VV(vminu_vv_b, 1) 1590 GEN_VEXT_VV(vminu_vv_h, 2) 1591 GEN_VEXT_VV(vminu_vv_w, 4) 1592 GEN_VEXT_VV(vminu_vv_d, 8) 1593 GEN_VEXT_VV(vmin_vv_b, 1) 1594 GEN_VEXT_VV(vmin_vv_h, 2) 1595 GEN_VEXT_VV(vmin_vv_w, 4) 1596 GEN_VEXT_VV(vmin_vv_d, 8) 1597 GEN_VEXT_VV(vmaxu_vv_b, 1) 1598 GEN_VEXT_VV(vmaxu_vv_h, 2) 1599 GEN_VEXT_VV(vmaxu_vv_w, 4) 1600 GEN_VEXT_VV(vmaxu_vv_d, 8) 1601 GEN_VEXT_VV(vmax_vv_b, 1) 1602 GEN_VEXT_VV(vmax_vv_h, 2) 1603 GEN_VEXT_VV(vmax_vv_w, 4) 1604 GEN_VEXT_VV(vmax_vv_d, 8) 1605 1606 RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN) 1607 RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN) 1608 RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN) 1609 RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN) 1610 RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN) 1611 RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN) 1612 RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN) 1613 RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, 
H8, DO_MIN) 1614 RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX) 1615 RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX) 1616 RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX) 1617 RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX) 1618 RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX) 1619 RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX) 1620 RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX) 1621 RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX) 1622 GEN_VEXT_VX(vminu_vx_b, 1) 1623 GEN_VEXT_VX(vminu_vx_h, 2) 1624 GEN_VEXT_VX(vminu_vx_w, 4) 1625 GEN_VEXT_VX(vminu_vx_d, 8) 1626 GEN_VEXT_VX(vmin_vx_b, 1) 1627 GEN_VEXT_VX(vmin_vx_h, 2) 1628 GEN_VEXT_VX(vmin_vx_w, 4) 1629 GEN_VEXT_VX(vmin_vx_d, 8) 1630 GEN_VEXT_VX(vmaxu_vx_b, 1) 1631 GEN_VEXT_VX(vmaxu_vx_h, 2) 1632 GEN_VEXT_VX(vmaxu_vx_w, 4) 1633 GEN_VEXT_VX(vmaxu_vx_d, 8) 1634 GEN_VEXT_VX(vmax_vx_b, 1) 1635 GEN_VEXT_VX(vmax_vx_h, 2) 1636 GEN_VEXT_VX(vmax_vx_w, 4) 1637 GEN_VEXT_VX(vmax_vx_d, 8) 1638 1639 /* Vector Single-Width Integer Multiply Instructions */ 1640 #define DO_MUL(N, M) (N * M) 1641 RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL) 1642 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL) 1643 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL) 1644 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL) 1645 GEN_VEXT_VV(vmul_vv_b, 1) 1646 GEN_VEXT_VV(vmul_vv_h, 2) 1647 GEN_VEXT_VV(vmul_vv_w, 4) 1648 GEN_VEXT_VV(vmul_vv_d, 8) 1649 1650 static int8_t do_mulh_b(int8_t s2, int8_t s1) 1651 { 1652 return (int16_t)s2 * (int16_t)s1 >> 8; 1653 } 1654 1655 static int16_t do_mulh_h(int16_t s2, int16_t s1) 1656 { 1657 return (int32_t)s2 * (int32_t)s1 >> 16; 1658 } 1659 1660 static int32_t do_mulh_w(int32_t s2, int32_t s1) 1661 { 1662 return (int64_t)s2 * (int64_t)s1 >> 32; 1663 } 1664 1665 static int64_t do_mulh_d(int64_t s2, int64_t s1) 1666 { 1667 uint64_t hi_64, lo_64; 1668 1669 muls64(&lo_64, &hi_64, s1, s2); 1670 return hi_64; 1671 } 1672 1673 static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1) 1674 { 1675 return (uint16_t)s2 * (uint16_t)s1 >> 8; 1676 } 1677 1678 static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1) 1679 { 1680 return (uint32_t)s2 * (uint32_t)s1 >> 16; 1681 } 1682 1683 static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1) 1684 { 1685 return (uint64_t)s2 * (uint64_t)s1 >> 32; 1686 } 1687 1688 static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1) 1689 { 1690 uint64_t hi_64, lo_64; 1691 1692 mulu64(&lo_64, &hi_64, s2, s1); 1693 return hi_64; 1694 } 1695 1696 static int8_t do_mulhsu_b(int8_t s2, uint8_t s1) 1697 { 1698 return (int16_t)s2 * (uint16_t)s1 >> 8; 1699 } 1700 1701 static int16_t do_mulhsu_h(int16_t s2, uint16_t s1) 1702 { 1703 return (int32_t)s2 * (uint32_t)s1 >> 16; 1704 } 1705 1706 static int32_t do_mulhsu_w(int32_t s2, uint32_t s1) 1707 { 1708 return (int64_t)s2 * (uint64_t)s1 >> 32; 1709 } 1710 1711 /* 1712 * Let A = signed operand, 1713 * B = unsigned operand 1714 * P = mulu64(A, B), unsigned product 1715 * 1716 * LET X = 2 ** 64 - A, 2's complement of A 1717 * SP = signed product 1718 * THEN 1719 * IF A < 0 1720 * SP = -X * B 1721 * = -(2 ** 64 - A) * B 1722 * = A * B - 2 ** 64 * B 1723 * = P - 2 ** 64 * B 1724 * ELSE 1725 * SP = P 1726 * THEN 1727 * HI_P -= (A < 0 ? B : 0) 1728 */ 1729 1730 static int64_t do_mulhsu_d(int64_t s2, uint64_t s1) 1731 { 1732 uint64_t hi_64, lo_64; 1733 1734 mulu64(&lo_64, &hi_64, s2, s1); 1735 1736 hi_64 -= s2 < 0 ? 
s1 : 0; 1737 return hi_64; 1738 } 1739 1740 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b) 1741 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h) 1742 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w) 1743 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d) 1744 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b) 1745 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h) 1746 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w) 1747 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d) 1748 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b) 1749 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h) 1750 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w) 1751 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d) 1752 GEN_VEXT_VV(vmulh_vv_b, 1) 1753 GEN_VEXT_VV(vmulh_vv_h, 2) 1754 GEN_VEXT_VV(vmulh_vv_w, 4) 1755 GEN_VEXT_VV(vmulh_vv_d, 8) 1756 GEN_VEXT_VV(vmulhu_vv_b, 1) 1757 GEN_VEXT_VV(vmulhu_vv_h, 2) 1758 GEN_VEXT_VV(vmulhu_vv_w, 4) 1759 GEN_VEXT_VV(vmulhu_vv_d, 8) 1760 GEN_VEXT_VV(vmulhsu_vv_b, 1) 1761 GEN_VEXT_VV(vmulhsu_vv_h, 2) 1762 GEN_VEXT_VV(vmulhsu_vv_w, 4) 1763 GEN_VEXT_VV(vmulhsu_vv_d, 8) 1764 1765 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL) 1766 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL) 1767 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL) 1768 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL) 1769 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b) 1770 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h) 1771 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w) 1772 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d) 1773 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b) 1774 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h) 1775 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w) 1776 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d) 1777 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b) 1778 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h) 1779 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w) 1780 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d) 1781 GEN_VEXT_VX(vmul_vx_b, 1) 1782 GEN_VEXT_VX(vmul_vx_h, 2) 1783 GEN_VEXT_VX(vmul_vx_w, 4) 1784 GEN_VEXT_VX(vmul_vx_d, 8) 1785 GEN_VEXT_VX(vmulh_vx_b, 1) 1786 GEN_VEXT_VX(vmulh_vx_h, 2) 1787 GEN_VEXT_VX(vmulh_vx_w, 4) 1788 GEN_VEXT_VX(vmulh_vx_d, 8) 1789 GEN_VEXT_VX(vmulhu_vx_b, 1) 1790 GEN_VEXT_VX(vmulhu_vx_h, 2) 1791 GEN_VEXT_VX(vmulhu_vx_w, 4) 1792 GEN_VEXT_VX(vmulhu_vx_d, 8) 1793 GEN_VEXT_VX(vmulhsu_vx_b, 1) 1794 GEN_VEXT_VX(vmulhsu_vx_h, 2) 1795 GEN_VEXT_VX(vmulhsu_vx_w, 4) 1796 GEN_VEXT_VX(vmulhsu_vx_d, 8) 1797 1798 /* Vector Integer Divide Instructions */ 1799 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M) 1800 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M) 1801 #define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : \ 1802 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M) 1803 #define DO_REM(N, M) (unlikely(M == 0) ? N : \ 1804 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 
0 : N % M) 1805 1806 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU) 1807 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU) 1808 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU) 1809 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU) 1810 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV) 1811 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV) 1812 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV) 1813 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV) 1814 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU) 1815 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU) 1816 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU) 1817 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU) 1818 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM) 1819 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM) 1820 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM) 1821 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM) 1822 GEN_VEXT_VV(vdivu_vv_b, 1) 1823 GEN_VEXT_VV(vdivu_vv_h, 2) 1824 GEN_VEXT_VV(vdivu_vv_w, 4) 1825 GEN_VEXT_VV(vdivu_vv_d, 8) 1826 GEN_VEXT_VV(vdiv_vv_b, 1) 1827 GEN_VEXT_VV(vdiv_vv_h, 2) 1828 GEN_VEXT_VV(vdiv_vv_w, 4) 1829 GEN_VEXT_VV(vdiv_vv_d, 8) 1830 GEN_VEXT_VV(vremu_vv_b, 1) 1831 GEN_VEXT_VV(vremu_vv_h, 2) 1832 GEN_VEXT_VV(vremu_vv_w, 4) 1833 GEN_VEXT_VV(vremu_vv_d, 8) 1834 GEN_VEXT_VV(vrem_vv_b, 1) 1835 GEN_VEXT_VV(vrem_vv_h, 2) 1836 GEN_VEXT_VV(vrem_vv_w, 4) 1837 GEN_VEXT_VV(vrem_vv_d, 8) 1838 1839 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU) 1840 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU) 1841 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU) 1842 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU) 1843 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV) 1844 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV) 1845 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV) 1846 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV) 1847 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU) 1848 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU) 1849 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU) 1850 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU) 1851 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) 1852 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM) 1853 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM) 1854 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM) 1855 GEN_VEXT_VX(vdivu_vx_b, 1) 1856 GEN_VEXT_VX(vdivu_vx_h, 2) 1857 GEN_VEXT_VX(vdivu_vx_w, 4) 1858 GEN_VEXT_VX(vdivu_vx_d, 8) 1859 GEN_VEXT_VX(vdiv_vx_b, 1) 1860 GEN_VEXT_VX(vdiv_vx_h, 2) 1861 GEN_VEXT_VX(vdiv_vx_w, 4) 1862 GEN_VEXT_VX(vdiv_vx_d, 8) 1863 GEN_VEXT_VX(vremu_vx_b, 1) 1864 GEN_VEXT_VX(vremu_vx_h, 2) 1865 GEN_VEXT_VX(vremu_vx_w, 4) 1866 GEN_VEXT_VX(vremu_vx_d, 8) 1867 GEN_VEXT_VX(vrem_vx_b, 1) 1868 GEN_VEXT_VX(vrem_vx_h, 2) 1869 GEN_VEXT_VX(vrem_vx_w, 4) 1870 GEN_VEXT_VX(vrem_vx_d, 8) 1871 1872 /* Vector Widening Integer Multiply Instructions */ 1873 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) 1874 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL) 1875 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL) 1876 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL) 1877 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL) 1878 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL) 1879 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) 1880 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, 
DO_MUL) 1881 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) 1882 GEN_VEXT_VV(vwmul_vv_b, 2) 1883 GEN_VEXT_VV(vwmul_vv_h, 4) 1884 GEN_VEXT_VV(vwmul_vv_w, 8) 1885 GEN_VEXT_VV(vwmulu_vv_b, 2) 1886 GEN_VEXT_VV(vwmulu_vv_h, 4) 1887 GEN_VEXT_VV(vwmulu_vv_w, 8) 1888 GEN_VEXT_VV(vwmulsu_vv_b, 2) 1889 GEN_VEXT_VV(vwmulsu_vv_h, 4) 1890 GEN_VEXT_VV(vwmulsu_vv_w, 8) 1891 1892 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) 1893 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) 1894 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL) 1895 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL) 1896 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL) 1897 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL) 1898 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) 1899 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) 1900 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) 1901 GEN_VEXT_VX(vwmul_vx_b, 2) 1902 GEN_VEXT_VX(vwmul_vx_h, 4) 1903 GEN_VEXT_VX(vwmul_vx_w, 8) 1904 GEN_VEXT_VX(vwmulu_vx_b, 2) 1905 GEN_VEXT_VX(vwmulu_vx_h, 4) 1906 GEN_VEXT_VX(vwmulu_vx_w, 8) 1907 GEN_VEXT_VX(vwmulsu_vx_b, 2) 1908 GEN_VEXT_VX(vwmulsu_vx_h, 4) 1909 GEN_VEXT_VX(vwmulsu_vx_w, 8) 1910 1911 /* Vector Single-Width Integer Multiply-Add Instructions */ 1912 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1913 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 1914 { \ 1915 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1916 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1917 TD d = *((TD *)vd + HD(i)); \ 1918 *((TD *)vd + HD(i)) = OP(s2, s1, d); \ 1919 } 1920 1921 #define DO_MACC(N, M, D) (M * N + D) 1922 #define DO_NMSAC(N, M, D) (-(M * N) + D) 1923 #define DO_MADD(N, M, D) (M * D + N) 1924 #define DO_NMSUB(N, M, D) (-(M * D) + N) 1925 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC) 1926 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC) 1927 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC) 1928 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC) 1929 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC) 1930 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC) 1931 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC) 1932 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC) 1933 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD) 1934 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD) 1935 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD) 1936 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD) 1937 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB) 1938 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) 1939 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) 1940 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) 1941 GEN_VEXT_VV(vmacc_vv_b, 1) 1942 GEN_VEXT_VV(vmacc_vv_h, 2) 1943 GEN_VEXT_VV(vmacc_vv_w, 4) 1944 GEN_VEXT_VV(vmacc_vv_d, 8) 1945 GEN_VEXT_VV(vnmsac_vv_b, 1) 1946 GEN_VEXT_VV(vnmsac_vv_h, 2) 1947 GEN_VEXT_VV(vnmsac_vv_w, 4) 1948 GEN_VEXT_VV(vnmsac_vv_d, 8) 1949 GEN_VEXT_VV(vmadd_vv_b, 1) 1950 GEN_VEXT_VV(vmadd_vv_h, 2) 1951 GEN_VEXT_VV(vmadd_vv_w, 4) 1952 GEN_VEXT_VV(vmadd_vv_d, 8) 1953 GEN_VEXT_VV(vnmsub_vv_b, 1) 1954 GEN_VEXT_VV(vnmsub_vv_h, 2) 1955 GEN_VEXT_VV(vnmsub_vv_w, 4) 1956 GEN_VEXT_VV(vnmsub_vv_d, 8) 1957 1958 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 1959 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 1960 { \ 1961 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1962 TD d = *((TD *)vd 
+ HD(i)); \ 1963 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \ 1964 } 1965 1966 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC) 1967 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC) 1968 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC) 1969 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC) 1970 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC) 1971 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC) 1972 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC) 1973 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC) 1974 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD) 1975 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD) 1976 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD) 1977 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD) 1978 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB) 1979 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) 1980 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) 1981 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) 1982 GEN_VEXT_VX(vmacc_vx_b, 1) 1983 GEN_VEXT_VX(vmacc_vx_h, 2) 1984 GEN_VEXT_VX(vmacc_vx_w, 4) 1985 GEN_VEXT_VX(vmacc_vx_d, 8) 1986 GEN_VEXT_VX(vnmsac_vx_b, 1) 1987 GEN_VEXT_VX(vnmsac_vx_h, 2) 1988 GEN_VEXT_VX(vnmsac_vx_w, 4) 1989 GEN_VEXT_VX(vnmsac_vx_d, 8) 1990 GEN_VEXT_VX(vmadd_vx_b, 1) 1991 GEN_VEXT_VX(vmadd_vx_h, 2) 1992 GEN_VEXT_VX(vmadd_vx_w, 4) 1993 GEN_VEXT_VX(vmadd_vx_d, 8) 1994 GEN_VEXT_VX(vnmsub_vx_b, 1) 1995 GEN_VEXT_VX(vnmsub_vx_h, 2) 1996 GEN_VEXT_VX(vnmsub_vx_w, 4) 1997 GEN_VEXT_VX(vnmsub_vx_d, 8) 1998 1999 /* Vector Widening Integer Multiply-Add Instructions */ 2000 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) 2001 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC) 2002 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC) 2003 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC) 2004 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC) 2005 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC) 2006 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) 2007 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) 2008 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) 2009 GEN_VEXT_VV(vwmaccu_vv_b, 2) 2010 GEN_VEXT_VV(vwmaccu_vv_h, 4) 2011 GEN_VEXT_VV(vwmaccu_vv_w, 8) 2012 GEN_VEXT_VV(vwmacc_vv_b, 2) 2013 GEN_VEXT_VV(vwmacc_vv_h, 4) 2014 GEN_VEXT_VV(vwmacc_vv_w, 8) 2015 GEN_VEXT_VV(vwmaccsu_vv_b, 2) 2016 GEN_VEXT_VV(vwmaccsu_vv_h, 4) 2017 GEN_VEXT_VV(vwmaccsu_vv_w, 8) 2018 2019 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) 2020 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) 2021 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC) 2022 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC) 2023 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC) 2024 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC) 2025 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC) 2026 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC) 2027 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC) 2028 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) 2029 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) 2030 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) 2031 GEN_VEXT_VX(vwmaccu_vx_b, 2) 2032 GEN_VEXT_VX(vwmaccu_vx_h, 4) 2033 GEN_VEXT_VX(vwmaccu_vx_w, 8) 2034 GEN_VEXT_VX(vwmacc_vx_b, 2) 2035 GEN_VEXT_VX(vwmacc_vx_h, 4) 2036 GEN_VEXT_VX(vwmacc_vx_w, 8) 2037 GEN_VEXT_VX(vwmaccsu_vx_b, 2) 2038 
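/*
 * Illustrative sketch, not part of the original helpers: per element the
 * widening multiply-accumulate expansions around here compute
 * DO_MACC(s2, s1, d) = s1 * s2 + d after the WOP_* type macros have
 * widened both sources to 2*SEW, so the product itself cannot wrap and
 * only the accumulate is performed in the double-width destination.
 * Shown below for vwmaccu.vx at SEW=8; the function name is an
 * assumption made only for this sketch.
 */
static G_GNUC_UNUSED uint16_t example_ref_vwmaccu_vx_b(uint16_t d, uint8_t s2,
                                                       uint8_t rs1)
{
    /* both operands are widened to 16 bits before the multiply */
    return (uint16_t)((uint16_t)s2 * (uint16_t)rs1 + d);
}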
GEN_VEXT_VX(vwmaccsu_vx_h, 4) 2039 GEN_VEXT_VX(vwmaccsu_vx_w, 8) 2040 GEN_VEXT_VX(vwmaccus_vx_b, 2) 2041 GEN_VEXT_VX(vwmaccus_vx_h, 4) 2042 GEN_VEXT_VX(vwmaccus_vx_w, 8) 2043 2044 /* Vector Integer Merge and Move Instructions */ 2045 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \ 2046 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \ 2047 uint32_t desc) \ 2048 { \ 2049 uint32_t vl = env->vl; \ 2050 uint32_t esz = sizeof(ETYPE); \ 2051 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2052 uint32_t vta = vext_vta(desc); \ 2053 uint32_t i; \ 2054 \ 2055 VSTART_CHECK_EARLY_EXIT(env, vl); \ 2056 \ 2057 for (i = env->vstart; i < vl; i++) { \ 2058 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 2059 *((ETYPE *)vd + H(i)) = s1; \ 2060 } \ 2061 env->vstart = 0; \ 2062 /* set tail elements to 1s */ \ 2063 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2064 } 2065 2066 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1) 2067 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2) 2068 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4) 2069 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8) 2070 2071 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \ 2072 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \ 2073 uint32_t desc) \ 2074 { \ 2075 uint32_t vl = env->vl; \ 2076 uint32_t esz = sizeof(ETYPE); \ 2077 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2078 uint32_t vta = vext_vta(desc); \ 2079 uint32_t i; \ 2080 \ 2081 VSTART_CHECK_EARLY_EXIT(env, vl); \ 2082 \ 2083 for (i = env->vstart; i < vl; i++) { \ 2084 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \ 2085 } \ 2086 env->vstart = 0; \ 2087 /* set tail elements to 1s */ \ 2088 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2089 } 2090 2091 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1) 2092 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2) 2093 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4) 2094 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8) 2095 2096 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \ 2097 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 2098 CPURISCVState *env, uint32_t desc) \ 2099 { \ 2100 uint32_t vl = env->vl; \ 2101 uint32_t esz = sizeof(ETYPE); \ 2102 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2103 uint32_t vta = vext_vta(desc); \ 2104 uint32_t i; \ 2105 \ 2106 VSTART_CHECK_EARLY_EXIT(env, vl); \ 2107 \ 2108 for (i = env->vstart; i < vl; i++) { \ 2109 ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1); \ 2110 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \ 2111 } \ 2112 env->vstart = 0; \ 2113 /* set tail elements to 1s */ \ 2114 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2115 } 2116 2117 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1) 2118 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2) 2119 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4) 2120 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8) 2121 2122 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \ 2123 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2124 void *vs2, CPURISCVState *env, uint32_t desc) \ 2125 { \ 2126 uint32_t vl = env->vl; \ 2127 uint32_t esz = sizeof(ETYPE); \ 2128 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2129 uint32_t vta = vext_vta(desc); \ 2130 uint32_t i; \ 2131 \ 2132 VSTART_CHECK_EARLY_EXIT(env, vl); \ 2133 \ 2134 for (i = env->vstart; i < vl; i++) { \ 2135 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 2136 ETYPE d = (!vext_elem_mask(v0, i) ? 
s2 : \ 2137 (ETYPE)(target_long)s1); \ 2138 *((ETYPE *)vd + H(i)) = d; \ 2139 } \ 2140 env->vstart = 0; \ 2141 /* set tail elements to 1s */ \ 2142 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2143 } 2144 2145 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1) 2146 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2) 2147 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4) 2148 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8) 2149 2150 /* 2151 * Vector Fixed-Point Arithmetic Instructions 2152 */ 2153 2154 /* Vector Single-Width Saturating Add and Subtract */ 2155 2156 /* 2157 * As fixed point instructions probably have round mode and saturation, 2158 * define common macros for fixed point here. 2159 */ 2160 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i, 2161 CPURISCVState *env, int vxrm); 2162 2163 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 2164 static inline void \ 2165 do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 2166 CPURISCVState *env, int vxrm) \ 2167 { \ 2168 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 2169 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2170 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \ 2171 } 2172 2173 static inline void 2174 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2, 2175 CPURISCVState *env, 2176 uint32_t vl, uint32_t vm, int vxrm, 2177 opivv2_rm_fn *fn, uint32_t vma, uint32_t esz) 2178 { 2179 for (uint32_t i = env->vstart; i < vl; i++) { 2180 if (!vm && !vext_elem_mask(v0, i)) { 2181 /* set masked-off elements to 1s */ 2182 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); 2183 continue; 2184 } 2185 fn(vd, vs1, vs2, i, env, vxrm); 2186 } 2187 env->vstart = 0; 2188 } 2189 2190 static inline void 2191 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, 2192 CPURISCVState *env, 2193 uint32_t desc, 2194 opivv2_rm_fn *fn, uint32_t esz) 2195 { 2196 uint32_t vm = vext_vm(desc); 2197 uint32_t vl = env->vl; 2198 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 2199 uint32_t vta = vext_vta(desc); 2200 uint32_t vma = vext_vma(desc); 2201 2202 VSTART_CHECK_EARLY_EXIT(env, vl); 2203 2204 switch (env->vxrm) { 2205 case 0: /* rnu */ 2206 vext_vv_rm_1(vd, v0, vs1, vs2, 2207 env, vl, vm, 0, fn, vma, esz); 2208 break; 2209 case 1: /* rne */ 2210 vext_vv_rm_1(vd, v0, vs1, vs2, 2211 env, vl, vm, 1, fn, vma, esz); 2212 break; 2213 case 2: /* rdn */ 2214 vext_vv_rm_1(vd, v0, vs1, vs2, 2215 env, vl, vm, 2, fn, vma, esz); 2216 break; 2217 default: /* rod */ 2218 vext_vv_rm_1(vd, v0, vs1, vs2, 2219 env, vl, vm, 3, fn, vma, esz); 2220 break; 2221 } 2222 /* set tail elements to 1s */ 2223 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 2224 } 2225 2226 /* generate helpers for fixed point instructions with OPIVV format */ 2227 #define GEN_VEXT_VV_RM(NAME, ESZ) \ 2228 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 2229 CPURISCVState *env, uint32_t desc) \ 2230 { \ 2231 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \ 2232 do_##NAME, ESZ); \ 2233 } 2234 2235 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, 2236 uint8_t b) 2237 { 2238 uint8_t res = a + b; 2239 if (res < a) { 2240 res = UINT8_MAX; 2241 env->vxsat = 0x1; 2242 } 2243 return res; 2244 } 2245 2246 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a, 2247 uint16_t b) 2248 { 2249 uint16_t res = a + b; 2250 if (res < a) { 2251 res = UINT16_MAX; 2252 env->vxsat = 0x1; 2253 } 2254 return res; 2255 } 2256 2257 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a, 2258 uint32_t b) 2259 { 2260 uint32_t res = a + b; 
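    /*
     * Unsigned addition wraps modulo the type width, so res < a holds
     * exactly when a + b overflowed; the result is then clamped to the
     * type maximum and vxsat is raised.  The 8-, 16- and 64-bit vsaddu
     * helpers around this one use the same check.
     */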
2261 if (res < a) { 2262 res = UINT32_MAX; 2263 env->vxsat = 0x1; 2264 } 2265 return res; 2266 } 2267 2268 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a, 2269 uint64_t b) 2270 { 2271 uint64_t res = a + b; 2272 if (res < a) { 2273 res = UINT64_MAX; 2274 env->vxsat = 0x1; 2275 } 2276 return res; 2277 } 2278 2279 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8) 2280 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) 2281 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) 2282 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64) 2283 GEN_VEXT_VV_RM(vsaddu_vv_b, 1) 2284 GEN_VEXT_VV_RM(vsaddu_vv_h, 2) 2285 GEN_VEXT_VV_RM(vsaddu_vv_w, 4) 2286 GEN_VEXT_VV_RM(vsaddu_vv_d, 8) 2287 2288 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, 2289 CPURISCVState *env, int vxrm); 2290 2291 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2292 static inline void \ 2293 do_##NAME(void *vd, target_long s1, void *vs2, int i, \ 2294 CPURISCVState *env, int vxrm) \ 2295 { \ 2296 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2297 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \ 2298 } 2299 2300 static inline void 2301 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2, 2302 CPURISCVState *env, 2303 uint32_t vl, uint32_t vm, int vxrm, 2304 opivx2_rm_fn *fn, uint32_t vma, uint32_t esz) 2305 { 2306 for (uint32_t i = env->vstart; i < vl; i++) { 2307 if (!vm && !vext_elem_mask(v0, i)) { 2308 /* set masked-off elements to 1s */ 2309 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); 2310 continue; 2311 } 2312 fn(vd, s1, vs2, i, env, vxrm); 2313 } 2314 env->vstart = 0; 2315 } 2316 2317 static inline void 2318 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, 2319 CPURISCVState *env, 2320 uint32_t desc, 2321 opivx2_rm_fn *fn, uint32_t esz) 2322 { 2323 uint32_t vm = vext_vm(desc); 2324 uint32_t vl = env->vl; 2325 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 2326 uint32_t vta = vext_vta(desc); 2327 uint32_t vma = vext_vma(desc); 2328 2329 VSTART_CHECK_EARLY_EXIT(env, vl); 2330 2331 switch (env->vxrm) { 2332 case 0: /* rnu */ 2333 vext_vx_rm_1(vd, v0, s1, vs2, 2334 env, vl, vm, 0, fn, vma, esz); 2335 break; 2336 case 1: /* rne */ 2337 vext_vx_rm_1(vd, v0, s1, vs2, 2338 env, vl, vm, 1, fn, vma, esz); 2339 break; 2340 case 2: /* rdn */ 2341 vext_vx_rm_1(vd, v0, s1, vs2, 2342 env, vl, vm, 2, fn, vma, esz); 2343 break; 2344 default: /* rod */ 2345 vext_vx_rm_1(vd, v0, s1, vs2, 2346 env, vl, vm, 3, fn, vma, esz); 2347 break; 2348 } 2349 /* set tail elements to 1s */ 2350 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 2351 } 2352 2353 /* generate helpers for fixed point instructions with OPIVX format */ 2354 #define GEN_VEXT_VX_RM(NAME, ESZ) \ 2355 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2356 void *vs2, CPURISCVState *env, \ 2357 uint32_t desc) \ 2358 { \ 2359 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \ 2360 do_##NAME, ESZ); \ 2361 } 2362 2363 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) 2364 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) 2365 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) 2366 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) 2367 GEN_VEXT_VX_RM(vsaddu_vx_b, 1) 2368 GEN_VEXT_VX_RM(vsaddu_vx_h, 2) 2369 GEN_VEXT_VX_RM(vsaddu_vx_w, 4) 2370 GEN_VEXT_VX_RM(vsaddu_vx_d, 8) 2371 2372 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2373 { 2374 int8_t res = a + b; 2375 if ((res ^ a) & (res ^ 
b) & INT8_MIN) { 2376 res = a > 0 ? INT8_MAX : INT8_MIN; 2377 env->vxsat = 0x1; 2378 } 2379 return res; 2380 } 2381 2382 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, 2383 int16_t b) 2384 { 2385 int16_t res = a + b; 2386 if ((res ^ a) & (res ^ b) & INT16_MIN) { 2387 res = a > 0 ? INT16_MAX : INT16_MIN; 2388 env->vxsat = 0x1; 2389 } 2390 return res; 2391 } 2392 2393 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, 2394 int32_t b) 2395 { 2396 int32_t res = a + b; 2397 if ((res ^ a) & (res ^ b) & INT32_MIN) { 2398 res = a > 0 ? INT32_MAX : INT32_MIN; 2399 env->vxsat = 0x1; 2400 } 2401 return res; 2402 } 2403 2404 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, 2405 int64_t b) 2406 { 2407 int64_t res = a + b; 2408 if ((res ^ a) & (res ^ b) & INT64_MIN) { 2409 res = a > 0 ? INT64_MAX : INT64_MIN; 2410 env->vxsat = 0x1; 2411 } 2412 return res; 2413 } 2414 2415 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8) 2416 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) 2417 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) 2418 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) 2419 GEN_VEXT_VV_RM(vsadd_vv_b, 1) 2420 GEN_VEXT_VV_RM(vsadd_vv_h, 2) 2421 GEN_VEXT_VV_RM(vsadd_vv_w, 4) 2422 GEN_VEXT_VV_RM(vsadd_vv_d, 8) 2423 2424 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) 2425 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) 2426 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) 2427 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) 2428 GEN_VEXT_VX_RM(vsadd_vx_b, 1) 2429 GEN_VEXT_VX_RM(vsadd_vx_h, 2) 2430 GEN_VEXT_VX_RM(vsadd_vx_w, 4) 2431 GEN_VEXT_VX_RM(vsadd_vx_d, 8) 2432 2433 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, 2434 uint8_t b) 2435 { 2436 uint8_t res = a - b; 2437 if (res > a) { 2438 res = 0; 2439 env->vxsat = 0x1; 2440 } 2441 return res; 2442 } 2443 2444 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a, 2445 uint16_t b) 2446 { 2447 uint16_t res = a - b; 2448 if (res > a) { 2449 res = 0; 2450 env->vxsat = 0x1; 2451 } 2452 return res; 2453 } 2454 2455 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a, 2456 uint32_t b) 2457 { 2458 uint32_t res = a - b; 2459 if (res > a) { 2460 res = 0; 2461 env->vxsat = 0x1; 2462 } 2463 return res; 2464 } 2465 2466 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a, 2467 uint64_t b) 2468 { 2469 uint64_t res = a - b; 2470 if (res > a) { 2471 res = 0; 2472 env->vxsat = 0x1; 2473 } 2474 return res; 2475 } 2476 2477 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8) 2478 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16) 2479 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) 2480 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64) 2481 GEN_VEXT_VV_RM(vssubu_vv_b, 1) 2482 GEN_VEXT_VV_RM(vssubu_vv_h, 2) 2483 GEN_VEXT_VV_RM(vssubu_vv_w, 4) 2484 GEN_VEXT_VV_RM(vssubu_vv_d, 8) 2485 2486 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) 2487 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) 2488 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) 2489 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) 2490 GEN_VEXT_VX_RM(vssubu_vx_b, 1) 2491 GEN_VEXT_VX_RM(vssubu_vx_h, 2) 2492 GEN_VEXT_VX_RM(vssubu_vx_w, 4) 2493 GEN_VEXT_VX_RM(vssubu_vx_d, 8) 2494 2495 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2496 { 2497 int8_t res = a - b; 2498 
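    /*
     * Signed subtraction can only overflow when the operands have
     * different signs, i.e. (a ^ b) is negative, and it actually did
     * overflow when the result's sign differs from a's, i.e. (res ^ a)
     * is negative; the check below tests both conditions in the sign
     * bit (INT8_MIN).
     */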
if ((res ^ a) & (a ^ b) & INT8_MIN) { 2499 res = a >= 0 ? INT8_MAX : INT8_MIN; 2500 env->vxsat = 0x1; 2501 } 2502 return res; 2503 } 2504 2505 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, 2506 int16_t b) 2507 { 2508 int16_t res = a - b; 2509 if ((res ^ a) & (a ^ b) & INT16_MIN) { 2510 res = a >= 0 ? INT16_MAX : INT16_MIN; 2511 env->vxsat = 0x1; 2512 } 2513 return res; 2514 } 2515 2516 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, 2517 int32_t b) 2518 { 2519 int32_t res = a - b; 2520 if ((res ^ a) & (a ^ b) & INT32_MIN) { 2521 res = a >= 0 ? INT32_MAX : INT32_MIN; 2522 env->vxsat = 0x1; 2523 } 2524 return res; 2525 } 2526 2527 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, 2528 int64_t b) 2529 { 2530 int64_t res = a - b; 2531 if ((res ^ a) & (a ^ b) & INT64_MIN) { 2532 res = a >= 0 ? INT64_MAX : INT64_MIN; 2533 env->vxsat = 0x1; 2534 } 2535 return res; 2536 } 2537 2538 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8) 2539 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16) 2540 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32) 2541 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64) 2542 GEN_VEXT_VV_RM(vssub_vv_b, 1) 2543 GEN_VEXT_VV_RM(vssub_vv_h, 2) 2544 GEN_VEXT_VV_RM(vssub_vv_w, 4) 2545 GEN_VEXT_VV_RM(vssub_vv_d, 8) 2546 2547 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8) 2548 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16) 2549 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32) 2550 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64) 2551 GEN_VEXT_VX_RM(vssub_vx_b, 1) 2552 GEN_VEXT_VX_RM(vssub_vx_h, 2) 2553 GEN_VEXT_VX_RM(vssub_vx_w, 4) 2554 GEN_VEXT_VX_RM(vssub_vx_d, 8) 2555 2556 /* Vector Single-Width Averaging Add and Subtract */ 2557 static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift) 2558 { 2559 uint8_t d = extract64(v, shift, 1); 2560 uint8_t d1; 2561 uint64_t D1, D2; 2562 2563 if (shift == 0 || shift > 64) { 2564 return 0; 2565 } 2566 2567 d1 = extract64(v, shift - 1, 1); 2568 D1 = extract64(v, 0, shift); 2569 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */ 2570 return d1; 2571 } else if (vxrm == 1) { /* round-to-nearest-even */ 2572 if (shift > 1) { 2573 D2 = extract64(v, 0, shift - 1); 2574 return d1 & ((D2 != 0) | d); 2575 } else { 2576 return d1 & d; 2577 } 2578 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */ 2579 return !d & (D1 != 0); 2580 } 2581 return 0; /* round-down (truncate) */ 2582 } 2583 2584 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, 2585 int32_t b) 2586 { 2587 int64_t res = (int64_t)a + b; 2588 uint8_t round = get_round(vxrm, res, 1); 2589 2590 return (res >> 1) + round; 2591 } 2592 2593 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, 2594 int64_t b) 2595 { 2596 int64_t res = a + b; 2597 uint8_t round = get_round(vxrm, res, 1); 2598 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN; 2599 2600 /* With signed overflow, bit 64 is inverse of bit 63. 
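 * The exact sum of two int64_t values fits in 65 bits, and bit 64 is the
 * correct sign of the halved result.  Without overflow that bit equals
 * bit 63 of the truncated sum; with overflow it is the inverse, so
 * XOR-ing the arithmetic-shifted result with 'over' (INT64_MIN on
 * overflow, 0 otherwise) flips bit 63 back to the exact value before the
 * rounding increment is added.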
*/ 2601 return ((res >> 1) ^ over) + round; 2602 } 2603 2604 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32) 2605 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) 2606 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) 2607 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) 2608 GEN_VEXT_VV_RM(vaadd_vv_b, 1) 2609 GEN_VEXT_VV_RM(vaadd_vv_h, 2) 2610 GEN_VEXT_VV_RM(vaadd_vv_w, 4) 2611 GEN_VEXT_VV_RM(vaadd_vv_d, 8) 2612 2613 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) 2614 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) 2615 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) 2616 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) 2617 GEN_VEXT_VX_RM(vaadd_vx_b, 1) 2618 GEN_VEXT_VX_RM(vaadd_vx_h, 2) 2619 GEN_VEXT_VX_RM(vaadd_vx_w, 4) 2620 GEN_VEXT_VX_RM(vaadd_vx_d, 8) 2621 2622 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm, 2623 uint32_t a, uint32_t b) 2624 { 2625 uint64_t res = (uint64_t)a + b; 2626 uint8_t round = get_round(vxrm, res, 1); 2627 2628 return (res >> 1) + round; 2629 } 2630 2631 static inline uint64_t aaddu64(CPURISCVState *env, int vxrm, 2632 uint64_t a, uint64_t b) 2633 { 2634 uint64_t res = a + b; 2635 uint8_t round = get_round(vxrm, res, 1); 2636 uint64_t over = (uint64_t)(res < a) << 63; 2637 2638 return ((res >> 1) | over) + round; 2639 } 2640 2641 RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32) 2642 RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32) 2643 RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32) 2644 RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64) 2645 GEN_VEXT_VV_RM(vaaddu_vv_b, 1) 2646 GEN_VEXT_VV_RM(vaaddu_vv_h, 2) 2647 GEN_VEXT_VV_RM(vaaddu_vv_w, 4) 2648 GEN_VEXT_VV_RM(vaaddu_vv_d, 8) 2649 2650 RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32) 2651 RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32) 2652 RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32) 2653 RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64) 2654 GEN_VEXT_VX_RM(vaaddu_vx_b, 1) 2655 GEN_VEXT_VX_RM(vaaddu_vx_h, 2) 2656 GEN_VEXT_VX_RM(vaaddu_vx_w, 4) 2657 GEN_VEXT_VX_RM(vaaddu_vx_d, 8) 2658 2659 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, 2660 int32_t b) 2661 { 2662 int64_t res = (int64_t)a - b; 2663 uint8_t round = get_round(vxrm, res, 1); 2664 2665 return (res >> 1) + round; 2666 } 2667 2668 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, 2669 int64_t b) 2670 { 2671 int64_t res = (int64_t)a - b; 2672 uint8_t round = get_round(vxrm, res, 1); 2673 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN; 2674 2675 /* With signed overflow, bit 64 is inverse of bit 63. 
*/ 2676 return ((res >> 1) ^ over) + round; 2677 } 2678 2679 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32) 2680 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32) 2681 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) 2682 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) 2683 GEN_VEXT_VV_RM(vasub_vv_b, 1) 2684 GEN_VEXT_VV_RM(vasub_vv_h, 2) 2685 GEN_VEXT_VV_RM(vasub_vv_w, 4) 2686 GEN_VEXT_VV_RM(vasub_vv_d, 8) 2687 2688 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) 2689 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) 2690 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) 2691 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) 2692 GEN_VEXT_VX_RM(vasub_vx_b, 1) 2693 GEN_VEXT_VX_RM(vasub_vx_h, 2) 2694 GEN_VEXT_VX_RM(vasub_vx_w, 4) 2695 GEN_VEXT_VX_RM(vasub_vx_d, 8) 2696 2697 static inline uint32_t asubu32(CPURISCVState *env, int vxrm, 2698 uint32_t a, uint32_t b) 2699 { 2700 int64_t res = (int64_t)a - b; 2701 uint8_t round = get_round(vxrm, res, 1); 2702 2703 return (res >> 1) + round; 2704 } 2705 2706 static inline uint64_t asubu64(CPURISCVState *env, int vxrm, 2707 uint64_t a, uint64_t b) 2708 { 2709 uint64_t res = (uint64_t)a - b; 2710 uint8_t round = get_round(vxrm, res, 1); 2711 uint64_t over = (uint64_t)(res > a) << 63; 2712 2713 return ((res >> 1) | over) + round; 2714 } 2715 2716 RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32) 2717 RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32) 2718 RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32) 2719 RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64) 2720 GEN_VEXT_VV_RM(vasubu_vv_b, 1) 2721 GEN_VEXT_VV_RM(vasubu_vv_h, 2) 2722 GEN_VEXT_VV_RM(vasubu_vv_w, 4) 2723 GEN_VEXT_VV_RM(vasubu_vv_d, 8) 2724 2725 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32) 2726 RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32) 2727 RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32) 2728 RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64) 2729 GEN_VEXT_VX_RM(vasubu_vx_b, 1) 2730 GEN_VEXT_VX_RM(vasubu_vx_h, 2) 2731 GEN_VEXT_VX_RM(vasubu_vx_w, 4) 2732 GEN_VEXT_VX_RM(vasubu_vx_d, 8) 2733 2734 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */ 2735 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2736 { 2737 uint8_t round; 2738 int16_t res; 2739 2740 res = (int16_t)a * (int16_t)b; 2741 round = get_round(vxrm, res, 7); 2742 res = (res >> 7) + round; 2743 2744 if (res > INT8_MAX) { 2745 env->vxsat = 0x1; 2746 return INT8_MAX; 2747 } else if (res < INT8_MIN) { 2748 env->vxsat = 0x1; 2749 return INT8_MIN; 2750 } else { 2751 return res; 2752 } 2753 } 2754 2755 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2756 { 2757 uint8_t round; 2758 int32_t res; 2759 2760 res = (int32_t)a * (int32_t)b; 2761 round = get_round(vxrm, res, 15); 2762 res = (res >> 15) + round; 2763 2764 if (res > INT16_MAX) { 2765 env->vxsat = 0x1; 2766 return INT16_MAX; 2767 } else if (res < INT16_MIN) { 2768 env->vxsat = 0x1; 2769 return INT16_MIN; 2770 } else { 2771 return res; 2772 } 2773 } 2774 2775 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2776 { 2777 uint8_t round; 2778 int64_t res; 2779 2780 res = (int64_t)a * (int64_t)b; 2781 round = get_round(vxrm, res, 31); 2782 res = (res >> 31) + round; 2783 2784 if (res > INT32_MAX) { 2785 env->vxsat = 0x1; 2786 return INT32_MAX; 2787 } else if (res < INT32_MIN) { 2788 env->vxsat = 0x1; 
2789 return INT32_MIN; 2790 } else { 2791 return res; 2792 } 2793 } 2794 2795 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2796 { 2797 uint8_t round; 2798 uint64_t hi_64, lo_64; 2799 int64_t res; 2800 2801 if (a == INT64_MIN && b == INT64_MIN) { 2802 env->vxsat = 1; 2803 return INT64_MAX; 2804 } 2805 2806 muls64(&lo_64, &hi_64, a, b); 2807 round = get_round(vxrm, lo_64, 63); 2808 /* 2809 * Cannot overflow, as there are always 2810 * 2 sign bits after multiply. 2811 */ 2812 res = (hi_64 << 1) | (lo_64 >> 63); 2813 if (round) { 2814 if (res == INT64_MAX) { 2815 env->vxsat = 1; 2816 } else { 2817 res += 1; 2818 } 2819 } 2820 return res; 2821 } 2822 2823 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8) 2824 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) 2825 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) 2826 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) 2827 GEN_VEXT_VV_RM(vsmul_vv_b, 1) 2828 GEN_VEXT_VV_RM(vsmul_vv_h, 2) 2829 GEN_VEXT_VV_RM(vsmul_vv_w, 4) 2830 GEN_VEXT_VV_RM(vsmul_vv_d, 8) 2831 2832 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) 2833 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) 2834 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) 2835 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) 2836 GEN_VEXT_VX_RM(vsmul_vx_b, 1) 2837 GEN_VEXT_VX_RM(vsmul_vx_h, 2) 2838 GEN_VEXT_VX_RM(vsmul_vx_w, 4) 2839 GEN_VEXT_VX_RM(vsmul_vx_d, 8) 2840 2841 /* Vector Single-Width Scaling Shift Instructions */ 2842 static inline uint8_t 2843 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2844 { 2845 uint8_t round, shift = b & 0x7; 2846 uint8_t res; 2847 2848 round = get_round(vxrm, a, shift); 2849 res = (a >> shift) + round; 2850 return res; 2851 } 2852 static inline uint16_t 2853 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b) 2854 { 2855 uint8_t round, shift = b & 0xf; 2856 2857 round = get_round(vxrm, a, shift); 2858 return (a >> shift) + round; 2859 } 2860 static inline uint32_t 2861 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) 2862 { 2863 uint8_t round, shift = b & 0x1f; 2864 2865 round = get_round(vxrm, a, shift); 2866 return (a >> shift) + round; 2867 } 2868 static inline uint64_t 2869 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b) 2870 { 2871 uint8_t round, shift = b & 0x3f; 2872 2873 round = get_round(vxrm, a, shift); 2874 return (a >> shift) + round; 2875 } 2876 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8) 2877 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) 2878 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) 2879 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) 2880 GEN_VEXT_VV_RM(vssrl_vv_b, 1) 2881 GEN_VEXT_VV_RM(vssrl_vv_h, 2) 2882 GEN_VEXT_VV_RM(vssrl_vv_w, 4) 2883 GEN_VEXT_VV_RM(vssrl_vv_d, 8) 2884 2885 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) 2886 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) 2887 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) 2888 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) 2889 GEN_VEXT_VX_RM(vssrl_vx_b, 1) 2890 GEN_VEXT_VX_RM(vssrl_vx_h, 2) 2891 GEN_VEXT_VX_RM(vssrl_vx_w, 4) 2892 GEN_VEXT_VX_RM(vssrl_vx_d, 8) 2893 2894 static inline int8_t 2895 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2896 { 2897 uint8_t round, shift = b & 0x7; 2898 2899 round = get_round(vxrm, a, shift); 2900 return (a >> shift) + round; 2901 } 2902 static inline int16_t 2903 
vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2904 { 2905 uint8_t round, shift = b & 0xf; 2906 2907 round = get_round(vxrm, a, shift); 2908 return (a >> shift) + round; 2909 } 2910 static inline int32_t 2911 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2912 { 2913 uint8_t round, shift = b & 0x1f; 2914 2915 round = get_round(vxrm, a, shift); 2916 return (a >> shift) + round; 2917 } 2918 static inline int64_t 2919 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2920 { 2921 uint8_t round, shift = b & 0x3f; 2922 2923 round = get_round(vxrm, a, shift); 2924 return (a >> shift) + round; 2925 } 2926 2927 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) 2928 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) 2929 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) 2930 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) 2931 GEN_VEXT_VV_RM(vssra_vv_b, 1) 2932 GEN_VEXT_VV_RM(vssra_vv_h, 2) 2933 GEN_VEXT_VV_RM(vssra_vv_w, 4) 2934 GEN_VEXT_VV_RM(vssra_vv_d, 8) 2935 2936 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) 2937 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) 2938 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32) 2939 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) 2940 GEN_VEXT_VX_RM(vssra_vx_b, 1) 2941 GEN_VEXT_VX_RM(vssra_vx_h, 2) 2942 GEN_VEXT_VX_RM(vssra_vx_w, 4) 2943 GEN_VEXT_VX_RM(vssra_vx_d, 8) 2944 2945 /* Vector Narrowing Fixed-Point Clip Instructions */ 2946 static inline int8_t 2947 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b) 2948 { 2949 uint8_t round, shift = b & 0xf; 2950 int16_t res; 2951 2952 round = get_round(vxrm, a, shift); 2953 res = (a >> shift) + round; 2954 if (res > INT8_MAX) { 2955 env->vxsat = 0x1; 2956 return INT8_MAX; 2957 } else if (res < INT8_MIN) { 2958 env->vxsat = 0x1; 2959 return INT8_MIN; 2960 } else { 2961 return res; 2962 } 2963 } 2964 2965 static inline int16_t 2966 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b) 2967 { 2968 uint8_t round, shift = b & 0x1f; 2969 int32_t res; 2970 2971 round = get_round(vxrm, a, shift); 2972 res = (a >> shift) + round; 2973 if (res > INT16_MAX) { 2974 env->vxsat = 0x1; 2975 return INT16_MAX; 2976 } else if (res < INT16_MIN) { 2977 env->vxsat = 0x1; 2978 return INT16_MIN; 2979 } else { 2980 return res; 2981 } 2982 } 2983 2984 static inline int32_t 2985 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) 2986 { 2987 uint8_t round, shift = b & 0x3f; 2988 int64_t res; 2989 2990 round = get_round(vxrm, a, shift); 2991 res = (a >> shift) + round; 2992 if (res > INT32_MAX) { 2993 env->vxsat = 0x1; 2994 return INT32_MAX; 2995 } else if (res < INT32_MIN) { 2996 env->vxsat = 0x1; 2997 return INT32_MIN; 2998 } else { 2999 return res; 3000 } 3001 } 3002 3003 RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8) 3004 RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16) 3005 RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32) 3006 GEN_VEXT_VV_RM(vnclip_wv_b, 1) 3007 GEN_VEXT_VV_RM(vnclip_wv_h, 2) 3008 GEN_VEXT_VV_RM(vnclip_wv_w, 4) 3009 3010 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8) 3011 RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16) 3012 RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32) 3013 GEN_VEXT_VX_RM(vnclip_wx_b, 1) 3014 GEN_VEXT_VX_RM(vnclip_wx_h, 2) 3015 GEN_VEXT_VX_RM(vnclip_wx_w, 4) 3016 3017 static inline uint8_t 3018 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b) 3019 { 3020 
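    /*
     * Narrowing unsigned clip: 'a' is the 2*SEW (16-bit) source element
     * and the shift amount is the low 4 bits of 'b'.  After the right
     * shift plus the vxrm rounding increment, the value is saturated to
     * the 8-bit unsigned range, raising vxsat when it had to be clipped.
     */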
uint8_t round, shift = b & 0xf; 3021 uint16_t res; 3022 3023 round = get_round(vxrm, a, shift); 3024 res = (a >> shift) + round; 3025 if (res > UINT8_MAX) { 3026 env->vxsat = 0x1; 3027 return UINT8_MAX; 3028 } else { 3029 return res; 3030 } 3031 } 3032 3033 static inline uint16_t 3034 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b) 3035 { 3036 uint8_t round, shift = b & 0x1f; 3037 uint32_t res; 3038 3039 round = get_round(vxrm, a, shift); 3040 res = (a >> shift) + round; 3041 if (res > UINT16_MAX) { 3042 env->vxsat = 0x1; 3043 return UINT16_MAX; 3044 } else { 3045 return res; 3046 } 3047 } 3048 3049 static inline uint32_t 3050 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b) 3051 { 3052 uint8_t round, shift = b & 0x3f; 3053 uint64_t res; 3054 3055 round = get_round(vxrm, a, shift); 3056 res = (a >> shift) + round; 3057 if (res > UINT32_MAX) { 3058 env->vxsat = 0x1; 3059 return UINT32_MAX; 3060 } else { 3061 return res; 3062 } 3063 } 3064 3065 RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8) 3066 RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16) 3067 RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32) 3068 GEN_VEXT_VV_RM(vnclipu_wv_b, 1) 3069 GEN_VEXT_VV_RM(vnclipu_wv_h, 2) 3070 GEN_VEXT_VV_RM(vnclipu_wv_w, 4) 3071 3072 RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8) 3073 RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16) 3074 RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32) 3075 GEN_VEXT_VX_RM(vnclipu_wx_b, 1) 3076 GEN_VEXT_VX_RM(vnclipu_wx_h, 2) 3077 GEN_VEXT_VX_RM(vnclipu_wx_w, 4) 3078 3079 /* 3080 * Vector Float Point Arithmetic Instructions 3081 */ 3082 /* Vector Single-Width Floating-Point Add/Subtract Instructions */ 3083 #define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3084 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3085 CPURISCVState *env) \ 3086 { \ 3087 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3088 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3089 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \ 3090 } 3091 3092 #define GEN_VEXT_VV_ENV(NAME, ESZ) \ 3093 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 3094 void *vs2, CPURISCVState *env, \ 3095 uint32_t desc) \ 3096 { \ 3097 uint32_t vm = vext_vm(desc); \ 3098 uint32_t vl = env->vl; \ 3099 uint32_t total_elems = \ 3100 vext_get_total_elems(env, desc, ESZ); \ 3101 uint32_t vta = vext_vta(desc); \ 3102 uint32_t vma = vext_vma(desc); \ 3103 uint32_t i; \ 3104 \ 3105 VSTART_CHECK_EARLY_EXIT(env, vl); \ 3106 \ 3107 for (i = env->vstart; i < vl; i++) { \ 3108 if (!vm && !vext_elem_mask(v0, i)) { \ 3109 /* set masked-off elements to 1s */ \ 3110 vext_set_elems_1s(vd, vma, i * ESZ, \ 3111 (i + 1) * ESZ); \ 3112 continue; \ 3113 } \ 3114 do_##NAME(vd, vs1, vs2, i, env); \ 3115 } \ 3116 env->vstart = 0; \ 3117 /* set tail elements to 1s */ \ 3118 vext_set_elems_1s(vd, vta, vl * ESZ, \ 3119 total_elems * ESZ); \ 3120 } 3121 3122 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add) 3123 RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add) 3124 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add) 3125 GEN_VEXT_VV_ENV(vfadd_vv_h, 2) 3126 GEN_VEXT_VV_ENV(vfadd_vv_w, 4) 3127 GEN_VEXT_VV_ENV(vfadd_vv_d, 8) 3128 3129 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3130 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3131 CPURISCVState *env) \ 3132 { \ 3133 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3134 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, 
&env->fp_status);\ 3135 } 3136 3137 #define GEN_VEXT_VF(NAME, ESZ) \ 3138 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ 3139 void *vs2, CPURISCVState *env, \ 3140 uint32_t desc) \ 3141 { \ 3142 uint32_t vm = vext_vm(desc); \ 3143 uint32_t vl = env->vl; \ 3144 uint32_t total_elems = \ 3145 vext_get_total_elems(env, desc, ESZ); \ 3146 uint32_t vta = vext_vta(desc); \ 3147 uint32_t vma = vext_vma(desc); \ 3148 uint32_t i; \ 3149 \ 3150 VSTART_CHECK_EARLY_EXIT(env, vl); \ 3151 \ 3152 for (i = env->vstart; i < vl; i++) { \ 3153 if (!vm && !vext_elem_mask(v0, i)) { \ 3154 /* set masked-off elements to 1s */ \ 3155 vext_set_elems_1s(vd, vma, i * ESZ, \ 3156 (i + 1) * ESZ); \ 3157 continue; \ 3158 } \ 3159 do_##NAME(vd, s1, vs2, i, env); \ 3160 } \ 3161 env->vstart = 0; \ 3162 /* set tail elements to 1s */ \ 3163 vext_set_elems_1s(vd, vta, vl * ESZ, \ 3164 total_elems * ESZ); \ 3165 } 3166 3167 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) 3168 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) 3169 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) 3170 GEN_VEXT_VF(vfadd_vf_h, 2) 3171 GEN_VEXT_VF(vfadd_vf_w, 4) 3172 GEN_VEXT_VF(vfadd_vf_d, 8) 3173 3174 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) 3175 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) 3176 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) 3177 GEN_VEXT_VV_ENV(vfsub_vv_h, 2) 3178 GEN_VEXT_VV_ENV(vfsub_vv_w, 4) 3179 GEN_VEXT_VV_ENV(vfsub_vv_d, 8) 3180 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) 3181 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) 3182 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) 3183 GEN_VEXT_VF(vfsub_vf_h, 2) 3184 GEN_VEXT_VF(vfsub_vf_w, 4) 3185 GEN_VEXT_VF(vfsub_vf_d, 8) 3186 3187 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) 3188 { 3189 return float16_sub(b, a, s); 3190 } 3191 3192 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s) 3193 { 3194 return float32_sub(b, a, s); 3195 } 3196 3197 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s) 3198 { 3199 return float64_sub(b, a, s); 3200 } 3201 3202 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) 3203 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) 3204 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) 3205 GEN_VEXT_VF(vfrsub_vf_h, 2) 3206 GEN_VEXT_VF(vfrsub_vf_w, 4) 3207 GEN_VEXT_VF(vfrsub_vf_d, 8) 3208 3209 /* Vector Widening Floating-Point Add/Subtract Instructions */ 3210 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) 3211 { 3212 return float32_add(float16_to_float32(a, true, s), 3213 float16_to_float32(b, true, s), s); 3214 } 3215 3216 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) 3217 { 3218 return float64_add(float32_to_float64(a, s), 3219 float32_to_float64(b, s), s); 3220 3221 } 3222 3223 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) 3224 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) 3225 GEN_VEXT_VV_ENV(vfwadd_vv_h, 4) 3226 GEN_VEXT_VV_ENV(vfwadd_vv_w, 8) 3227 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) 3228 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) 3229 GEN_VEXT_VF(vfwadd_vf_h, 4) 3230 GEN_VEXT_VF(vfwadd_vf_w, 8) 3231 3232 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) 3233 { 3234 return float32_sub(float16_to_float32(a, true, s), 3235 float16_to_float32(b, true, s), s); 3236 } 3237 3238 static uint64_t vfwsub32(uint32_t a, 
uint32_t b, float_status *s) 3239 { 3240 return float64_sub(float32_to_float64(a, s), 3241 float32_to_float64(b, s), s); 3242 3243 } 3244 3245 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) 3246 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) 3247 GEN_VEXT_VV_ENV(vfwsub_vv_h, 4) 3248 GEN_VEXT_VV_ENV(vfwsub_vv_w, 8) 3249 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16) 3250 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) 3251 GEN_VEXT_VF(vfwsub_vf_h, 4) 3252 GEN_VEXT_VF(vfwsub_vf_w, 8) 3253 3254 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) 3255 { 3256 return float32_add(a, float16_to_float32(b, true, s), s); 3257 } 3258 3259 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s) 3260 { 3261 return float64_add(a, float32_to_float64(b, s), s); 3262 } 3263 3264 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) 3265 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) 3266 GEN_VEXT_VV_ENV(vfwadd_wv_h, 4) 3267 GEN_VEXT_VV_ENV(vfwadd_wv_w, 8) 3268 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) 3269 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) 3270 GEN_VEXT_VF(vfwadd_wf_h, 4) 3271 GEN_VEXT_VF(vfwadd_wf_w, 8) 3272 3273 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) 3274 { 3275 return float32_sub(a, float16_to_float32(b, true, s), s); 3276 } 3277 3278 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s) 3279 { 3280 return float64_sub(a, float32_to_float64(b, s), s); 3281 } 3282 3283 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) 3284 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) 3285 GEN_VEXT_VV_ENV(vfwsub_wv_h, 4) 3286 GEN_VEXT_VV_ENV(vfwsub_wv_w, 8) 3287 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) 3288 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) 3289 GEN_VEXT_VF(vfwsub_wf_h, 4) 3290 GEN_VEXT_VF(vfwsub_wf_w, 8) 3291 3292 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */ 3293 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) 3294 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) 3295 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) 3296 GEN_VEXT_VV_ENV(vfmul_vv_h, 2) 3297 GEN_VEXT_VV_ENV(vfmul_vv_w, 4) 3298 GEN_VEXT_VV_ENV(vfmul_vv_d, 8) 3299 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) 3300 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) 3301 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) 3302 GEN_VEXT_VF(vfmul_vf_h, 2) 3303 GEN_VEXT_VF(vfmul_vf_w, 4) 3304 GEN_VEXT_VF(vfmul_vf_d, 8) 3305 3306 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) 3307 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) 3308 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) 3309 GEN_VEXT_VV_ENV(vfdiv_vv_h, 2) 3310 GEN_VEXT_VV_ENV(vfdiv_vv_w, 4) 3311 GEN_VEXT_VV_ENV(vfdiv_vv_d, 8) 3312 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) 3313 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) 3314 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) 3315 GEN_VEXT_VF(vfdiv_vf_h, 2) 3316 GEN_VEXT_VF(vfdiv_vf_w, 4) 3317 GEN_VEXT_VF(vfdiv_vf_d, 8) 3318 3319 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) 3320 { 3321 return float16_div(b, a, s); 3322 } 3323 3324 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s) 3325 { 3326 return float32_div(b, a, s); 3327 } 3328 3329 static uint64_t 
float64_rdiv(uint64_t a, uint64_t b, float_status *s) 3330 { 3331 return float64_div(b, a, s); 3332 } 3333 3334 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) 3335 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) 3336 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) 3337 GEN_VEXT_VF(vfrdiv_vf_h, 2) 3338 GEN_VEXT_VF(vfrdiv_vf_w, 4) 3339 GEN_VEXT_VF(vfrdiv_vf_d, 8) 3340 3341 /* Vector Widening Floating-Point Multiply */ 3342 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) 3343 { 3344 return float32_mul(float16_to_float32(a, true, s), 3345 float16_to_float32(b, true, s), s); 3346 } 3347 3348 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) 3349 { 3350 return float64_mul(float32_to_float64(a, s), 3351 float32_to_float64(b, s), s); 3352 3353 } 3354 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) 3355 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32) 3356 GEN_VEXT_VV_ENV(vfwmul_vv_h, 4) 3357 GEN_VEXT_VV_ENV(vfwmul_vv_w, 8) 3358 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) 3359 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) 3360 GEN_VEXT_VF(vfwmul_vf_h, 4) 3361 GEN_VEXT_VF(vfwmul_vf_w, 8) 3362 3363 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ 3364 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3365 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3366 CPURISCVState *env) \ 3367 { \ 3368 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3369 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3370 TD d = *((TD *)vd + HD(i)); \ 3371 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \ 3372 } 3373 3374 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3375 { 3376 return float16_muladd(a, b, d, 0, s); 3377 } 3378 3379 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3380 { 3381 return float32_muladd(a, b, d, 0, s); 3382 } 3383 3384 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3385 { 3386 return float64_muladd(a, b, d, 0, s); 3387 } 3388 3389 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) 3390 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) 3391 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) 3392 GEN_VEXT_VV_ENV(vfmacc_vv_h, 2) 3393 GEN_VEXT_VV_ENV(vfmacc_vv_w, 4) 3394 GEN_VEXT_VV_ENV(vfmacc_vv_d, 8) 3395 3396 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3397 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3398 CPURISCVState *env) \ 3399 { \ 3400 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3401 TD d = *((TD *)vd + HD(i)); \ 3402 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\ 3403 } 3404 3405 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) 3406 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) 3407 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) 3408 GEN_VEXT_VF(vfmacc_vf_h, 2) 3409 GEN_VEXT_VF(vfmacc_vf_w, 4) 3410 GEN_VEXT_VF(vfmacc_vf_d, 8) 3411 3412 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3413 { 3414 return float16_muladd(a, b, d, float_muladd_negate_c | 3415 float_muladd_negate_product, s); 3416 } 3417 3418 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3419 { 3420 return float32_muladd(a, b, d, float_muladd_negate_c | 3421 float_muladd_negate_product, s); 3422 } 3423 3424 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3425 { 3426 return float64_muladd(a, b, d, 
float_muladd_negate_c | 3427 float_muladd_negate_product, s); 3428 } 3429 3430 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16) 3431 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) 3432 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) 3433 GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2) 3434 GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4) 3435 GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8) 3436 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) 3437 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) 3438 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) 3439 GEN_VEXT_VF(vfnmacc_vf_h, 2) 3440 GEN_VEXT_VF(vfnmacc_vf_w, 4) 3441 GEN_VEXT_VF(vfnmacc_vf_d, 8) 3442 3443 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3444 { 3445 return float16_muladd(a, b, d, float_muladd_negate_c, s); 3446 } 3447 3448 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3449 { 3450 return float32_muladd(a, b, d, float_muladd_negate_c, s); 3451 } 3452 3453 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3454 { 3455 return float64_muladd(a, b, d, float_muladd_negate_c, s); 3456 } 3457 3458 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) 3459 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) 3460 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) 3461 GEN_VEXT_VV_ENV(vfmsac_vv_h, 2) 3462 GEN_VEXT_VV_ENV(vfmsac_vv_w, 4) 3463 GEN_VEXT_VV_ENV(vfmsac_vv_d, 8) 3464 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) 3465 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) 3466 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) 3467 GEN_VEXT_VF(vfmsac_vf_h, 2) 3468 GEN_VEXT_VF(vfmsac_vf_w, 4) 3469 GEN_VEXT_VF(vfmsac_vf_d, 8) 3470 3471 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3472 { 3473 return float16_muladd(a, b, d, float_muladd_negate_product, s); 3474 } 3475 3476 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3477 { 3478 return float32_muladd(a, b, d, float_muladd_negate_product, s); 3479 } 3480 3481 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3482 { 3483 return float64_muladd(a, b, d, float_muladd_negate_product, s); 3484 } 3485 3486 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) 3487 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) 3488 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) 3489 GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2) 3490 GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4) 3491 GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8) 3492 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) 3493 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) 3494 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64) 3495 GEN_VEXT_VF(vfnmsac_vf_h, 2) 3496 GEN_VEXT_VF(vfnmsac_vf_w, 4) 3497 GEN_VEXT_VF(vfnmsac_vf_d, 8) 3498 3499 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3500 { 3501 return float16_muladd(d, b, a, 0, s); 3502 } 3503 3504 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3505 { 3506 return float32_muladd(d, b, a, 0, s); 3507 } 3508 3509 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3510 { 3511 return float64_muladd(d, b, a, 0, s); 3512 } 3513 3514 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) 3515 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) 3516 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) 3517 GEN_VEXT_VV_ENV(vfmadd_vv_h, 
2) 3518 GEN_VEXT_VV_ENV(vfmadd_vv_w, 4) 3519 GEN_VEXT_VV_ENV(vfmadd_vv_d, 8) 3520 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16) 3521 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) 3522 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) 3523 GEN_VEXT_VF(vfmadd_vf_h, 2) 3524 GEN_VEXT_VF(vfmadd_vf_w, 4) 3525 GEN_VEXT_VF(vfmadd_vf_d, 8) 3526 3527 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3528 { 3529 return float16_muladd(d, b, a, float_muladd_negate_c | 3530 float_muladd_negate_product, s); 3531 } 3532 3533 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3534 { 3535 return float32_muladd(d, b, a, float_muladd_negate_c | 3536 float_muladd_negate_product, s); 3537 } 3538 3539 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3540 { 3541 return float64_muladd(d, b, a, float_muladd_negate_c | 3542 float_muladd_negate_product, s); 3543 } 3544 3545 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) 3546 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) 3547 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) 3548 GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2) 3549 GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4) 3550 GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8) 3551 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) 3552 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) 3553 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) 3554 GEN_VEXT_VF(vfnmadd_vf_h, 2) 3555 GEN_VEXT_VF(vfnmadd_vf_w, 4) 3556 GEN_VEXT_VF(vfnmadd_vf_d, 8) 3557 3558 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3559 { 3560 return float16_muladd(d, b, a, float_muladd_negate_c, s); 3561 } 3562 3563 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3564 { 3565 return float32_muladd(d, b, a, float_muladd_negate_c, s); 3566 } 3567 3568 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3569 { 3570 return float64_muladd(d, b, a, float_muladd_negate_c, s); 3571 } 3572 3573 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) 3574 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) 3575 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) 3576 GEN_VEXT_VV_ENV(vfmsub_vv_h, 2) 3577 GEN_VEXT_VV_ENV(vfmsub_vv_w, 4) 3578 GEN_VEXT_VV_ENV(vfmsub_vv_d, 8) 3579 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) 3580 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) 3581 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) 3582 GEN_VEXT_VF(vfmsub_vf_h, 2) 3583 GEN_VEXT_VF(vfmsub_vf_w, 4) 3584 GEN_VEXT_VF(vfmsub_vf_d, 8) 3585 3586 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3587 { 3588 return float16_muladd(d, b, a, float_muladd_negate_product, s); 3589 } 3590 3591 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3592 { 3593 return float32_muladd(d, b, a, float_muladd_negate_product, s); 3594 } 3595 3596 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3597 { 3598 return float64_muladd(d, b, a, float_muladd_negate_product, s); 3599 } 3600 3601 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) 3602 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) 3603 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) 3604 GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2) 3605 GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4) 3606 GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8) 3607 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) 
3608 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) 3609 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64) 3610 GEN_VEXT_VF(vfnmsub_vf_h, 2) 3611 GEN_VEXT_VF(vfnmsub_vf_w, 4) 3612 GEN_VEXT_VF(vfnmsub_vf_d, 8) 3613 3614 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */ 3615 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3616 { 3617 return float32_muladd(float16_to_float32(a, true, s), 3618 float16_to_float32(b, true, s), d, 0, s); 3619 } 3620 3621 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3622 { 3623 return float64_muladd(float32_to_float64(a, s), 3624 float32_to_float64(b, s), d, 0, s); 3625 } 3626 3627 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) 3628 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32) 3629 GEN_VEXT_VV_ENV(vfwmacc_vv_h, 4) 3630 GEN_VEXT_VV_ENV(vfwmacc_vv_w, 8) 3631 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16) 3632 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32) 3633 GEN_VEXT_VF(vfwmacc_vf_h, 4) 3634 GEN_VEXT_VF(vfwmacc_vf_w, 8) 3635 3636 static uint32_t fwmaccbf16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3637 { 3638 return float32_muladd(bfloat16_to_float32(a, s), 3639 bfloat16_to_float32(b, s), d, 0, s); 3640 } 3641 3642 RVVCALL(OPFVV3, vfwmaccbf16_vv, WOP_UUU_H, H4, H2, H2, fwmaccbf16) 3643 GEN_VEXT_VV_ENV(vfwmaccbf16_vv, 4) 3644 RVVCALL(OPFVF3, vfwmaccbf16_vf, WOP_UUU_H, H4, H2, fwmaccbf16) 3645 GEN_VEXT_VF(vfwmaccbf16_vf, 4) 3646 3647 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3648 { 3649 return float32_muladd(float16_to_float32(a, true, s), 3650 float16_to_float32(b, true, s), d, 3651 float_muladd_negate_c | float_muladd_negate_product, 3652 s); 3653 } 3654 3655 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3656 { 3657 return float64_muladd(float32_to_float64(a, s), float32_to_float64(b, s), 3658 d, float_muladd_negate_c | 3659 float_muladd_negate_product, s); 3660 } 3661 3662 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) 3663 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) 3664 GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 4) 3665 GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 8) 3666 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) 3667 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) 3668 GEN_VEXT_VF(vfwnmacc_vf_h, 4) 3669 GEN_VEXT_VF(vfwnmacc_vf_w, 8) 3670 3671 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3672 { 3673 return float32_muladd(float16_to_float32(a, true, s), 3674 float16_to_float32(b, true, s), d, 3675 float_muladd_negate_c, s); 3676 } 3677 3678 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3679 { 3680 return float64_muladd(float32_to_float64(a, s), 3681 float32_to_float64(b, s), d, 3682 float_muladd_negate_c, s); 3683 } 3684 3685 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) 3686 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) 3687 GEN_VEXT_VV_ENV(vfwmsac_vv_h, 4) 3688 GEN_VEXT_VV_ENV(vfwmsac_vv_w, 8) 3689 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) 3690 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) 3691 GEN_VEXT_VF(vfwmsac_vf_h, 4) 3692 GEN_VEXT_VF(vfwmsac_vf_w, 8) 3693 3694 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3695 { 3696 return float32_muladd(float16_to_float32(a, true, s), 3697 float16_to_float32(b, true, s), d, 3698 
float_muladd_negate_product, s); 3699 } 3700 3701 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3702 { 3703 return float64_muladd(float32_to_float64(a, s), 3704 float32_to_float64(b, s), d, 3705 float_muladd_negate_product, s); 3706 } 3707 3708 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) 3709 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) 3710 GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 4) 3711 GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 8) 3712 RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) 3713 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) 3714 GEN_VEXT_VF(vfwnmsac_vf_h, 4) 3715 GEN_VEXT_VF(vfwnmsac_vf_w, 8) 3716 3717 /* Vector Floating-Point Square-Root Instruction */ 3718 #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 3719 static void do_##NAME(void *vd, void *vs2, int i, \ 3720 CPURISCVState *env) \ 3721 { \ 3722 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3723 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ 3724 } 3725 3726 #define GEN_VEXT_V_ENV(NAME, ESZ) \ 3727 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 3728 CPURISCVState *env, uint32_t desc) \ 3729 { \ 3730 uint32_t vm = vext_vm(desc); \ 3731 uint32_t vl = env->vl; \ 3732 uint32_t total_elems = \ 3733 vext_get_total_elems(env, desc, ESZ); \ 3734 uint32_t vta = vext_vta(desc); \ 3735 uint32_t vma = vext_vma(desc); \ 3736 uint32_t i; \ 3737 \ 3738 VSTART_CHECK_EARLY_EXIT(env, vl); \ 3739 \ 3740 if (vl == 0) { \ 3741 return; \ 3742 } \ 3743 for (i = env->vstart; i < vl; i++) { \ 3744 if (!vm && !vext_elem_mask(v0, i)) { \ 3745 /* set masked-off elements to 1s */ \ 3746 vext_set_elems_1s(vd, vma, i * ESZ, \ 3747 (i + 1) * ESZ); \ 3748 continue; \ 3749 } \ 3750 do_##NAME(vd, vs2, i, env); \ 3751 } \ 3752 env->vstart = 0; \ 3753 vext_set_elems_1s(vd, vta, vl * ESZ, \ 3754 total_elems * ESZ); \ 3755 } 3756 3757 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) 3758 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) 3759 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) 3760 GEN_VEXT_V_ENV(vfsqrt_v_h, 2) 3761 GEN_VEXT_V_ENV(vfsqrt_v_w, 4) 3762 GEN_VEXT_V_ENV(vfsqrt_v_d, 8) 3763 3764 /* 3765 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction 3766 * 3767 * Adapted from riscv-v-spec recip.c: 3768 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3769 */ 3770 static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size) 3771 { 3772 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3773 uint64_t exp = extract64(f, frac_size, exp_size); 3774 uint64_t frac = extract64(f, 0, frac_size); 3775 3776 const uint8_t lookup_table[] = { 3777 52, 51, 50, 48, 47, 46, 44, 43, 3778 42, 41, 40, 39, 38, 36, 35, 34, 3779 33, 32, 31, 30, 30, 29, 28, 27, 3780 26, 25, 24, 23, 23, 22, 21, 20, 3781 19, 19, 18, 17, 16, 16, 15, 14, 3782 14, 13, 12, 12, 11, 10, 10, 9, 3783 9, 8, 7, 7, 6, 6, 5, 4, 3784 4, 3, 3, 2, 2, 1, 1, 0, 3785 127, 125, 123, 121, 119, 118, 116, 114, 3786 113, 111, 109, 108, 106, 105, 103, 102, 3787 100, 99, 97, 96, 95, 93, 92, 91, 3788 90, 88, 87, 86, 85, 84, 83, 82, 3789 80, 79, 78, 77, 76, 75, 74, 73, 3790 72, 71, 70, 70, 69, 68, 67, 66, 3791 65, 64, 63, 63, 62, 61, 60, 59, 3792 59, 58, 57, 56, 56, 55, 54, 53 3793 }; 3794 const int precision = 7; 3795 3796 if (exp == 0 && frac != 0) { /* subnormal */ 3797 /* Normalize the subnormal. 
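 * Shift the fraction left until its most-significant bit is set. exp is
 * decremented below zero and wraps as an unsigned value, but the exponent
 * computation below uses ~exp (i.e. -exp - 1 in two's complement), so the
 * wrap-around is harmless. The extra shift-and-mask after the loop drops
 * the now-explicit leading one, leaving only fraction bits in frac.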
*/ 3798 while (extract64(frac, frac_size - 1, 1) == 0) { 3799 exp--; 3800 frac <<= 1; 3801 } 3802 3803 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3804 } 3805 3806 int idx = ((exp & 1) << (precision - 1)) | 3807 (frac >> (frac_size - precision + 1)); 3808 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3809 (frac_size - precision); 3810 uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2; 3811 3812 uint64_t val = 0; 3813 val = deposit64(val, 0, frac_size, out_frac); 3814 val = deposit64(val, frac_size, exp_size, out_exp); 3815 val = deposit64(val, frac_size + exp_size, 1, sign); 3816 return val; 3817 } 3818 3819 static float16 frsqrt7_h(float16 f, float_status *s) 3820 { 3821 int exp_size = 5, frac_size = 10; 3822 bool sign = float16_is_neg(f); 3823 3824 /* 3825 * frsqrt7(sNaN) = canonical NaN 3826 * frsqrt7(-inf) = canonical NaN 3827 * frsqrt7(-normal) = canonical NaN 3828 * frsqrt7(-subnormal) = canonical NaN 3829 */ 3830 if (float16_is_signaling_nan(f, s) || 3831 (float16_is_infinity(f) && sign) || 3832 (float16_is_normal(f) && sign) || 3833 (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) { 3834 s->float_exception_flags |= float_flag_invalid; 3835 return float16_default_nan(s); 3836 } 3837 3838 /* frsqrt7(qNaN) = canonical NaN */ 3839 if (float16_is_quiet_nan(f, s)) { 3840 return float16_default_nan(s); 3841 } 3842 3843 /* frsqrt7(+-0) = +-inf */ 3844 if (float16_is_zero(f)) { 3845 s->float_exception_flags |= float_flag_divbyzero; 3846 return float16_set_sign(float16_infinity, sign); 3847 } 3848 3849 /* frsqrt7(+inf) = +0 */ 3850 if (float16_is_infinity(f) && !sign) { 3851 return float16_set_sign(float16_zero, sign); 3852 } 3853 3854 /* +normal, +subnormal */ 3855 uint64_t val = frsqrt7(f, exp_size, frac_size); 3856 return make_float16(val); 3857 } 3858 3859 static float32 frsqrt7_s(float32 f, float_status *s) 3860 { 3861 int exp_size = 8, frac_size = 23; 3862 bool sign = float32_is_neg(f); 3863 3864 /* 3865 * frsqrt7(sNaN) = canonical NaN 3866 * frsqrt7(-inf) = canonical NaN 3867 * frsqrt7(-normal) = canonical NaN 3868 * frsqrt7(-subnormal) = canonical NaN 3869 */ 3870 if (float32_is_signaling_nan(f, s) || 3871 (float32_is_infinity(f) && sign) || 3872 (float32_is_normal(f) && sign) || 3873 (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) { 3874 s->float_exception_flags |= float_flag_invalid; 3875 return float32_default_nan(s); 3876 } 3877 3878 /* frsqrt7(qNaN) = canonical NaN */ 3879 if (float32_is_quiet_nan(f, s)) { 3880 return float32_default_nan(s); 3881 } 3882 3883 /* frsqrt7(+-0) = +-inf */ 3884 if (float32_is_zero(f)) { 3885 s->float_exception_flags |= float_flag_divbyzero; 3886 return float32_set_sign(float32_infinity, sign); 3887 } 3888 3889 /* frsqrt7(+inf) = +0 */ 3890 if (float32_is_infinity(f) && !sign) { 3891 return float32_set_sign(float32_zero, sign); 3892 } 3893 3894 /* +normal, +subnormal */ 3895 uint64_t val = frsqrt7(f, exp_size, frac_size); 3896 return make_float32(val); 3897 } 3898 3899 static float64 frsqrt7_d(float64 f, float_status *s) 3900 { 3901 int exp_size = 11, frac_size = 52; 3902 bool sign = float64_is_neg(f); 3903 3904 /* 3905 * frsqrt7(sNaN) = canonical NaN 3906 * frsqrt7(-inf) = canonical NaN 3907 * frsqrt7(-normal) = canonical NaN 3908 * frsqrt7(-subnormal) = canonical NaN 3909 */ 3910 if (float64_is_signaling_nan(f, s) || 3911 (float64_is_infinity(f) && sign) || 3912 (float64_is_normal(f) && sign) || 3913 (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) { 3914 
s->float_exception_flags |= float_flag_invalid; 3915 return float64_default_nan(s); 3916 } 3917 3918 /* frsqrt7(qNaN) = canonical NaN */ 3919 if (float64_is_quiet_nan(f, s)) { 3920 return float64_default_nan(s); 3921 } 3922 3923 /* frsqrt7(+-0) = +-inf */ 3924 if (float64_is_zero(f)) { 3925 s->float_exception_flags |= float_flag_divbyzero; 3926 return float64_set_sign(float64_infinity, sign); 3927 } 3928 3929 /* frsqrt7(+inf) = +0 */ 3930 if (float64_is_infinity(f) && !sign) { 3931 return float64_set_sign(float64_zero, sign); 3932 } 3933 3934 /* +normal, +subnormal */ 3935 uint64_t val = frsqrt7(f, exp_size, frac_size); 3936 return make_float64(val); 3937 } 3938 3939 RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h) 3940 RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s) 3941 RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d) 3942 GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2) 3943 GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4) 3944 GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8) 3945 3946 /* 3947 * Vector Floating-Point Reciprocal Estimate Instruction 3948 * 3949 * Adapted from riscv-v-spec recip.c: 3950 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3951 */ 3952 static uint64_t frec7(uint64_t f, int exp_size, int frac_size, 3953 float_status *s) 3954 { 3955 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3956 uint64_t exp = extract64(f, frac_size, exp_size); 3957 uint64_t frac = extract64(f, 0, frac_size); 3958 3959 const uint8_t lookup_table[] = { 3960 127, 125, 123, 121, 119, 117, 116, 114, 3961 112, 110, 109, 107, 105, 104, 102, 100, 3962 99, 97, 96, 94, 93, 91, 90, 88, 3963 87, 85, 84, 83, 81, 80, 79, 77, 3964 76, 75, 74, 72, 71, 70, 69, 68, 3965 66, 65, 64, 63, 62, 61, 60, 59, 3966 58, 57, 56, 55, 54, 53, 52, 51, 3967 50, 49, 48, 47, 46, 45, 44, 43, 3968 42, 41, 40, 40, 39, 38, 37, 36, 3969 35, 35, 34, 33, 32, 31, 31, 30, 3970 29, 28, 28, 27, 26, 25, 25, 24, 3971 23, 23, 22, 21, 21, 20, 19, 19, 3972 18, 17, 17, 16, 15, 15, 14, 14, 3973 13, 12, 12, 11, 11, 10, 9, 9, 3974 8, 8, 7, 7, 6, 5, 5, 4, 3975 4, 3, 3, 2, 2, 1, 1, 0 3976 }; 3977 const int precision = 7; 3978 3979 if (exp == 0 && frac != 0) { /* subnormal */ 3980 /* Normalize the subnormal. */ 3981 while (extract64(frac, frac_size - 1, 1) == 0) { 3982 exp--; 3983 frac <<= 1; 3984 } 3985 3986 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3987 3988 if (exp != 0 && exp != UINT64_MAX) { 3989 /* 3990 * Overflow to inf or max value of same sign, 3991 * depending on sign and rounding mode. 3992 */ 3993 s->float_exception_flags |= (float_flag_inexact | 3994 float_flag_overflow); 3995 3996 if ((s->float_rounding_mode == float_round_to_zero) || 3997 ((s->float_rounding_mode == float_round_down) && !sign) || 3998 ((s->float_rounding_mode == float_round_up) && sign)) { 3999 /* Return greatest/negative finite value. */ 4000 return (sign << (exp_size + frac_size)) | 4001 (MAKE_64BIT_MASK(frac_size, exp_size) - 1); 4002 } else { 4003 /* Return +-inf. */ 4004 return (sign << (exp_size + frac_size)) | 4005 MAKE_64BIT_MASK(frac_size, exp_size); 4006 } 4007 } 4008 } 4009 4010 int idx = frac >> (frac_size - precision); 4011 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 4012 (frac_size - precision); 4013 uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp; 4014 4015 if (out_exp == 0 || out_exp == UINT64_MAX) { 4016 /* 4017 * The result is subnormal, but don't raise the underflow exception, 4018 * because there's no additional loss of precision. 
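 * out_exp == 0 means the true exponent of the estimate is one below the
 * normal range, so it is encoded as a subnormal: shift the fraction right
 * once and make the previously implicit leading one explicit.
 * out_exp == UINT64_MAX (i.e. -1) is one step further below and needs one
 * more right shift before the exponent field is forced to zero.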
4019 */ 4020 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1); 4021 if (out_exp == UINT64_MAX) { 4022 out_frac >>= 1; 4023 out_exp = 0; 4024 } 4025 } 4026 4027 uint64_t val = 0; 4028 val = deposit64(val, 0, frac_size, out_frac); 4029 val = deposit64(val, frac_size, exp_size, out_exp); 4030 val = deposit64(val, frac_size + exp_size, 1, sign); 4031 return val; 4032 } 4033 4034 static float16 frec7_h(float16 f, float_status *s) 4035 { 4036 int exp_size = 5, frac_size = 10; 4037 bool sign = float16_is_neg(f); 4038 4039 /* frec7(+-inf) = +-0 */ 4040 if (float16_is_infinity(f)) { 4041 return float16_set_sign(float16_zero, sign); 4042 } 4043 4044 /* frec7(+-0) = +-inf */ 4045 if (float16_is_zero(f)) { 4046 s->float_exception_flags |= float_flag_divbyzero; 4047 return float16_set_sign(float16_infinity, sign); 4048 } 4049 4050 /* frec7(sNaN) = canonical NaN */ 4051 if (float16_is_signaling_nan(f, s)) { 4052 s->float_exception_flags |= float_flag_invalid; 4053 return float16_default_nan(s); 4054 } 4055 4056 /* frec7(qNaN) = canonical NaN */ 4057 if (float16_is_quiet_nan(f, s)) { 4058 return float16_default_nan(s); 4059 } 4060 4061 /* +-normal, +-subnormal */ 4062 uint64_t val = frec7(f, exp_size, frac_size, s); 4063 return make_float16(val); 4064 } 4065 4066 static float32 frec7_s(float32 f, float_status *s) 4067 { 4068 int exp_size = 8, frac_size = 23; 4069 bool sign = float32_is_neg(f); 4070 4071 /* frec7(+-inf) = +-0 */ 4072 if (float32_is_infinity(f)) { 4073 return float32_set_sign(float32_zero, sign); 4074 } 4075 4076 /* frec7(+-0) = +-inf */ 4077 if (float32_is_zero(f)) { 4078 s->float_exception_flags |= float_flag_divbyzero; 4079 return float32_set_sign(float32_infinity, sign); 4080 } 4081 4082 /* frec7(sNaN) = canonical NaN */ 4083 if (float32_is_signaling_nan(f, s)) { 4084 s->float_exception_flags |= float_flag_invalid; 4085 return float32_default_nan(s); 4086 } 4087 4088 /* frec7(qNaN) = canonical NaN */ 4089 if (float32_is_quiet_nan(f, s)) { 4090 return float32_default_nan(s); 4091 } 4092 4093 /* +-normal, +-subnormal */ 4094 uint64_t val = frec7(f, exp_size, frac_size, s); 4095 return make_float32(val); 4096 } 4097 4098 static float64 frec7_d(float64 f, float_status *s) 4099 { 4100 int exp_size = 11, frac_size = 52; 4101 bool sign = float64_is_neg(f); 4102 4103 /* frec7(+-inf) = +-0 */ 4104 if (float64_is_infinity(f)) { 4105 return float64_set_sign(float64_zero, sign); 4106 } 4107 4108 /* frec7(+-0) = +-inf */ 4109 if (float64_is_zero(f)) { 4110 s->float_exception_flags |= float_flag_divbyzero; 4111 return float64_set_sign(float64_infinity, sign); 4112 } 4113 4114 /* frec7(sNaN) = canonical NaN */ 4115 if (float64_is_signaling_nan(f, s)) { 4116 s->float_exception_flags |= float_flag_invalid; 4117 return float64_default_nan(s); 4118 } 4119 4120 /* frec7(qNaN) = canonical NaN */ 4121 if (float64_is_quiet_nan(f, s)) { 4122 return float64_default_nan(s); 4123 } 4124 4125 /* +-normal, +-subnormal */ 4126 uint64_t val = frec7(f, exp_size, frac_size, s); 4127 return make_float64(val); 4128 } 4129 4130 RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h) 4131 RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s) 4132 RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d) 4133 GEN_VEXT_V_ENV(vfrec7_v_h, 2) 4134 GEN_VEXT_V_ENV(vfrec7_v_w, 4) 4135 GEN_VEXT_V_ENV(vfrec7_v_d, 8) 4136 4137 /* Vector Floating-Point MIN/MAX Instructions */ 4138 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number) 4139 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, 
float32_minimum_number) 4140 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number) 4141 GEN_VEXT_VV_ENV(vfmin_vv_h, 2) 4142 GEN_VEXT_VV_ENV(vfmin_vv_w, 4) 4143 GEN_VEXT_VV_ENV(vfmin_vv_d, 8) 4144 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number) 4145 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number) 4146 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number) 4147 GEN_VEXT_VF(vfmin_vf_h, 2) 4148 GEN_VEXT_VF(vfmin_vf_w, 4) 4149 GEN_VEXT_VF(vfmin_vf_d, 8) 4150 4151 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number) 4152 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number) 4153 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number) 4154 GEN_VEXT_VV_ENV(vfmax_vv_h, 2) 4155 GEN_VEXT_VV_ENV(vfmax_vv_w, 4) 4156 GEN_VEXT_VV_ENV(vfmax_vv_d, 8) 4157 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number) 4158 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number) 4159 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number) 4160 GEN_VEXT_VF(vfmax_vf_h, 2) 4161 GEN_VEXT_VF(vfmax_vf_w, 4) 4162 GEN_VEXT_VF(vfmax_vf_d, 8) 4163 4164 /* Vector Floating-Point Sign-Injection Instructions */ 4165 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) 4166 { 4167 return deposit64(b, 0, 15, a); 4168 } 4169 4170 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s) 4171 { 4172 return deposit64(b, 0, 31, a); 4173 } 4174 4175 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) 4176 { 4177 return deposit64(b, 0, 63, a); 4178 } 4179 4180 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) 4181 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) 4182 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) 4183 GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2) 4184 GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4) 4185 GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8) 4186 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) 4187 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) 4188 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64) 4189 GEN_VEXT_VF(vfsgnj_vf_h, 2) 4190 GEN_VEXT_VF(vfsgnj_vf_w, 4) 4191 GEN_VEXT_VF(vfsgnj_vf_d, 8) 4192 4193 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) 4194 { 4195 return deposit64(~b, 0, 15, a); 4196 } 4197 4198 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s) 4199 { 4200 return deposit64(~b, 0, 31, a); 4201 } 4202 4203 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s) 4204 { 4205 return deposit64(~b, 0, 63, a); 4206 } 4207 4208 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) 4209 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) 4210 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) 4211 GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2) 4212 GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4) 4213 GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8) 4214 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) 4215 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) 4216 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) 4217 GEN_VEXT_VF(vfsgnjn_vf_h, 2) 4218 GEN_VEXT_VF(vfsgnjn_vf_w, 4) 4219 GEN_VEXT_VF(vfsgnjn_vf_d, 8) 4220 4221 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) 4222 { 4223 return deposit64(b ^ a, 0, 15, a); 4224 } 4225 4226 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s) 4227 { 4228 return deposit64(b ^ a, 0, 31, a); 4229 } 4230 4231 static uint64_t fsgnjx64(uint64_t a, uint64_t b, 
float_status *s) 4232 { 4233 return deposit64(b ^ a, 0, 63, a); 4234 } 4235 4236 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) 4237 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) 4238 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) 4239 GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2) 4240 GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4) 4241 GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8) 4242 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) 4243 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) 4244 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) 4245 GEN_VEXT_VF(vfsgnjx_vf_h, 2) 4246 GEN_VEXT_VF(vfsgnjx_vf_w, 4) 4247 GEN_VEXT_VF(vfsgnjx_vf_d, 8) 4248 4249 /* Vector Floating-Point Compare Instructions */ 4250 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ 4251 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4252 CPURISCVState *env, uint32_t desc) \ 4253 { \ 4254 uint32_t vm = vext_vm(desc); \ 4255 uint32_t vl = env->vl; \ 4256 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \ 4257 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 4258 uint32_t vma = vext_vma(desc); \ 4259 uint32_t i; \ 4260 \ 4261 VSTART_CHECK_EARLY_EXIT(env, vl); \ 4262 \ 4263 for (i = env->vstart; i < vl; i++) { \ 4264 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 4265 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4266 if (!vm && !vext_elem_mask(v0, i)) { \ 4267 /* set masked-off elements to 1s */ \ 4268 if (vma) { \ 4269 vext_set_elem_mask(vd, i, 1); \ 4270 } \ 4271 continue; \ 4272 } \ 4273 vext_set_elem_mask(vd, i, \ 4274 DO_OP(s2, s1, &env->fp_status)); \ 4275 } \ 4276 env->vstart = 0; \ 4277 /* 4278 * mask destination register are always tail-agnostic 4279 * set tail elements to 1s 4280 */ \ 4281 if (vta_all_1s) { \ 4282 for (; i < total_elems; i++) { \ 4283 vext_set_elem_mask(vd, i, 1); \ 4284 } \ 4285 } \ 4286 } 4287 4288 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet) 4289 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet) 4290 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet) 4291 4292 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \ 4293 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4294 CPURISCVState *env, uint32_t desc) \ 4295 { \ 4296 uint32_t vm = vext_vm(desc); \ 4297 uint32_t vl = env->vl; \ 4298 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \ 4299 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 4300 uint32_t vma = vext_vma(desc); \ 4301 uint32_t i; \ 4302 \ 4303 VSTART_CHECK_EARLY_EXIT(env, vl); \ 4304 \ 4305 for (i = env->vstart; i < vl; i++) { \ 4306 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4307 if (!vm && !vext_elem_mask(v0, i)) { \ 4308 /* set masked-off elements to 1s */ \ 4309 if (vma) { \ 4310 vext_set_elem_mask(vd, i, 1); \ 4311 } \ 4312 continue; \ 4313 } \ 4314 vext_set_elem_mask(vd, i, \ 4315 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \ 4316 } \ 4317 env->vstart = 0; \ 4318 /* 4319 * mask destination register are always tail-agnostic 4320 * set tail elements to 1s 4321 */ \ 4322 if (vta_all_1s) { \ 4323 for (; i < total_elems; i++) { \ 4324 vext_set_elem_mask(vd, i, 1); \ 4325 } \ 4326 } \ 4327 } 4328 4329 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet) 4330 GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet) 4331 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet) 4332 4333 static bool vmfne16(uint16_t a, uint16_t b, float_status *s) 4334 { 4335 FloatRelation compare = float16_compare_quiet(a, b, s); 4336 return compare != float_relation_equal; 4337 } 4338 4339 static 
bool vmfne32(uint32_t a, uint32_t b, float_status *s) 4340 { 4341 FloatRelation compare = float32_compare_quiet(a, b, s); 4342 return compare != float_relation_equal; 4343 } 4344 4345 static bool vmfne64(uint64_t a, uint64_t b, float_status *s) 4346 { 4347 FloatRelation compare = float64_compare_quiet(a, b, s); 4348 return compare != float_relation_equal; 4349 } 4350 4351 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16) 4352 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32) 4353 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64) 4354 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16) 4355 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32) 4356 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64) 4357 4358 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt) 4359 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt) 4360 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt) 4361 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt) 4362 GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt) 4363 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt) 4364 4365 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le) 4366 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le) 4367 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le) 4368 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le) 4369 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le) 4370 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le) 4371 4372 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s) 4373 { 4374 FloatRelation compare = float16_compare(a, b, s); 4375 return compare == float_relation_greater; 4376 } 4377 4378 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s) 4379 { 4380 FloatRelation compare = float32_compare(a, b, s); 4381 return compare == float_relation_greater; 4382 } 4383 4384 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s) 4385 { 4386 FloatRelation compare = float64_compare(a, b, s); 4387 return compare == float_relation_greater; 4388 } 4389 4390 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16) 4391 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32) 4392 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64) 4393 4394 static bool vmfge16(uint16_t a, uint16_t b, float_status *s) 4395 { 4396 FloatRelation compare = float16_compare(a, b, s); 4397 return compare == float_relation_greater || 4398 compare == float_relation_equal; 4399 } 4400 4401 static bool vmfge32(uint32_t a, uint32_t b, float_status *s) 4402 { 4403 FloatRelation compare = float32_compare(a, b, s); 4404 return compare == float_relation_greater || 4405 compare == float_relation_equal; 4406 } 4407 4408 static bool vmfge64(uint64_t a, uint64_t b, float_status *s) 4409 { 4410 FloatRelation compare = float64_compare(a, b, s); 4411 return compare == float_relation_greater || 4412 compare == float_relation_equal; 4413 } 4414 4415 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16) 4416 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32) 4417 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64) 4418 4419 /* Vector Floating-Point Classify Instruction */ 4420 target_ulong fclass_h(uint64_t frs1) 4421 { 4422 float16 f = frs1; 4423 bool sign = float16_is_neg(f); 4424 4425 if (float16_is_infinity(f)) { 4426 return sign ? 1 << 0 : 1 << 7; 4427 } else if (float16_is_zero(f)) { 4428 return sign ? 1 << 3 : 1 << 4; 4429 } else if (float16_is_zero_or_denormal(f)) { 4430 return sign ? 
1 << 2 : 1 << 5; 4431 } else if (float16_is_any_nan(f)) { 4432 float_status s = { }; /* for snan_bit_is_one */ 4433 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4434 } else { 4435 return sign ? 1 << 1 : 1 << 6; 4436 } 4437 } 4438 4439 target_ulong fclass_s(uint64_t frs1) 4440 { 4441 float32 f = frs1; 4442 bool sign = float32_is_neg(f); 4443 4444 if (float32_is_infinity(f)) { 4445 return sign ? 1 << 0 : 1 << 7; 4446 } else if (float32_is_zero(f)) { 4447 return sign ? 1 << 3 : 1 << 4; 4448 } else if (float32_is_zero_or_denormal(f)) { 4449 return sign ? 1 << 2 : 1 << 5; 4450 } else if (float32_is_any_nan(f)) { 4451 float_status s = { }; /* for snan_bit_is_one */ 4452 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4453 } else { 4454 return sign ? 1 << 1 : 1 << 6; 4455 } 4456 } 4457 4458 target_ulong fclass_d(uint64_t frs1) 4459 { 4460 float64 f = frs1; 4461 bool sign = float64_is_neg(f); 4462 4463 if (float64_is_infinity(f)) { 4464 return sign ? 1 << 0 : 1 << 7; 4465 } else if (float64_is_zero(f)) { 4466 return sign ? 1 << 3 : 1 << 4; 4467 } else if (float64_is_zero_or_denormal(f)) { 4468 return sign ? 1 << 2 : 1 << 5; 4469 } else if (float64_is_any_nan(f)) { 4470 float_status s = { }; /* for snan_bit_is_one */ 4471 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4472 } else { 4473 return sign ? 1 << 1 : 1 << 6; 4474 } 4475 } 4476 4477 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) 4478 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) 4479 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) 4480 GEN_VEXT_V(vfclass_v_h, 2) 4481 GEN_VEXT_V(vfclass_v_w, 4) 4482 GEN_VEXT_V(vfclass_v_d, 8) 4483 4484 /* Vector Floating-Point Merge Instruction */ 4485 4486 #define GEN_VFMERGE_VF(NAME, ETYPE, H) \ 4487 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4488 CPURISCVState *env, uint32_t desc) \ 4489 { \ 4490 uint32_t vm = vext_vm(desc); \ 4491 uint32_t vl = env->vl; \ 4492 uint32_t esz = sizeof(ETYPE); \ 4493 uint32_t total_elems = \ 4494 vext_get_total_elems(env, desc, esz); \ 4495 uint32_t vta = vext_vta(desc); \ 4496 uint32_t i; \ 4497 \ 4498 VSTART_CHECK_EARLY_EXIT(env, vl); \ 4499 \ 4500 for (i = env->vstart; i < vl; i++) { \ 4501 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4502 *((ETYPE *)vd + H(i)) = \ 4503 (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \ 4504 } \ 4505 env->vstart = 0; \ 4506 /* set tail elements to 1s */ \ 4507 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4508 } 4509 4510 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2) 4511 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4) 4512 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8) 4513 4514 /* Single-Width Floating-Point/Integer Type-Convert Instructions */ 4515 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ 4516 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) 4517 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) 4518 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) 4519 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2) 4520 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4) 4521 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8) 4522 4523 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. 
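 *
 * Note: these float-to-integer conversions round according to the rounding
 * mode currently held in env->fp_status; NaN and out-of-range inputs are
 * handled by the softfloat conversion routines, which raise the invalid
 * flag.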
 */
RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16)
RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32)
RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64)
GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2)
GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4)
GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8)

/* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16)
RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32)
RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64)
GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2)
GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4)
GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8)

/* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */
RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16)
RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32)
RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64)
GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2)
GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4)
GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8)

/* Widening Floating-Point/Integer Type-Convert Instructions */
/* (TD, T2, TX2) */
#define WOP_UU_B uint16_t, uint8_t, uint8_t
#define WOP_UU_H uint32_t, uint16_t, uint16_t
#define WOP_UU_W uint64_t, uint32_t, uint32_t
/*
 * vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.
 */
RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32)
RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64)
GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 4)
GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 8)

/* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32)
RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64)
GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 4)
GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 8)

/*
 * vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float.
 */
RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16)
RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32)
RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64)
GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 2)
GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 4)
GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 8)

/* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */
RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16)
RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32)
RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64)
GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 2)
GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 4)
GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 8)

/*
 * vfwcvt.f.f.v vd, vs2, vm # Convert single-width float to double-width float.
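 *
 * The half-to-single helper below passes ieee = true to
 * float16_to_float32(), i.e. the source is treated as a standard IEEE 754
 * binary16 value rather than an alternative half-precision format.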
 */
static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
{
    return float16_to_float32(a, true, s);
}

RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 4)
GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 8)

RVVCALL(OPFVV1, vfwcvtbf16_f_f_v, WOP_UU_H, H4, H2, bfloat16_to_float32)
GEN_VEXT_V_ENV(vfwcvtbf16_f_f_v, 4)

/* Narrowing Floating-Point/Integer Type-Convert Instructions */
/* (TD, T2, TX2) */
#define NOP_UU_B uint8_t, uint16_t, uint32_t
#define NOP_UU_H uint16_t, uint32_t, uint32_t
#define NOP_UU_W uint32_t, uint64_t, uint64_t
/*
 * vfncvt.xu.f.v vd, vs2, vm # Convert double-width float to unsigned integer.
 */
RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8)
RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16)
RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32)
GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1)
GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2)
GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4)

/* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */
RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8)
RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16)
RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32)
GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1)
GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2)
GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4)

/*
 * vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float.
 */
RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16)
RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32)
GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2)
GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4)

/* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */
RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16)
RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32)
GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2)
GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4)

/*
 * vfncvt.f.f.v vd, vs2, vm # Convert double-width float to single-width float.
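 *
 * As in the widening case, the single-to-half helper below passes
 * ieee = true to float32_to_float16(); the narrowing may raise the
 * overflow, underflow and inexact flags through env->fp_status.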
*/ 4636 static uint16_t vfncvtffv16(uint32_t a, float_status *s) 4637 { 4638 return float32_to_float16(a, true, s); 4639 } 4640 4641 RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16) 4642 RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32) 4643 GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2) 4644 GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4) 4645 4646 RVVCALL(OPFVV1, vfncvtbf16_f_f_w, NOP_UU_H, H2, H4, float32_to_bfloat16) 4647 GEN_VEXT_V_ENV(vfncvtbf16_f_f_w, 2) 4648 4649 /* 4650 * Vector Reduction Operations 4651 */ 4652 /* Vector Single-Width Integer Reduction Instructions */ 4653 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \ 4654 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4655 void *vs2, CPURISCVState *env, \ 4656 uint32_t desc) \ 4657 { \ 4658 uint32_t vm = vext_vm(desc); \ 4659 uint32_t vl = env->vl; \ 4660 uint32_t esz = sizeof(TD); \ 4661 uint32_t vlenb = simd_maxsz(desc); \ 4662 uint32_t vta = vext_vta(desc); \ 4663 uint32_t i; \ 4664 TD s1 = *((TD *)vs1 + HD(0)); \ 4665 \ 4666 VSTART_CHECK_EARLY_EXIT(env, vl); \ 4667 \ 4668 for (i = env->vstart; i < vl; i++) { \ 4669 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4670 if (!vm && !vext_elem_mask(v0, i)) { \ 4671 continue; \ 4672 } \ 4673 s1 = OP(s1, (TD)s2); \ 4674 } \ 4675 if (vl > 0) { \ 4676 *((TD *)vd + HD(0)) = s1; \ 4677 } \ 4678 env->vstart = 0; \ 4679 /* set tail elements to 1s */ \ 4680 vext_set_elems_1s(vd, vta, esz, vlenb); \ 4681 } 4682 4683 /* vd[0] = sum(vs1[0], vs2[*]) */ 4684 GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD) 4685 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD) 4686 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD) 4687 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD) 4688 4689 /* vd[0] = maxu(vs1[0], vs2[*]) */ 4690 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX) 4691 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX) 4692 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX) 4693 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX) 4694 4695 /* vd[0] = max(vs1[0], vs2[*]) */ 4696 GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX) 4697 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX) 4698 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX) 4699 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX) 4700 4701 /* vd[0] = minu(vs1[0], vs2[*]) */ 4702 GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN) 4703 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN) 4704 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN) 4705 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN) 4706 4707 /* vd[0] = min(vs1[0], vs2[*]) */ 4708 GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN) 4709 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN) 4710 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN) 4711 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN) 4712 4713 /* vd[0] = and(vs1[0], vs2[*]) */ 4714 GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND) 4715 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND) 4716 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND) 4717 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND) 4718 4719 /* vd[0] = or(vs1[0], vs2[*]) */ 4720 GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR) 4721 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR) 4722 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR) 4723 
GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR) 4724 4725 /* vd[0] = xor(vs1[0], vs2[*]) */ 4726 GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR) 4727 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR) 4728 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR) 4729 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR) 4730 4731 /* Vector Widening Integer Reduction Instructions */ 4732 /* signed sum reduction into double-width accumulator */ 4733 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD) 4734 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD) 4735 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD) 4736 4737 /* Unsigned sum reduction into double-width accumulator */ 4738 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD) 4739 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD) 4740 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD) 4741 4742 /* Vector Single-Width Floating-Point Reduction Instructions */ 4743 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \ 4744 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4745 void *vs2, CPURISCVState *env, \ 4746 uint32_t desc) \ 4747 { \ 4748 uint32_t vm = vext_vm(desc); \ 4749 uint32_t vl = env->vl; \ 4750 uint32_t esz = sizeof(TD); \ 4751 uint32_t vlenb = simd_maxsz(desc); \ 4752 uint32_t vta = vext_vta(desc); \ 4753 uint32_t i; \ 4754 TD s1 = *((TD *)vs1 + HD(0)); \ 4755 \ 4756 VSTART_CHECK_EARLY_EXIT(env, vl); \ 4757 \ 4758 for (i = env->vstart; i < vl; i++) { \ 4759 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4760 if (!vm && !vext_elem_mask(v0, i)) { \ 4761 continue; \ 4762 } \ 4763 s1 = OP(s1, (TD)s2, &env->fp_status); \ 4764 } \ 4765 if (vl > 0) { \ 4766 *((TD *)vd + HD(0)) = s1; \ 4767 } \ 4768 env->vstart = 0; \ 4769 /* set tail elements to 1s */ \ 4770 vext_set_elems_1s(vd, vta, esz, vlenb); \ 4771 } 4772 4773 /* Unordered sum */ 4774 GEN_VEXT_FRED(vfredusum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 4775 GEN_VEXT_FRED(vfredusum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4776 GEN_VEXT_FRED(vfredusum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4777 4778 /* Ordered sum */ 4779 GEN_VEXT_FRED(vfredosum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 4780 GEN_VEXT_FRED(vfredosum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4781 GEN_VEXT_FRED(vfredosum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4782 4783 /* Maximum value */ 4784 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, 4785 float16_maximum_number) 4786 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, 4787 float32_maximum_number) 4788 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, 4789 float64_maximum_number) 4790 4791 /* Minimum value */ 4792 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, 4793 float16_minimum_number) 4794 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, 4795 float32_minimum_number) 4796 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, 4797 float64_minimum_number) 4798 4799 /* Vector Widening Floating-Point Add Instructions */ 4800 static uint32_t fwadd16(uint32_t a, uint16_t b, float_status *s) 4801 { 4802 return float32_add(a, float16_to_float32(b, true, s), s); 4803 } 4804 4805 static uint64_t fwadd32(uint64_t a, uint32_t b, float_status *s) 4806 { 4807 return float64_add(a, float32_to_float64(b, s), s); 4808 } 4809 4810 /* Vector Widening Floating-Point Reduction Instructions */ 4811 /* Ordered/unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */ 4812 GEN_VEXT_FRED(vfwredusum_vs_h, 
uint32_t, uint16_t, H4, H2, fwadd16) 4813 GEN_VEXT_FRED(vfwredusum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32) 4814 GEN_VEXT_FRED(vfwredosum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16) 4815 GEN_VEXT_FRED(vfwredosum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32) 4816 4817 /* 4818 * Vector Mask Operations 4819 */ 4820 /* Vector Mask-Register Logical Instructions */ 4821 #define GEN_VEXT_MASK_VV(NAME, OP) \ 4822 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4823 void *vs2, CPURISCVState *env, \ 4824 uint32_t desc) \ 4825 { \ 4826 uint32_t vl = env->vl; \ 4827 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;\ 4828 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 4829 uint32_t i; \ 4830 int a, b; \ 4831 \ 4832 VSTART_CHECK_EARLY_EXIT(env, vl); \ 4833 \ 4834 for (i = env->vstart; i < vl; i++) { \ 4835 a = vext_elem_mask(vs1, i); \ 4836 b = vext_elem_mask(vs2, i); \ 4837 vext_set_elem_mask(vd, i, OP(b, a)); \ 4838 } \ 4839 env->vstart = 0; \ 4840 /* 4841 * mask destination register are always tail-agnostic 4842 * set tail elements to 1s 4843 */ \ 4844 if (vta_all_1s) { \ 4845 for (; i < total_elems; i++) { \ 4846 vext_set_elem_mask(vd, i, 1); \ 4847 } \ 4848 } \ 4849 } 4850 4851 #define DO_NAND(N, M) (!(N & M)) 4852 #define DO_ANDNOT(N, M) (N & !M) 4853 #define DO_NOR(N, M) (!(N | M)) 4854 #define DO_ORNOT(N, M) (N | !M) 4855 #define DO_XNOR(N, M) (!(N ^ M)) 4856 4857 GEN_VEXT_MASK_VV(vmand_mm, DO_AND) 4858 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND) 4859 GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT) 4860 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR) 4861 GEN_VEXT_MASK_VV(vmor_mm, DO_OR) 4862 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR) 4863 GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT) 4864 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR) 4865 4866 /* Vector count population in mask vcpop */ 4867 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env, 4868 uint32_t desc) 4869 { 4870 target_ulong cnt = 0; 4871 uint32_t vm = vext_vm(desc); 4872 uint32_t vl = env->vl; 4873 int i; 4874 4875 for (i = env->vstart; i < vl; i++) { 4876 if (vm || vext_elem_mask(v0, i)) { 4877 if (vext_elem_mask(vs2, i)) { 4878 cnt++; 4879 } 4880 } 4881 } 4882 env->vstart = 0; 4883 return cnt; 4884 } 4885 4886 /* vfirst find-first-set mask bit */ 4887 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env, 4888 uint32_t desc) 4889 { 4890 uint32_t vm = vext_vm(desc); 4891 uint32_t vl = env->vl; 4892 int i; 4893 4894 for (i = env->vstart; i < vl; i++) { 4895 if (vm || vext_elem_mask(v0, i)) { 4896 if (vext_elem_mask(vs2, i)) { 4897 return i; 4898 } 4899 } 4900 } 4901 env->vstart = 0; 4902 return -1LL; 4903 } 4904 4905 enum set_mask_type { 4906 ONLY_FIRST = 1, 4907 INCLUDE_FIRST, 4908 BEFORE_FIRST, 4909 }; 4910 4911 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, 4912 uint32_t desc, enum set_mask_type type) 4913 { 4914 uint32_t vm = vext_vm(desc); 4915 uint32_t vl = env->vl; 4916 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; 4917 uint32_t vta_all_1s = vext_vta_all_1s(desc); 4918 uint32_t vma = vext_vma(desc); 4919 int i; 4920 bool first_mask_bit = false; 4921 4922 VSTART_CHECK_EARLY_EXIT(env, vl); 4923 4924 for (i = env->vstart; i < vl; i++) { 4925 if (!vm && !vext_elem_mask(v0, i)) { 4926 /* set masked-off elements to 1s */ 4927 if (vma) { 4928 vext_set_elem_mask(vd, i, 1); 4929 } 4930 continue; 4931 } 4932 /* write a zero to all following active elements */ 4933 if (first_mask_bit) { 4934 vext_set_elem_mask(vd, i, 0); 4935 continue; 4936 } 4937 if (vext_elem_mask(vs2, i)) { 4938 first_mask_bit = true; 4939 if (type 
enum set_mask_type {
    ONLY_FIRST = 1,
    INCLUDE_FIRST,
    BEFORE_FIRST,
};

static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
                   uint32_t desc, enum set_mask_type type)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;
    uint32_t vta_all_1s = vext_vta_all_1s(desc);
    uint32_t vma = vext_vma(desc);
    int i;
    bool first_mask_bit = false;

    VSTART_CHECK_EARLY_EXIT(env, vl);

    for (i = env->vstart; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            /* set masked-off elements to 1s */
            if (vma) {
                vext_set_elem_mask(vd, i, 1);
            }
            continue;
        }
        /* write a zero to all following active elements */
        if (first_mask_bit) {
            vext_set_elem_mask(vd, i, 0);
            continue;
        }
        if (vext_elem_mask(vs2, i)) {
            first_mask_bit = true;
            if (type == BEFORE_FIRST) {
                vext_set_elem_mask(vd, i, 0);
            } else {
                vext_set_elem_mask(vd, i, 1);
            }
        } else {
            if (type == ONLY_FIRST) {
                vext_set_elem_mask(vd, i, 0);
            } else {
                vext_set_elem_mask(vd, i, 1);
            }
        }
    }
    env->vstart = 0;
    /*
     * The mask destination register is always tail-agnostic;
     * set tail elements to 1s.
     */
    if (vta_all_1s) {
        for (; i < total_elems; i++) {
            vext_set_elem_mask(vd, i, 1);
        }
    }
}

void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
}

void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
}

void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
}

/* Vector Iota Instruction */
#define GEN_VEXT_VIOTA_M(NAME, ETYPE, H)                                  \
void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env,      \
                  uint32_t desc)                                          \
{                                                                         \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint32_t sum = 0;                                                     \
    int i;                                                                \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = sum;                                      \
        if (vext_elem_mask(vs2, i)) {                                     \
            sum++;                                                        \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_VIOTA_M(viota_m_b, uint8_t,  H1)
GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)

/* Vector Element Index Instruction */
#define GEN_VEXT_VID_V(NAME, ETYPE, H)                                    \
void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc)  \
{                                                                         \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    int i;                                                                \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = i;                                        \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_VID_V(vid_v_b, uint8_t,  H1)
GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)
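
/*
 * Illustrative sketch (not part of the build): for an unmasked viota.m
 * with vstart == 0, the loop above is an exclusive prefix sum of the
 * source mask bits, i.e. vd[i] counts how many of vs2[0..i-1] are set.
 * With the mask bits 1 0 1 1 0 and vl = 5 the result is 0 1 1 2 3.
 * exclusive_scan_mask() is a hypothetical name.
 *
 *     static void exclusive_scan_mask(uint32_t *vd, const uint64_t *vs2,
 *                                     uint32_t vl)
 *     {
 *         uint32_t sum = 0;
 *
 *         for (uint32_t i = 0; i < vl; i++) {
 *             vd[i] = sum;
 *             sum += (vs2[i / 64] >> (i % 64)) & 1;
 *         }
 *     }
 */
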
/*
 * Vector Permutation Instructions
 */

/* Vector Slide Instructions */
#define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H)                              \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    target_ulong offset = s1, i_min, i;                                   \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    i_min = MAX(env->vstart, offset);                                     \
    for (i = i_min; i < vl; i++) {                                        \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset));          \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t,  H1)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)

#define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H)                            \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    target_ulong i_max, i_min, i;                                         \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    i_min = MIN(s1 < vlmax ? vlmax - s1 : 0, vl);                         \
    i_max = MAX(i_min, env->vstart);                                      \
    for (i = env->vstart; i < i_max; ++i) {                               \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1));              \
    }                                                                     \
                                                                          \
    for (i = i_max; i < vl; ++i) {                                        \
        if (vm || vext_elem_mask(v0, i)) {                                \
            *((ETYPE *)vd + H(i)) = 0;                                    \
        }                                                                 \
    }                                                                     \
                                                                          \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t,  H1)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)
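
/*
 * Illustrative sketch (not part of the build): the unmasked, vstart == 0
 * behaviour of vslidedown.vx on uint32_t elements, with out-of-range
 * source elements reading as zero.  slidedown_u32() is a hypothetical
 * name.
 *
 *     static void slidedown_u32(uint32_t *vd, const uint32_t *vs2,
 *                               uint32_t vl, uint32_t vlmax,
 *                               target_ulong offset)
 *     {
 *         for (uint32_t i = 0; i < vl; i++) {
 *             vd[i] = (i + offset < vlmax) ? vs2[i + offset] : 0;
 *         }
 *     }
 *
 * e.g. vs2 = {10, 20, 30, 40}, vl = vlmax = 4, offset = 1 gives
 * vd = {20, 30, 40, 0}.  vslideup.vx is the mirror image: it writes
 * vd[i] = vs2[i - offset] for offset <= i < vl and leaves vd[0..offset-1]
 * unchanged.
 */
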
#define GEN_VEXT_VSLIDE1UP(BITWIDTH, H)                                   \
static void vslide1up_##BITWIDTH(void *vd, void *v0, uint64_t s1,         \
                                 void *vs2, CPURISCVState *env,           \
                                 uint32_t desc)                           \
{                                                                         \
    typedef uint##BITWIDTH##_t ETYPE;                                     \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint32_t i;                                                           \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        if (i == 0) {                                                     \
            *((ETYPE *)vd + H(i)) = s1;                                   \
        } else {                                                          \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_VSLIDE1UP(8,  H1)
GEN_VEXT_VSLIDE1UP(16, H2)
GEN_VEXT_VSLIDE1UP(32, H4)
GEN_VEXT_VSLIDE1UP(64, H8)

#define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH)                             \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc);                     \
}

/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)

#define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H)                                 \
static void vslide1down_##BITWIDTH(void *vd, void *v0, uint64_t s1,       \
                                   void *vs2, CPURISCVState *env,         \
                                   uint32_t desc)                         \
{                                                                         \
    typedef uint##BITWIDTH##_t ETYPE;                                     \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint32_t i;                                                           \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        if (i == vl - 1) {                                                \
            *((ETYPE *)vd + H(i)) = s1;                                   \
        } else {                                                          \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_VSLIDE1DOWN(8,  H1)
GEN_VEXT_VSLIDE1DOWN(16, H2)
GEN_VEXT_VSLIDE1DOWN(32, H4)
GEN_VEXT_VSLIDE1DOWN(64, H8)

#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH)                           \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc);                   \
}

/* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)

/* Vector Floating-Point Slide Instructions */
#define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH)                            \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,             \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc);                     \
}

/* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)

#define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH)                          \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,             \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc);                   \
}

/* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)
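
/*
 * Illustrative sketch (not part of the build): for an unmasked
 * vslide1down with vstart == 0, the helpers above behave like shifting
 * the source down by one element and writing the scalar into the last
 * active element.  slide1down_u32() is a hypothetical name.
 *
 *     static void slide1down_u32(uint32_t *vd, const uint32_t *vs2,
 *                                uint32_t s1, uint32_t vl)
 *     {
 *         for (uint32_t i = 0; i + 1 < vl; i++) {
 *             vd[i] = vs2[i + 1];
 *         }
 *         if (vl > 0) {
 *             vd[vl - 1] = s1;
 *         }
 *     }
 *
 * As the comments above note, the vfslide1up.vf/vfslide1down.vf helpers
 * reuse the same element-move code; the scalar simply comes from f[rs1]
 * instead of x[rs1].
 */
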
/* Vector Register Gather Instruction */
#define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2)                    \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2)));             \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(TS2);                                           \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint64_t index;                                                       \
    uint32_t i;                                                           \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        index = *((TS1 *)vs1 + HS1(i));                                   \
        if (index >= vlmax) {                                             \
            *((TS2 *)vd + HS2(i)) = 0;                                    \
        } else {                                                          \
            *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t,  uint8_t,  H1, H1)
GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)

GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t,  H2, H1)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)
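
/*
 * Illustrative sketch (not part of the build): the unmasked, vstart == 0
 * core of vrgather.vv on uint32_t elements.  Out-of-range indices
 * (>= vlmax) read as zero rather than trapping.  gather_u32() is a
 * hypothetical name.
 *
 *     static void gather_u32(uint32_t *vd, const uint32_t *vs1,
 *                            const uint32_t *vs2, uint32_t vl,
 *                            uint32_t vlmax)
 *     {
 *         for (uint32_t i = 0; i < vl; i++) {
 *             uint64_t index = vs1[i];
 *
 *             vd[i] = (index >= vlmax) ? 0 : vs2[index];
 *         }
 *     }
 *
 * vrgatherei16.vv follows the same pattern but always reads 16-bit
 * indices from vs1, which is why the macro takes separate index
 * (TS1/HS1) and data (TS2/HS2) types.
 */
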
#define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H)                              \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint64_t index = s1;                                                  \
    uint32_t i;                                                           \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        if (index >= vlmax) {                                             \
            *((ETYPE *)vd + H(i)) = 0;                                    \
        } else {                                                          \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */
GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t,  H1)
GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)

/* Vector Compress Instruction */
#define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H)                             \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t num = 0, i;                                                  \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vext_elem_mask(vs1, i)) {                                    \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i));                 \
        num++;                                                            \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, num * esz, total_elems * esz);             \
}

/* Compress into vd elements of vs2 where vs1 is enabled */
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t,  H1)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)
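
/*
 * Illustrative sketch (not part of the build): with vstart == 0,
 * vcompress.vm packs the elements of vs2 whose vs1 mask bit is set into
 * the low-numbered elements of vd; elements from the packed count
 * upwards are then treated as tail.  compress_u32() is a hypothetical
 * name.
 *
 *     static uint32_t compress_u32(uint32_t *vd, const uint64_t *vs1,
 *                                  const uint32_t *vs2, uint32_t vl)
 *     {
 *         uint32_t num = 0;
 *
 *         for (uint32_t i = 0; i < vl; i++) {
 *             if ((vs1[i / 64] >> (i % 64)) & 1) {
 *                 vd[num++] = vs2[i];
 *             }
 *         }
 *         return num;    // number of elements written
 *     }
 *
 * e.g. vs2 = {10, 20, 30, 40} with mask bits 0 1 1 0 writes
 * vd = {20, 30, ...} and returns 2.
 */
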
/* Vector Whole Register Move */
void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
{
    /* EEW = SEW */
    uint32_t maxsz = simd_maxsz(desc);
    uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
    uint32_t startb = env->vstart * sewb;
    uint32_t i = startb;

    if (startb >= maxsz) {
        env->vstart = 0;
        return;
    }

    if (HOST_BIG_ENDIAN && i % 8 != 0) {
        uint32_t j = ROUND_UP(i, 8);
        memcpy((uint8_t *)vd + H1(j - 1),
               (uint8_t *)vs2 + H1(j - 1),
               j - i);
        i = j;
    }

    memcpy((uint8_t *)vd + H1(i),
           (uint8_t *)vs2 + H1(i),
           maxsz - i);

    env->vstart = 0;
}

/* Vector Integer Extension */
#define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1)                     \
void HELPER(NAME)(void *vd, void *v0, void *vs2,                          \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vl = env->vl;                                                \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint32_t i;                                                           \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i));                \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t,  H2, H1)
GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t,  H4, H1)
GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t,  H8, H1)

GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t,  H2, H1)
GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t,  H4, H1)
GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t,  H8, H1)
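
/*
 * Illustrative note (not part of the build): the extension helpers rely
 * on plain C assignment to widen each source element, so the same macro
 * covers both zero- and sign-extension depending on whether the
 * ETYPE/DTYPE pair is unsigned or signed.  For example, for vsext.vf4
 * from int8_t to int32_t versus vzext.vf4 from uint8_t to uint32_t:
 *
 *     int8_t  src = -5;         // 0xfb
 *     int32_t dst = src;        // sign-extends to 0xfffffffb (-5)
 *
 *     uint8_t  usrc = 0xfb;     // 251
 *     uint32_t udst = usrc;     // zero-extends to 0x000000fb (251)
 */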