/*
 * RISC-V Vector Extension Helpers for QEMU.
 *
 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "qemu/bitops.h"
#include "cpu.h"
#include "exec/memop.h"
#include "exec/exec-all.h"
#include "exec/cpu_ldst.h"
#include "exec/page-protection.h"
#include "exec/helper-proto.h"
#include "fpu/softfloat.h"
#include "tcg/tcg-gvec-desc.h"
#include "internals.h"
#include "vector_internals.h"
#include <math.h>

target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
                            target_ulong s2)
{
    int vlmax, vl;
    RISCVCPU *cpu = env_archcpu(env);
    uint64_t vlmul = FIELD_EX64(s2, VTYPE, VLMUL);
    uint8_t vsew = FIELD_EX64(s2, VTYPE, VSEW);
    uint16_t sew = 8 << vsew;
    uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
    int xlen = riscv_cpu_xlen(env);
    bool vill = (s2 >> (xlen - 1)) & 0x1;
    target_ulong reserved = s2 &
                            MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
                                            xlen - 1 - R_VTYPE_RESERVED_SHIFT);
    uint16_t vlen = cpu->cfg.vlenb << 3;
    int8_t lmul;

    if (vlmul & 4) {
        /*
         * Fractional LMUL, check:
         *
         * VLEN * LMUL >= SEW
         * VLEN >> (8 - lmul) >= sew
         * (vlenb << 3) >> (8 - lmul) >= sew
         */
        if (vlmul == 4 || (vlen >> (8 - vlmul)) < sew) {
            vill = true;
        }
    }

    if ((sew > cpu->cfg.elen) || vill || (ediv != 0) || (reserved != 0)) {
        /* only set vill bit. */
        env->vill = 1;
        env->vtype = 0;
        env->vl = 0;
        env->vstart = 0;
        return 0;
    }

    /* lmul encoded as in DisasContext::lmul */
    lmul = sextract32(FIELD_EX64(s2, VTYPE, VLMUL), 0, 3);
    vlmax = vext_get_vlmax(cpu->cfg.vlenb, vsew, lmul);
    if (s1 <= vlmax) {
        vl = s1;
    } else if (s1 < 2 * vlmax && cpu->cfg.rvv_vl_half_avl) {
        vl = (s1 + 1) >> 1;
    } else {
        vl = vlmax;
    }
    env->vl = vl;
    env->vtype = s2;
    env->vstart = 0;
    env->vill = 0;
    return vl;
}

/*
 * Get the maximum number of elements that can be operated on.
 *
 * log2_esz: log2 of element size in bytes.
 */
static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz)
{
    /*
     * As simd_desc supports at most 2048 bytes, the max vlen is 1024 bits,
     * so vlen in bytes (vlenb) is encoded as maxsz.
     */
    uint32_t vlenb = simd_maxsz(desc);

    /* Return VLMAX */
    int scale = vext_lmul(desc) - log2_esz;
    return scale < 0 ? vlenb >> -scale : vlenb << scale;
}
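/*
 * For illustration, assuming VLEN = 128 bits (vlenb = 16): with SEW = 32
 * (log2_esz = 2) and LMUL = 1/2, vext_lmul(desc) = -1, so
 * scale = -1 - 2 = -3 and VLMAX = vlenb >> 3 = 2 elements; with LMUL = 8,
 * vext_lmul(desc) = 3, scale = 3 - 2 = 1 and VLMAX = vlenb << 1 = 32
 * elements.  Both match VLMAX = LMUL * VLEN / SEW from the RVV spec.
 */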
/*
 * This function checks watchpoints before the real load operation.
 *
 * In system mode, the TLB API probe_access is enough for the watchpoint check.
 * In user mode, there is no watchpoint support for now.
 *
 * It will trigger an exception if there is no mapping in the TLB
 * and the page table walk can't fill the TLB entry. The guest software
 * can then return here after processing the exception, or never return.
 */
static void probe_pages(CPURISCVState *env, target_ulong addr,
                        target_ulong len, uintptr_t ra,
                        MMUAccessType access_type)
{
    target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
    target_ulong curlen = MIN(pagelen, len);
    int mmu_index = riscv_env_mmu_index(env, false);

    probe_access(env, adjust_addr(env, addr), curlen, access_type,
                 mmu_index, ra);
    if (len > curlen) {
        addr += curlen;
        curlen = len - curlen;
        probe_access(env, adjust_addr(env, addr), curlen, access_type,
                     mmu_index, ra);
    }
}

static inline void vext_set_elem_mask(void *v0, int index,
                                      uint8_t value)
{
    int idx = index / 64;
    int pos = index % 64;
    uint64_t old = ((uint64_t *)v0)[idx];
    ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
}

/* element operations for load and store */
typedef void vext_ldst_elem_fn_tlb(CPURISCVState *env, abi_ptr addr,
                                   uint32_t idx, void *vd, uintptr_t retaddr);
typedef void vext_ldst_elem_fn_host(void *vd, uint32_t idx, void *host);

#define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF)            \
static inline QEMU_ALWAYS_INLINE                           \
void NAME##_tlb(CPURISCVState *env, abi_ptr addr,          \
                uint32_t idx, void *vd, uintptr_t retaddr) \
{                                                          \
    ETYPE *cur = ((ETYPE *)vd + H(idx));                   \
    *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr);      \
}                                                          \
                                                           \
static inline QEMU_ALWAYS_INLINE                           \
void NAME##_host(void *vd, uint32_t idx, void *host)       \
{                                                          \
    ETYPE *cur = ((ETYPE *)vd + H(idx));                   \
    *cur = (ETYPE)LDSUF##_p(host);                         \
}

GEN_VEXT_LD_ELEM(lde_b, uint8_t,  H1, ldub)
GEN_VEXT_LD_ELEM(lde_h, uint16_t, H2, lduw)
GEN_VEXT_LD_ELEM(lde_w, uint32_t, H4, ldl)
GEN_VEXT_LD_ELEM(lde_d, uint64_t, H8, ldq)

#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF)            \
static inline QEMU_ALWAYS_INLINE                           \
void NAME##_tlb(CPURISCVState *env, abi_ptr addr,          \
                uint32_t idx, void *vd, uintptr_t retaddr) \
{                                                          \
    ETYPE data = *((ETYPE *)vd + H(idx));                  \
    cpu_##STSUF##_data_ra(env, addr, data, retaddr);       \
}                                                          \
                                                           \
static inline QEMU_ALWAYS_INLINE                           \
void NAME##_host(void *vd, uint32_t idx, void *host)       \
{                                                          \
    ETYPE data = *((ETYPE *)vd + H(idx));                  \
    STSUF##_p(host, data);                                 \
}

GEN_VEXT_ST_ELEM(ste_b, uint8_t,  H1, stb)
GEN_VEXT_ST_ELEM(ste_h, uint16_t, H2, stw)
GEN_VEXT_ST_ELEM(ste_w, uint32_t, H4, stl)
GEN_VEXT_ST_ELEM(ste_d, uint64_t, H8, stq)
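/*
 * As a rough sketch of what GEN_VEXT_LD_ELEM(lde_w, uint32_t, H4, ldl)
 * expands to (modulo the always-inline attributes):
 *
 *     void lde_w_tlb(CPURISCVState *env, abi_ptr addr,
 *                    uint32_t idx, void *vd, uintptr_t retaddr)
 *     {
 *         uint32_t *cur = ((uint32_t *)vd + H4(idx));
 *         *cur = cpu_ldl_data_ra(env, addr, retaddr);
 *     }
 *
 * i.e. the "_tlb" flavour goes through the softmmu/TLB load path with the
 * helper's return address for unwinding, while the "_host" flavour reads
 * directly from an already-probed host pointer via ldl_p().
 */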
static inline QEMU_ALWAYS_INLINE void
vext_continuous_ldst_tlb(CPURISCVState *env, vext_ldst_elem_fn_tlb *ldst_tlb,
                         void *vd, uint32_t evl, target_ulong addr,
                         uint32_t reg_start, uintptr_t ra, uint32_t esz,
                         bool is_load)
{
    uint32_t i;
    for (i = env->vstart; i < evl; env->vstart = ++i, addr += esz) {
        ldst_tlb(env, adjust_addr(env, addr), i, vd, ra);
    }
}

static inline QEMU_ALWAYS_INLINE void
vext_continuous_ldst_host(CPURISCVState *env, vext_ldst_elem_fn_host *ldst_host,
                          void *vd, uint32_t evl, uint32_t reg_start, void *host,
                          uint32_t esz, bool is_load)
{
#if HOST_BIG_ENDIAN
    for (; reg_start < evl; reg_start++, host += esz) {
        ldst_host(vd, reg_start, host);
    }
#else
    if (esz == 1) {
        uint32_t byte_offset = reg_start * esz;
        uint32_t size = (evl - reg_start) * esz;

        if (is_load) {
            memcpy(vd + byte_offset, host, size);
        } else {
            memcpy(host, vd + byte_offset, size);
        }
    } else {
        for (; reg_start < evl; reg_start++, host += esz) {
            ldst_host(vd, reg_start, host);
        }
    }
#endif
}

static void vext_set_tail_elems_1s(target_ulong vl, void *vd,
                                   uint32_t desc, uint32_t nf,
                                   uint32_t esz, uint32_t max_elems)
{
    uint32_t vta = vext_vta(desc);
    int k;

    if (vta == 0) {
        return;
    }

    for (k = 0; k < nf; ++k) {
        vext_set_elems_1s(vd, vta, (k * max_elems + vl) * esz,
                          (k * max_elems + max_elems) * esz);
    }
}

/*
 * stride: access vector element from strided memory
 */
static void
vext_ldst_stride(void *vd, void *v0, target_ulong base, target_ulong stride,
                 CPURISCVState *env, uint32_t desc, uint32_t vm,
                 vext_ldst_elem_fn_tlb *ldst_elem, uint32_t log2_esz,
                 uintptr_t ra)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t vma = vext_vma(desc);

    VSTART_CHECK_EARLY_EXIT(env);

    for (i = env->vstart; i < env->vl; env->vstart = ++i) {
        k = 0;
        while (k < nf) {
            if (!vm && !vext_elem_mask(v0, i)) {
                /* set masked-off elements to 1s */
                vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
                                  (i + k * max_elems + 1) * esz);
                k++;
                continue;
            }
            target_ulong addr = base + stride * i + (k << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;

    vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
}

#define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN)                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  target_ulong stride, CPURISCVState *env,              \
                  uint32_t desc)                                        \
{                                                                       \
    uint32_t vm = vext_vm(desc);                                        \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN,      \
                     ctzl(sizeof(ETYPE)), GETPC());                     \
}

GEN_VEXT_LD_STRIDE(vlse8_v,  int8_t,  lde_b_tlb)
GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h_tlb)
GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w_tlb)
GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d_tlb)

#define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN)                       \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  target_ulong stride, CPURISCVState *env,              \
                  uint32_t desc)                                        \
{                                                                       \
    uint32_t vm = vext_vm(desc);                                        \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN,     \
                     ctzl(sizeof(ETYPE)), GETPC());                     \
}

GEN_VEXT_ST_STRIDE(vsse8_v,  int8_t,  ste_b_tlb)
GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h_tlb)
GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w_tlb)
GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d_tlb)
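/*
 * For example, with SEW = 32 (esz = 4), nf = 2 and stride = 64 bytes, the
 * loop in vext_ldst_stride() above accesses, for element i and field k:
 *     addr = base + 64 * i + (k << 2)
 * so element 0 touches base+0 and base+4, element 1 touches base+64 and
 * base+68, and so on; field k of element i is placed in the in-register
 * slot i + k * max_elems of vd.
 */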
/*
 * unit-stride: access elements stored contiguously in memory
 */

/* unmasked unit-stride load and store operation */
static inline QEMU_ALWAYS_INLINE void
vext_page_ldst_us(CPURISCVState *env, void *vd, target_ulong addr,
                  uint32_t elems, uint32_t nf, uint32_t max_elems,
                  uint32_t log2_esz, bool is_load, int mmu_index,
                  vext_ldst_elem_fn_tlb *ldst_tlb,
                  vext_ldst_elem_fn_host *ldst_host, uintptr_t ra)
{
    void *host;
    int i, k, flags;
    uint32_t esz = 1 << log2_esz;
    uint32_t size = (elems * nf) << log2_esz;
    uint32_t evl = env->vstart + elems;
    MMUAccessType access_type = is_load ? MMU_DATA_LOAD : MMU_DATA_STORE;

    /* Check page permission/pmp/watchpoint/etc. */
    flags = probe_access_flags(env, adjust_addr(env, addr), size, access_type,
                               mmu_index, true, &host, ra);

    if (flags == 0) {
        if (nf == 1) {
            vext_continuous_ldst_host(env, ldst_host, vd, evl, env->vstart,
                                      host, esz, is_load);
        } else {
            for (i = env->vstart; i < evl; ++i) {
                k = 0;
                while (k < nf) {
                    ldst_host(vd, i + k * max_elems, host);
                    host += esz;
                    k++;
                }
            }
        }
        env->vstart += elems;
    } else {
        if (nf == 1) {
            vext_continuous_ldst_tlb(env, ldst_tlb, vd, evl, addr, env->vstart,
                                     ra, esz, is_load);
        } else {
            /* load bytes from guest memory */
            for (i = env->vstart; i < evl; env->vstart = ++i) {
                k = 0;
                while (k < nf) {
                    ldst_tlb(env, adjust_addr(env, addr), i + k * max_elems,
                             vd, ra);
                    addr += esz;
                    k++;
                }
            }
        }
    }
}

static inline QEMU_ALWAYS_INLINE void
vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
             vext_ldst_elem_fn_tlb *ldst_tlb,
             vext_ldst_elem_fn_host *ldst_host, uint32_t log2_esz,
             uint32_t evl, uintptr_t ra, bool is_load)
{
    uint32_t k;
    target_ulong page_split, elems, addr;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t msize = nf * esz;
    int mmu_index = riscv_env_mmu_index(env, false);

    if (env->vstart >= evl) {
        env->vstart = 0;
        return;
    }

#if defined(CONFIG_USER_ONLY)
    /*
     * For data sizes <= 6 bytes we get better performance by simply calling
     * vext_continuous_ldst_tlb
     */
    if (nf == 1 && (evl << log2_esz) <= 6) {
        addr = base + (env->vstart << log2_esz);
        vext_continuous_ldst_tlb(env, ldst_tlb, vd, evl, addr, env->vstart, ra,
                                 esz, is_load);

        env->vstart = 0;
        vext_set_tail_elems_1s(evl, vd, desc, nf, esz, max_elems);
        return;
    }
#endif

    /* Calculate the page range of first page */
    addr = base + ((env->vstart * nf) << log2_esz);
    page_split = -(addr | TARGET_PAGE_MASK);
    /* Get number of elements */
    elems = page_split / msize;
    if (unlikely(env->vstart + elems >= evl)) {
        elems = evl - env->vstart;
    }

    /* Load/store elements in the first page */
    if (likely(elems)) {
        vext_page_ldst_us(env, vd, addr, elems, nf, max_elems, log2_esz,
                          is_load, mmu_index, ldst_tlb, ldst_host, ra);
    }

    /* Load/store elements in the second page */
    if (unlikely(env->vstart < evl)) {
        /* Cross page element */
        if (unlikely(page_split % msize)) {
            for (k = 0; k < nf; k++) {
                addr = base + ((env->vstart * nf + k) << log2_esz);
                ldst_tlb(env, adjust_addr(env, addr),
                         env->vstart + k * max_elems, vd, ra);
            }
            env->vstart++;
        }

        addr = base + ((env->vstart * nf) << log2_esz);
        /* Get number of elements of second page */
        elems = evl - env->vstart;

        /* Load/store elements in the second page */
        vext_page_ldst_us(env, vd, addr, elems, nf, max_elems, log2_esz,
                          is_load, mmu_index, ldst_tlb, ldst_host, ra);
    }

    env->vstart = 0;
    vext_set_tail_elems_1s(evl, vd, desc, nf, esz, max_elems);
}
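/*
 * A short numeric sketch of the page split above, assuming a 4 KiB target
 * page, nf = 1 and SEW = 64 (msize = 8): if the first element lands at
 * addr = 0x1ff0, then page_split = -(0x1ff0 | ~0xfff) = 0x10 bytes remain
 * in the first page, so elems = 0x10 / 8 = 2 elements are handled by the
 * first vext_page_ldst_us() call; page_split % msize == 0, so no element
 * straddles the boundary, and the remaining evl - vstart elements are
 * handled by the second call starting at 0x2000.
 */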
/*
 * A masked unit-stride load or store is handled as a special case of the
 * strided operation, with stride = NF * sizeof(ETYPE).
 */

#define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN_TLB, LOAD_FN_HOST)          \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
                         CPURISCVState *env, uint32_t desc)             \
{                                                                       \
    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));             \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false,            \
                     LOAD_FN_TLB, ctzl(sizeof(ETYPE)), GETPC());        \
}                                                                       \
                                                                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  CPURISCVState *env, uint32_t desc)                    \
{                                                                       \
    vext_ldst_us(vd, base, env, desc, LOAD_FN_TLB, LOAD_FN_HOST,        \
                 ctzl(sizeof(ETYPE)), env->vl, GETPC(), true);          \
}

GEN_VEXT_LD_US(vle8_v,  int8_t,  lde_b_tlb, lde_b_host)
GEN_VEXT_LD_US(vle16_v, int16_t, lde_h_tlb, lde_h_host)
GEN_VEXT_LD_US(vle32_v, int32_t, lde_w_tlb, lde_w_host)
GEN_VEXT_LD_US(vle64_v, int64_t, lde_d_tlb, lde_d_host)

#define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN_TLB, STORE_FN_HOST)        \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
                         CPURISCVState *env, uint32_t desc)             \
{                                                                       \
    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));             \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false,            \
                     STORE_FN_TLB, ctzl(sizeof(ETYPE)), GETPC());       \
}                                                                       \
                                                                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  CPURISCVState *env, uint32_t desc)                    \
{                                                                       \
    vext_ldst_us(vd, base, env, desc, STORE_FN_TLB, STORE_FN_HOST,      \
                 ctzl(sizeof(ETYPE)), env->vl, GETPC(), false);         \
}

GEN_VEXT_ST_US(vse8_v,  int8_t,  ste_b_tlb, ste_b_host)
GEN_VEXT_ST_US(vse16_v, int16_t, ste_h_tlb, ste_h_host)
GEN_VEXT_ST_US(vse32_v, int32_t, ste_w_tlb, ste_w_host)
GEN_VEXT_ST_US(vse64_v, int64_t, ste_d_tlb, ste_d_host)

/*
 * unit stride mask load and store, EEW = 1
 */
void HELPER(vlm_v)(void *vd, void *v0, target_ulong base,
                   CPURISCVState *env, uint32_t desc)
{
    /* evl = ceil(vl/8) */
    uint8_t evl = (env->vl + 7) >> 3;
    vext_ldst_us(vd, base, env, desc, lde_b_tlb, lde_b_host,
                 0, evl, GETPC(), true);
}

void HELPER(vsm_v)(void *vd, void *v0, target_ulong base,
                   CPURISCVState *env, uint32_t desc)
{
    /* evl = ceil(vl/8) */
    uint8_t evl = (env->vl + 7) >> 3;
    vext_ldst_us(vd, base, env, desc, ste_b_tlb, ste_b_host,
                 0, evl, GETPC(), false);
}
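/*
 * For example, with vl = 17 the mask occupies ceil(17 / 8) = 3 bytes, so
 * evl = (17 + 7) >> 3 = 3 and vlm.v/vsm.v transfer exactly 3 bytes
 * regardless of SEW or LMUL, as required for mask loads and stores.
 */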
/*
 * index: access vector element from indexed memory
 */
typedef target_ulong vext_get_index_addr(target_ulong base,
                                         uint32_t idx, void *vs2);

#define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H)        \
static target_ulong NAME(target_ulong base,            \
                         uint32_t idx, void *vs2)      \
{                                                      \
    return (base + *((ETYPE *)vs2 + H(idx)));          \
}

GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t,  H1)
GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)

static inline void
vext_ldst_index(void *vd, void *v0, target_ulong base,
                void *vs2, CPURISCVState *env, uint32_t desc,
                vext_get_index_addr get_index_addr,
                vext_ldst_elem_fn_tlb *ldst_elem,
                uint32_t log2_esz, uintptr_t ra)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t vma = vext_vma(desc);

    VSTART_CHECK_EARLY_EXIT(env);

    /* load bytes from guest memory */
    for (i = env->vstart; i < env->vl; env->vstart = ++i) {
        k = 0;
        while (k < nf) {
            if (!vm && !vext_elem_mask(v0, i)) {
                /* set masked-off elements to 1s */
                vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
                                  (i + k * max_elems + 1) * esz);
                k++;
                continue;
            }
            abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;

    vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
}

#define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN)        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,         \
                  void *vs2, CPURISCVState *env, uint32_t desc)  \
{                                                                \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,      \
                    LOAD_FN, ctzl(sizeof(ETYPE)), GETPC());      \
}

GEN_VEXT_LD_INDEX(vlxei8_8_v,   int8_t,  idx_b, lde_b_tlb)
GEN_VEXT_LD_INDEX(vlxei8_16_v,  int16_t, idx_b, lde_h_tlb)
GEN_VEXT_LD_INDEX(vlxei8_32_v,  int32_t, idx_b, lde_w_tlb)
GEN_VEXT_LD_INDEX(vlxei8_64_v,  int64_t, idx_b, lde_d_tlb)
GEN_VEXT_LD_INDEX(vlxei16_8_v,  int8_t,  idx_h, lde_b_tlb)
GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h_tlb)
GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w_tlb)
GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d_tlb)
GEN_VEXT_LD_INDEX(vlxei32_8_v,  int8_t,  idx_w, lde_b_tlb)
GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h_tlb)
GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w_tlb)
GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d_tlb)
GEN_VEXT_LD_INDEX(vlxei64_8_v,  int8_t,  idx_d, lde_b_tlb)
GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h_tlb)
GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w_tlb)
GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d_tlb)

#define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN)       \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,         \
                  void *vs2, CPURISCVState *env, uint32_t desc)  \
{                                                                \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,      \
                    STORE_FN, ctzl(sizeof(ETYPE)),               \
                    GETPC());                                    \
}

GEN_VEXT_ST_INDEX(vsxei8_8_v,   int8_t,  idx_b, ste_b_tlb)
GEN_VEXT_ST_INDEX(vsxei8_16_v,  int16_t, idx_b, ste_h_tlb)
GEN_VEXT_ST_INDEX(vsxei8_32_v,  int32_t, idx_b, ste_w_tlb)
GEN_VEXT_ST_INDEX(vsxei8_64_v,  int64_t, idx_b, ste_d_tlb)
GEN_VEXT_ST_INDEX(vsxei16_8_v,  int8_t,  idx_h, ste_b_tlb)
GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h_tlb)
GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w_tlb)
GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d_tlb)
GEN_VEXT_ST_INDEX(vsxei32_8_v,  int8_t,  idx_w, ste_b_tlb)
GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h_tlb)
GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w_tlb)
GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d_tlb)
GEN_VEXT_ST_INDEX(vsxei64_8_v,  int8_t,  idx_d, ste_b_tlb)
GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h_tlb)
GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w_tlb)
GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d_tlb)
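/*
 * In the helper names above, the first width is the index EEW and the
 * second is the data EEW: e.g. vlxei16_32_v uses idx_h to read 16-bit
 * offsets from vs2 and lde_w_tlb to load 32-bit data elements, so element
 * i is accessed at base + vs2[i], with the offset zero-extended because
 * the index functions read through an unsigned element type.
 */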
/*
 * unit-stride fault-only-first load instructions
 */
static inline void
vext_ldff(void *vd, void *v0, target_ulong base, CPURISCVState *env,
          uint32_t desc, vext_ldst_elem_fn_tlb *ldst_tlb,
          vext_ldst_elem_fn_host *ldst_host, uint32_t log2_esz, uintptr_t ra)
{
    uint32_t i, k, vl = 0;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t msize = nf * esz;
    uint32_t vma = vext_vma(desc);
    target_ulong addr, offset, remain, page_split, elems;
    int mmu_index = riscv_env_mmu_index(env, false);

    VSTART_CHECK_EARLY_EXIT(env);

    /* probe every access */
    for (i = env->vstart; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        addr = adjust_addr(env, base + i * (nf << log2_esz));
        if (i == 0) {
            /* Allow fault on first element. */
            probe_pages(env, addr, nf << log2_esz, ra, MMU_DATA_LOAD);
        } else {
            remain = nf << log2_esz;
            while (remain > 0) {
                void *host;
                int flags;

                offset = -(addr | TARGET_PAGE_MASK);

                /* Probe nonfault on subsequent elements. */
                flags = probe_access_flags(env, addr, offset, MMU_DATA_LOAD,
                                           mmu_index, true, &host, 0);

                /*
                 * Stop if invalid (unmapped) or mmio (transaction may fail).
                 * Do not stop if watchpoint, as the spec says that
                 * first-fault should continue to access the same
                 * elements regardless of any watchpoint.
                 */
                if (flags & ~TLB_WATCHPOINT) {
                    vl = i;
                    goto ProbeSuccess;
                }
                if (remain <= offset) {
                    break;
                }
                remain -= offset;
                addr = adjust_addr(env, addr + offset);
            }
        }
    }
ProbeSuccess:
    /* load bytes from guest memory */
    if (vl != 0) {
        env->vl = vl;
    }

    if (env->vstart < env->vl) {
        if (vm) {
            /* Calculate the page range of first page */
            addr = base + ((env->vstart * nf) << log2_esz);
            page_split = -(addr | TARGET_PAGE_MASK);
            /* Get number of elements */
            elems = page_split / msize;
            if (unlikely(env->vstart + elems >= env->vl)) {
                elems = env->vl - env->vstart;
            }

            /* Load/store elements in the first page */
            if (likely(elems)) {
                vext_page_ldst_us(env, vd, addr, elems, nf, max_elems,
                                  log2_esz, true, mmu_index, ldst_tlb,
                                  ldst_host, ra);
            }

            /* Load/store elements in the second page */
            if (unlikely(env->vstart < env->vl)) {
                /* Cross page element */
                if (unlikely(page_split % msize)) {
                    for (k = 0; k < nf; k++) {
                        addr = base + ((env->vstart * nf + k) << log2_esz);
                        ldst_tlb(env, adjust_addr(env, addr),
                                 env->vstart + k * max_elems, vd, ra);
                    }
                    env->vstart++;
                }

                addr = base + ((env->vstart * nf) << log2_esz);
                /* Get number of elements of second page */
                elems = env->vl - env->vstart;

                /* Load/store elements in the second page */
                vext_page_ldst_us(env, vd, addr, elems, nf, max_elems,
                                  log2_esz, true, mmu_index, ldst_tlb,
                                  ldst_host, ra);
            }
        } else {
            for (i = env->vstart; i < env->vl; i++) {
                k = 0;
                while (k < nf) {
                    if (!vext_elem_mask(v0, i)) {
                        /* set masked-off elements to 1s */
                        vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
                                          (i + k * max_elems + 1) * esz);
                        k++;
                        continue;
                    }
                    addr = base + ((i * nf + k) << log2_esz);
                    ldst_tlb(env, adjust_addr(env, addr), i + k * max_elems,
                             vd, ra);
                    k++;
                }
            }
        }
    }
    env->vstart = 0;

    vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
}

#define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN_TLB, LOAD_FN_HOST)   \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,        \
                  CPURISCVState *env, uint32_t desc)            \
{                                                               \
    vext_ldff(vd, v0, base, env, desc, LOAD_FN_TLB,             \
              LOAD_FN_HOST, ctzl(sizeof(ETYPE)), GETPC());      \
}

GEN_VEXT_LDFF(vle8ff_v,  int8_t,  lde_b_tlb, lde_b_host)
GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h_tlb, lde_h_host)
GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w_tlb, lde_w_host)
GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d_tlb, lde_d_host)
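/*
 * Net effect for e.g. vle32ff.v: element 0 is probed with faults enabled,
 * so a bad first address still traps as usual; if a later element i hits
 * an unmapped page or MMIO, no exception is raised and vl is simply
 * trimmed to i before the actual loads are performed, which is the
 * fault-only-first behaviour the RVV spec requires.
 */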
#define DO_SWAP(N, M) (M)
#define DO_AND(N, M)  (N & M)
#define DO_XOR(N, M)  (N ^ M)
#define DO_OR(N, M)   (N | M)
#define DO_ADD(N, M)  (N + M)

/* Signed min/max */
#define DO_MAX(N, M)  ((N) >= (M) ? (N) : (M))
#define DO_MIN(N, M)  ((N) >= (M) ? (M) : (N))

/*
 * load and store whole register instructions
 */
static inline QEMU_ALWAYS_INLINE void
vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
                vext_ldst_elem_fn_tlb *ldst_tlb,
                vext_ldst_elem_fn_host *ldst_host, uint32_t log2_esz,
                uintptr_t ra, bool is_load)
{
    target_ulong page_split, elems, addr;
    uint32_t nf = vext_nf(desc);
    uint32_t vlenb = riscv_cpu_cfg(env)->vlenb;
    uint32_t max_elems = vlenb >> log2_esz;
    uint32_t evl = nf * max_elems;
    uint32_t esz = 1 << log2_esz;
    int mmu_index = riscv_env_mmu_index(env, false);

    /* Calculate the page range of first page */
    addr = base + (env->vstart << log2_esz);
    page_split = -(addr | TARGET_PAGE_MASK);
    /* Get number of elements */
    elems = page_split / esz;
    if (unlikely(env->vstart + elems >= evl)) {
        elems = evl - env->vstart;
    }

    /* Load/store elements in the first page */
    if (likely(elems)) {
        vext_page_ldst_us(env, vd, addr, elems, 1, max_elems, log2_esz,
                          is_load, mmu_index, ldst_tlb, ldst_host, ra);
    }

    /* Load/store elements in the second page */
    if (unlikely(env->vstart < evl)) {
        /* Cross page element */
        if (unlikely(page_split % esz)) {
            addr = base + (env->vstart << log2_esz);
            ldst_tlb(env, adjust_addr(env, addr), env->vstart, vd, ra);
            env->vstart++;
        }

        addr = base + (env->vstart << log2_esz);
        /* Get number of elements of second page */
        elems = evl - env->vstart;

        /* Load/store elements in the second page */
        vext_page_ldst_us(env, vd, addr, elems, 1, max_elems, log2_esz,
                          is_load, mmu_index, ldst_tlb, ldst_host, ra);
    }

    env->vstart = 0;
}

#define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN_TLB, LOAD_FN_HOST)   \
void HELPER(NAME)(void *vd, target_ulong base, CPURISCVState *env,  \
                  uint32_t desc)                                    \
{                                                                   \
    vext_ldst_whole(vd, base, env, desc, LOAD_FN_TLB, LOAD_FN_HOST, \
                    ctzl(sizeof(ETYPE)), GETPC(), true);            \
}

GEN_VEXT_LD_WHOLE(vl1re8_v,  int8_t,  lde_b_tlb, lde_b_host)
GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h_tlb, lde_h_host)
GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w_tlb, lde_w_host)
GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d_tlb, lde_d_host)
GEN_VEXT_LD_WHOLE(vl2re8_v,  int8_t,  lde_b_tlb, lde_b_host)
GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h_tlb, lde_h_host)
GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w_tlb, lde_w_host)
GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d_tlb, lde_d_host)
GEN_VEXT_LD_WHOLE(vl4re8_v,  int8_t,  lde_b_tlb, lde_b_host)
GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h_tlb, lde_h_host)
GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w_tlb, lde_w_host)
GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d_tlb, lde_d_host)
GEN_VEXT_LD_WHOLE(vl8re8_v,  int8_t,  lde_b_tlb, lde_b_host)
GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h_tlb, lde_h_host)
GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w_tlb, lde_w_host)
GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d_tlb, lde_d_host)
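/*
 * Whole-register accesses ignore vl: evl = nf * (vlenb >> log2_esz).
 * For instance, with VLEN = 128 bits (vlenb = 16), vl2re32_v always
 * transfers nf * max_elems = 2 * 4 = 8 32-bit elements (32 bytes), i.e.
 * two full vector registers, regardless of the current vl.
 */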
#define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN_TLB, STORE_FN_HOST)    \
void HELPER(NAME)(void *vd, target_ulong base, CPURISCVState *env,     \
                  uint32_t desc)                                       \
{                                                                      \
    vext_ldst_whole(vd, base, env, desc, STORE_FN_TLB, STORE_FN_HOST,  \
                    ctzl(sizeof(ETYPE)), GETPC(), false);              \
}

GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b_tlb, ste_b_host)
GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b_tlb, ste_b_host)
GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b_tlb, ste_b_host)
GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b_tlb, ste_b_host)

/*
 * Vector Integer Arithmetic Instructions
 */

/* (TD, T1, T2, TX1, TX2) */
#define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
#define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
#define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
#define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
#define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
#define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
#define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
#define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
#define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
#define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
#define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
#define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
#define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
#define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
#define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
#define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
#define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
#define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
#define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
#define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t

#define DO_SUB(N, M) (N - M)
#define DO_RSUB(N, M) (M - N)

RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)

GEN_VEXT_VV(vadd_vv_b, 1)
GEN_VEXT_VV(vadd_vv_h, 2)
GEN_VEXT_VV(vadd_vv_w, 4)
GEN_VEXT_VV(vadd_vv_d, 8)
GEN_VEXT_VV(vsub_vv_b, 1)
GEN_VEXT_VV(vsub_vv_h, 2)
GEN_VEXT_VV(vsub_vv_w, 4)
GEN_VEXT_VV(vsub_vv_d, 8)

RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)

GEN_VEXT_VX(vadd_vx_b, 1)
GEN_VEXT_VX(vadd_vx_h, 2)
GEN_VEXT_VX(vadd_vx_w, 4)
GEN_VEXT_VX(vadd_vx_d, 8)
GEN_VEXT_VX(vsub_vx_b, 1)
GEN_VEXT_VX(vsub_vx_h, 2)
GEN_VEXT_VX(vsub_vx_w, 4)
GEN_VEXT_VX(vsub_vx_d, 8)
GEN_VEXT_VX(vrsub_vx_b, 1)
GEN_VEXT_VX(vrsub_vx_h, 2)
GEN_VEXT_VX(vrsub_vx_w, 4)
GEN_VEXT_VX(vrsub_vx_d, 8)
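/*
 * The (TD, T1, T2, TX1, TX2) tuples name the destination, source and
 * working types for the per-element macros defined in vector_internals.h.
 * Roughly (a sketch; the exact expansion lives in that header),
 * RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD) produces a
 * per-element function along the lines of:
 *
 *     static void do_vadd_vv_w(void *vd, void *vs1, void *vs2, int i)
 *     {
 *         int32_t s1 = *((int32_t *)vs1 + H4(i));
 *         int32_t s2 = *((int32_t *)vs2 + H4(i));
 *         *((int32_t *)vd + H4(i)) = DO_ADD(s2, s1);
 *     }
 *
 * while GEN_VEXT_VV(vadd_vv_w, 4) wraps it in the usual masked, tail-aware
 * helper loop.
 */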
void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
        *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
    }
}

void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
        *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
    }
}

void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
        *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
    }
}

void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
        *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
    }
}

/* Vector Widening Integer Add/Subtract */
#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
#define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
#define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
#define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
#define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t
#define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t
#define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t
RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
GEN_VEXT_VV(vwaddu_vv_b, 2)
GEN_VEXT_VV(vwaddu_vv_h, 4)
GEN_VEXT_VV(vwaddu_vv_w, 8)
GEN_VEXT_VV(vwsubu_vv_b, 2)
GEN_VEXT_VV(vwsubu_vv_h, 4)
GEN_VEXT_VV(vwsubu_vv_w, 8)
GEN_VEXT_VV(vwadd_vv_b, 2)
GEN_VEXT_VV(vwadd_vv_h, 4)
GEN_VEXT_VV(vwadd_vv_w, 8)
GEN_VEXT_VV(vwsub_vv_b, 2)
GEN_VEXT_VV(vwsub_vv_h, 4)
GEN_VEXT_VV(vwsub_vv_w, 8)
GEN_VEXT_VV(vwaddu_wv_b, 2)
GEN_VEXT_VV(vwaddu_wv_h, 4)
GEN_VEXT_VV(vwaddu_wv_w, 8)
GEN_VEXT_VV(vwsubu_wv_b, 2)
GEN_VEXT_VV(vwsubu_wv_h, 4)
GEN_VEXT_VV(vwsubu_wv_w, 8)
GEN_VEXT_VV(vwadd_wv_b, 2)
GEN_VEXT_VV(vwadd_wv_h, 4)
GEN_VEXT_VV(vwadd_wv_w, 8)
GEN_VEXT_VV(vwsub_wv_b, 2)
GEN_VEXT_VV(vwsub_wv_h, 4)
GEN_VEXT_VV(vwsub_wv_w, 8)

RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
GEN_VEXT_VX(vwaddu_vx_b, 2)
GEN_VEXT_VX(vwaddu_vx_h, 4)
GEN_VEXT_VX(vwaddu_vx_w, 8)
GEN_VEXT_VX(vwsubu_vx_b, 2)
GEN_VEXT_VX(vwsubu_vx_h, 4)
GEN_VEXT_VX(vwsubu_vx_w, 8)
GEN_VEXT_VX(vwadd_vx_b, 2)
GEN_VEXT_VX(vwadd_vx_h, 4)
GEN_VEXT_VX(vwadd_vx_w, 8)
GEN_VEXT_VX(vwsub_vx_b, 2)
GEN_VEXT_VX(vwsub_vx_h, 4)
GEN_VEXT_VX(vwsub_vx_w, 8)
GEN_VEXT_VX(vwaddu_wx_b, 2)
GEN_VEXT_VX(vwaddu_wx_h, 4)
GEN_VEXT_VX(vwaddu_wx_w, 8)
GEN_VEXT_VX(vwsubu_wx_b, 2)
GEN_VEXT_VX(vwsubu_wx_h, 4)
GEN_VEXT_VX(vwsubu_wx_w, 8)
GEN_VEXT_VX(vwadd_wx_b, 2)
GEN_VEXT_VX(vwadd_wx_h, 4)
GEN_VEXT_VX(vwadd_wx_w, 8)
GEN_VEXT_VX(vwsub_wx_b, 2)
GEN_VEXT_VX(vwsub_wx_h, 4)
GEN_VEXT_VX(vwsub_wx_w, 8)

/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
#define DO_VADC(N, M, C) (N + M + C)
#define DO_VSBC(N, M, C) (N - M - C)

#define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP)              \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vl = env->vl;                                    \
    uint32_t esz = sizeof(ETYPE);                             \
    uint32_t total_elems =                                    \
        vext_get_total_elems(env, desc, esz);                 \
    uint32_t vta = vext_vta(desc);                            \
    uint32_t i;                                               \
                                                              \
    VSTART_CHECK_EARLY_EXIT(env);                             \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        ETYPE carry = vext_elem_mask(v0, i);                  \
                                                              \
        *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry);         \
    }                                                         \
    env->vstart = 0;                                          \
    /* set tail elements to 1s */                             \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);  \
}

GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t,  H1, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)

GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t,  H1, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)

#define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP)                         \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,        \
                  CPURISCVState *env, uint32_t desc)                     \
{                                                                        \
    uint32_t vl = env->vl;                                               \
    uint32_t esz = sizeof(ETYPE);                                        \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);         \
    uint32_t vta = vext_vta(desc);                                       \
    uint32_t i;                                                          \
                                                                         \
    VSTART_CHECK_EARLY_EXIT(env);                                        \
                                                                         \
    for (i = env->vstart; i < vl; i++) {                                 \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                               \
        ETYPE carry = vext_elem_mask(v0, i);                             \
                                                                         \
        *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
    }                                                                    \
    env->vstart = 0;                                                     \
    /* set tail elements to 1s */                                        \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);             \
}

GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t,  H1, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)

GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t,  H1, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)

#define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \
                              (__typeof(N))(N + M) < N)
#define DO_MSBC(N, M, C) (C ? N <= M : N < M)
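/*
 * DO_MADC/DO_MSBC compute the carry/borrow-out in plain C by checking for
 * wrap-around in the ETYPE-wide result.  For example, for 8-bit elements
 * with N = 200, M = 100 and no carry-in, (uint8_t)(200 + 100) = 44 < 200,
 * so the carry-out is 1; with N = 100, M = 100, (uint8_t)200 >= 100 and
 * the carry-out is 0.  The "+ 1"/"<=" variants account for a carry-in of 1.
 */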
#define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP)             \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vl = env->vl;                                    \
    uint32_t vm = vext_vm(desc);                              \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;    \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);              \
    uint32_t i;                                               \
                                                              \
    VSTART_CHECK_EARLY_EXIT(env);                             \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        ETYPE carry = !vm && vext_elem_mask(v0, i);           \
        vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry));      \
    }                                                         \
    env->vstart = 0;                                          \
    /*
     * mask destination register is always tail-agnostic
     * set tail elements to 1s
     */                                                       \
    if (vta_all_1s) {                                         \
        for (; i < total_elems; i++) {                        \
            vext_set_elem_mask(vd, i, 1);                     \
        }                                                     \
    }                                                         \
}

GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t,  H1, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)

GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t,  H1, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)

#define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP)               \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,          \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{                                                               \
    uint32_t vl = env->vl;                                      \
    uint32_t vm = vext_vm(desc);                                \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;      \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);                \
    uint32_t i;                                                 \
                                                                \
    VSTART_CHECK_EARLY_EXIT(env);                               \
                                                                \
    for (i = env->vstart; i < vl; i++) {                        \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                      \
        ETYPE carry = !vm && vext_elem_mask(v0, i);             \
        vext_set_elem_mask(vd, i,                               \
                DO_OP(s2, (ETYPE)(target_long)s1, carry));      \
    }                                                           \
    env->vstart = 0;                                            \
    /*
     * mask destination register is always tail-agnostic
     * set tail elements to 1s
     */                                                         \
    if (vta_all_1s) {                                           \
        for (; i < total_elems; i++) {                          \
            vext_set_elem_mask(vd, i, 1);                       \
        }                                                       \
    }                                                           \
}

GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t,  H1, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)

GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t,  H1, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)

/* Vector Bitwise Logical Instructions */
RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
GEN_VEXT_VV(vand_vv_b, 1)
GEN_VEXT_VV(vand_vv_h, 2)
GEN_VEXT_VV(vand_vv_w, 4)
GEN_VEXT_VV(vand_vv_d, 8)
GEN_VEXT_VV(vor_vv_b, 1)
GEN_VEXT_VV(vor_vv_h, 2)
GEN_VEXT_VV(vor_vv_w, 4)
GEN_VEXT_VV(vor_vv_d, 8)
GEN_VEXT_VV(vxor_vv_b, 1)
GEN_VEXT_VV(vxor_vv_h, 2)
GEN_VEXT_VV(vxor_vv_w, 4)
GEN_VEXT_VV(vxor_vv_d, 8)

RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
GEN_VEXT_VX(vand_vx_b, 1)
GEN_VEXT_VX(vand_vx_h, 2)
GEN_VEXT_VX(vand_vx_w, 4)
GEN_VEXT_VX(vand_vx_d, 8)
GEN_VEXT_VX(vor_vx_b, 1)
GEN_VEXT_VX(vor_vx_h, 2)
GEN_VEXT_VX(vor_vx_w, 4)
GEN_VEXT_VX(vor_vx_d, 8)
GEN_VEXT_VX(vxor_vx_b, 1)
GEN_VEXT_VX(vxor_vx_h, 2)
GEN_VEXT_VX(vxor_vx_w, 4)
GEN_VEXT_VX(vxor_vx_d, 8)

/* Vector Single-Width Bit Shift Instructions */
#define DO_SLL(N, M)  (N << (M))
#define DO_SRL(N, M)  (N >> (M))

/* generate the helpers for shift instructions with two vector operators */
#define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK)       \
void HELPER(NAME)(void *vd, void *v0, void *vs1,                    \
                  void *vs2, CPURISCVState *env, uint32_t desc)     \
{                                                                   \
    uint32_t vm = vext_vm(desc);                                    \
    uint32_t vl = env->vl;                                          \
    uint32_t esz = sizeof(TS1);                                     \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);    \
    uint32_t vta = vext_vta(desc);                                  \
    uint32_t vma = vext_vma(desc);                                  \
    uint32_t i;                                                     \
                                                                    \
    VSTART_CHECK_EARLY_EXIT(env);                                   \
                                                                    \
    for (i = env->vstart; i < vl; i++) {                            \
        if (!vm && !vext_elem_mask(v0, i)) {                        \
            /* set masked-off elements to 1s */                     \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);     \
            continue;                                               \
        }                                                           \
        TS1 s1 = *((TS1 *)vs1 + HS1(i));                            \
        TS2 s2 = *((TS2 *)vs2 + HS2(i));                            \
        *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK);                  \
    }                                                               \
    env->vstart = 0;                                                \
    /* set tail elements to 1s */                                   \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);        \
}

GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t,  uint8_t, H1, H1, DO_SLL, 0x7)
GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)

GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)

GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t,  int8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)

/*
 * generate the helpers for shift instructions with one vector and one scalar
 */
#define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,      \
                  void *vs2, CPURISCVState *env,            \
                  uint32_t desc)                            \
{                                                           \
    uint32_t vm = vext_vm(desc);                            \
    uint32_t vl = env->vl;                                  \
    uint32_t esz = sizeof(TD);                              \
    uint32_t total_elems =                                  \
        vext_get_total_elems(env, desc, esz);               \
    uint32_t vta = vext_vta(desc);                          \
    uint32_t vma = vext_vma(desc);                          \
    uint32_t i;                                             \
                                                            \
    VSTART_CHECK_EARLY_EXIT(env);                           \
                                                            \
    for (i = env->vstart; i < vl; i++) {                    \
        if (!vm && !vext_elem_mask(v0, i)) {                \
            /* set masked-off elements to 1s */             \
            vext_set_elems_1s(vd, vma, i * esz,             \
                              (i + 1) * esz);               \
            continue;                                       \
        }                                                   \
        TS2 s2 = *((TS2 *)vs2 + HS2(i));                    \
        *((TD *)vd + HD(i)) = OP(s2, s1 & MASK);            \
    }                                                       \
    env->vstart = 0;                                        \
    /* set tail elements to 1s */                           \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);\
}

GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)

GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)

GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)

/* Vector Narrowing Integer Right Shift Instructions */
GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t,  uint16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t,  int16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)

/* Vector Integer Comparison Instructions */
#define DO_MSEQ(N, M) (N == M)
#define DO_MSNE(N, M) (N != M)
#define DO_MSLT(N, M) (N < M)
#define DO_MSLE(N, M) (N <= M)
#define DO_MSGT(N, M) (N > M)

#define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP)                \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vm = vext_vm(desc);                              \
    uint32_t vl = env->vl;                                    \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;    \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);              \
    uint32_t vma = vext_vma(desc);                            \
    uint32_t i;                                               \
                                                              \
    VSTART_CHECK_EARLY_EXIT(env);                             \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        if (!vm && !vext_elem_mask(v0, i)) {                  \
            /* set masked-off elements to 1s */               \
            if (vma) {                                        \
                vext_set_elem_mask(vd, i, 1);                 \
            }                                                 \
            continue;                                         \
        }                                                     \
        vext_set_elem_mask(vd, i, DO_OP(s2, s1));             \
    }                                                         \
    env->vstart = 0;                                          \
    /*
     * mask destination register is always tail-agnostic
     * set tail elements to 1s
     */                                                       \
    if (vta_all_1s) {                                         \
        for (; i < total_elems; i++) {                        \
            vext_set_elem_mask(vd, i, 1);                     \
        }                                                     \
    }                                                         \
}

GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t,  H1, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)

GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t,  H1, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)

GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)

#define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP)                      \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)                \
{                                                                   \
    uint32_t vm = vext_vm(desc);                                    \
    uint32_t vl = env->vl;                                          \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;          \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);                    \
    uint32_t vma = vext_vma(desc);                                  \
    uint32_t i;                                                     \
                                                                    \
    VSTART_CHECK_EARLY_EXIT(env);                                   \
                                                                    \
    for (i = env->vstart; i < vl; i++) {                            \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                          \
        if (!vm && !vext_elem_mask(v0, i)) {                        \
            /* set masked-off elements to 1s */                     \
            if (vma) {                                              \
                vext_set_elem_mask(vd, i, 1);                       \
            }                                                       \
            continue;                                               \
        }                                                           \
        vext_set_elem_mask(vd, i,                                   \
                DO_OP(s2, (ETYPE)(target_long)s1));                 \
    }                                                               \
    env->vstart = 0;                                                \
    /*
     * mask destination register is always tail-agnostic
     * set tail elements to 1s
     */                                                             \
    if (vta_all_1s) {                                               \
        for (; i < total_elems; i++) {                              \
            vext_set_elem_mask(vd, i, 1);                           \
        }                                                           \
    }                                                               \
}

GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t,  H1, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)

GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t,  H1, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)

GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t,  H1, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)

GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t,  H1, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)

/* Vector Integer Min/Max Instructions */
RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
GEN_VEXT_VV(vminu_vv_b, 1)
GEN_VEXT_VV(vminu_vv_h, 2)
GEN_VEXT_VV(vminu_vv_w, 4)
GEN_VEXT_VV(vminu_vv_d, 8)
GEN_VEXT_VV(vmin_vv_b, 1)
GEN_VEXT_VV(vmin_vv_h, 2)
GEN_VEXT_VV(vmin_vv_w, 4)
GEN_VEXT_VV(vmin_vv_d, 8)
GEN_VEXT_VV(vmaxu_vv_b, 1)
GEN_VEXT_VV(vmaxu_vv_h, 2)
GEN_VEXT_VV(vmaxu_vv_w, 4)
GEN_VEXT_VV(vmaxu_vv_d, 8)
GEN_VEXT_VV(vmax_vv_b, 1)
GEN_VEXT_VV(vmax_vv_h, 2)
GEN_VEXT_VV(vmax_vv_w, 4)
GEN_VEXT_VV(vmax_vv_d, 8)

RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX) 1604 RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX) 1605 RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX) 1606 RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX) 1607 RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX) 1608 RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX) 1609 RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX) 1610 RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX) 1611 GEN_VEXT_VX(vminu_vx_b, 1) 1612 GEN_VEXT_VX(vminu_vx_h, 2) 1613 GEN_VEXT_VX(vminu_vx_w, 4) 1614 GEN_VEXT_VX(vminu_vx_d, 8) 1615 GEN_VEXT_VX(vmin_vx_b, 1) 1616 GEN_VEXT_VX(vmin_vx_h, 2) 1617 GEN_VEXT_VX(vmin_vx_w, 4) 1618 GEN_VEXT_VX(vmin_vx_d, 8) 1619 GEN_VEXT_VX(vmaxu_vx_b, 1) 1620 GEN_VEXT_VX(vmaxu_vx_h, 2) 1621 GEN_VEXT_VX(vmaxu_vx_w, 4) 1622 GEN_VEXT_VX(vmaxu_vx_d, 8) 1623 GEN_VEXT_VX(vmax_vx_b, 1) 1624 GEN_VEXT_VX(vmax_vx_h, 2) 1625 GEN_VEXT_VX(vmax_vx_w, 4) 1626 GEN_VEXT_VX(vmax_vx_d, 8) 1627 1628 /* Vector Single-Width Integer Multiply Instructions */ 1629 #define DO_MUL(N, M) (N * M) 1630 RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL) 1631 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL) 1632 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL) 1633 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL) 1634 GEN_VEXT_VV(vmul_vv_b, 1) 1635 GEN_VEXT_VV(vmul_vv_h, 2) 1636 GEN_VEXT_VV(vmul_vv_w, 4) 1637 GEN_VEXT_VV(vmul_vv_d, 8) 1638 1639 static int8_t do_mulh_b(int8_t s2, int8_t s1) 1640 { 1641 return (int16_t)s2 * (int16_t)s1 >> 8; 1642 } 1643 1644 static int16_t do_mulh_h(int16_t s2, int16_t s1) 1645 { 1646 return (int32_t)s2 * (int32_t)s1 >> 16; 1647 } 1648 1649 static int32_t do_mulh_w(int32_t s2, int32_t s1) 1650 { 1651 return (int64_t)s2 * (int64_t)s1 >> 32; 1652 } 1653 1654 static int64_t do_mulh_d(int64_t s2, int64_t s1) 1655 { 1656 uint64_t hi_64, lo_64; 1657 1658 muls64(&lo_64, &hi_64, s1, s2); 1659 return hi_64; 1660 } 1661 1662 static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1) 1663 { 1664 return (uint16_t)s2 * (uint16_t)s1 >> 8; 1665 } 1666 1667 static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1) 1668 { 1669 return (uint32_t)s2 * (uint32_t)s1 >> 16; 1670 } 1671 1672 static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1) 1673 { 1674 return (uint64_t)s2 * (uint64_t)s1 >> 32; 1675 } 1676 1677 static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1) 1678 { 1679 uint64_t hi_64, lo_64; 1680 1681 mulu64(&lo_64, &hi_64, s2, s1); 1682 return hi_64; 1683 } 1684 1685 static int8_t do_mulhsu_b(int8_t s2, uint8_t s1) 1686 { 1687 return (int16_t)s2 * (uint16_t)s1 >> 8; 1688 } 1689 1690 static int16_t do_mulhsu_h(int16_t s2, uint16_t s1) 1691 { 1692 return (int32_t)s2 * (uint32_t)s1 >> 16; 1693 } 1694 1695 static int32_t do_mulhsu_w(int32_t s2, uint32_t s1) 1696 { 1697 return (int64_t)s2 * (uint64_t)s1 >> 32; 1698 } 1699 1700 /* 1701 * Let A = signed operand, 1702 * B = unsigned operand 1703 * P = mulu64(A, B), unsigned product 1704 * 1705 * LET X = 2 ** 64 - A, 2's complement of A 1706 * SP = signed product 1707 * THEN 1708 * IF A < 0 1709 * SP = -X * B 1710 * = -(2 ** 64 - A) * B 1711 * = A * B - 2 ** 64 * B 1712 * = P - 2 ** 64 * B 1713 * ELSE 1714 * SP = P 1715 * THEN 1716 * HI_P -= (A < 0 ? B : 0) 1717 */ 1718 1719 static int64_t do_mulhsu_d(int64_t s2, uint64_t s1) 1720 { 1721 uint64_t hi_64, lo_64; 1722 1723 mulu64(&lo_64, &hi_64, s2, s1); 1724 1725 hi_64 -= s2 < 0 ? 
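    /*
     * This is the HI_P -= (A < 0 ? B : 0) step from the derivation above:
     * mulu64() treated s2 as unsigned, so when s2 is negative the signed
     * product is P - 2**64 * s1 and the high word is s1 too large.
     * Illustrative 8-bit analogue (added for clarity, not from the
     * original): s2 = -1 (0xFF), s1 = 2 gives P = 0x01FE (hi 0x01),
     * while the signed product -2 = 0xFFFE needs hi 0xFF, i.e.
     * 0x01 - 0x02 mod 2**8.
     */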
s1 : 0; 1726 return hi_64; 1727 } 1728 1729 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b) 1730 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h) 1731 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w) 1732 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d) 1733 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b) 1734 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h) 1735 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w) 1736 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d) 1737 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b) 1738 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h) 1739 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w) 1740 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d) 1741 GEN_VEXT_VV(vmulh_vv_b, 1) 1742 GEN_VEXT_VV(vmulh_vv_h, 2) 1743 GEN_VEXT_VV(vmulh_vv_w, 4) 1744 GEN_VEXT_VV(vmulh_vv_d, 8) 1745 GEN_VEXT_VV(vmulhu_vv_b, 1) 1746 GEN_VEXT_VV(vmulhu_vv_h, 2) 1747 GEN_VEXT_VV(vmulhu_vv_w, 4) 1748 GEN_VEXT_VV(vmulhu_vv_d, 8) 1749 GEN_VEXT_VV(vmulhsu_vv_b, 1) 1750 GEN_VEXT_VV(vmulhsu_vv_h, 2) 1751 GEN_VEXT_VV(vmulhsu_vv_w, 4) 1752 GEN_VEXT_VV(vmulhsu_vv_d, 8) 1753 1754 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL) 1755 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL) 1756 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL) 1757 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL) 1758 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b) 1759 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h) 1760 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w) 1761 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d) 1762 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b) 1763 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h) 1764 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w) 1765 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d) 1766 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b) 1767 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h) 1768 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w) 1769 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d) 1770 GEN_VEXT_VX(vmul_vx_b, 1) 1771 GEN_VEXT_VX(vmul_vx_h, 2) 1772 GEN_VEXT_VX(vmul_vx_w, 4) 1773 GEN_VEXT_VX(vmul_vx_d, 8) 1774 GEN_VEXT_VX(vmulh_vx_b, 1) 1775 GEN_VEXT_VX(vmulh_vx_h, 2) 1776 GEN_VEXT_VX(vmulh_vx_w, 4) 1777 GEN_VEXT_VX(vmulh_vx_d, 8) 1778 GEN_VEXT_VX(vmulhu_vx_b, 1) 1779 GEN_VEXT_VX(vmulhu_vx_h, 2) 1780 GEN_VEXT_VX(vmulhu_vx_w, 4) 1781 GEN_VEXT_VX(vmulhu_vx_d, 8) 1782 GEN_VEXT_VX(vmulhsu_vx_b, 1) 1783 GEN_VEXT_VX(vmulhsu_vx_h, 2) 1784 GEN_VEXT_VX(vmulhsu_vx_w, 4) 1785 GEN_VEXT_VX(vmulhsu_vx_d, 8) 1786 1787 /* Vector Integer Divide Instructions */ 1788 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M) 1789 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M) 1790 #define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : \ 1791 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M) 1792 #define DO_REM(N, M) (unlikely(M == 0) ? N : \ 1793 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 
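        /*
         * Reference note on the division macros here: following the RVV
         * (and scalar M-extension) rules, x / 0 yields all ones, x % 0
         * yields x, and the signed overflow case (most negative value
         * divided by -1) yields the dividend for div and 0 for rem.
         * "N == -N" is a width-independent test for the most negative
         * value; it also matches N == 0, which is harmless because the
         * special-cased result then equals the ordinary one (0).
         */ \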
0 : N % M) 1794 1795 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU) 1796 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU) 1797 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU) 1798 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU) 1799 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV) 1800 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV) 1801 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV) 1802 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV) 1803 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU) 1804 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU) 1805 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU) 1806 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU) 1807 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM) 1808 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM) 1809 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM) 1810 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM) 1811 GEN_VEXT_VV(vdivu_vv_b, 1) 1812 GEN_VEXT_VV(vdivu_vv_h, 2) 1813 GEN_VEXT_VV(vdivu_vv_w, 4) 1814 GEN_VEXT_VV(vdivu_vv_d, 8) 1815 GEN_VEXT_VV(vdiv_vv_b, 1) 1816 GEN_VEXT_VV(vdiv_vv_h, 2) 1817 GEN_VEXT_VV(vdiv_vv_w, 4) 1818 GEN_VEXT_VV(vdiv_vv_d, 8) 1819 GEN_VEXT_VV(vremu_vv_b, 1) 1820 GEN_VEXT_VV(vremu_vv_h, 2) 1821 GEN_VEXT_VV(vremu_vv_w, 4) 1822 GEN_VEXT_VV(vremu_vv_d, 8) 1823 GEN_VEXT_VV(vrem_vv_b, 1) 1824 GEN_VEXT_VV(vrem_vv_h, 2) 1825 GEN_VEXT_VV(vrem_vv_w, 4) 1826 GEN_VEXT_VV(vrem_vv_d, 8) 1827 1828 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU) 1829 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU) 1830 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU) 1831 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU) 1832 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV) 1833 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV) 1834 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV) 1835 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV) 1836 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU) 1837 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU) 1838 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU) 1839 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU) 1840 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) 1841 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM) 1842 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM) 1843 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM) 1844 GEN_VEXT_VX(vdivu_vx_b, 1) 1845 GEN_VEXT_VX(vdivu_vx_h, 2) 1846 GEN_VEXT_VX(vdivu_vx_w, 4) 1847 GEN_VEXT_VX(vdivu_vx_d, 8) 1848 GEN_VEXT_VX(vdiv_vx_b, 1) 1849 GEN_VEXT_VX(vdiv_vx_h, 2) 1850 GEN_VEXT_VX(vdiv_vx_w, 4) 1851 GEN_VEXT_VX(vdiv_vx_d, 8) 1852 GEN_VEXT_VX(vremu_vx_b, 1) 1853 GEN_VEXT_VX(vremu_vx_h, 2) 1854 GEN_VEXT_VX(vremu_vx_w, 4) 1855 GEN_VEXT_VX(vremu_vx_d, 8) 1856 GEN_VEXT_VX(vrem_vx_b, 1) 1857 GEN_VEXT_VX(vrem_vx_h, 2) 1858 GEN_VEXT_VX(vrem_vx_w, 4) 1859 GEN_VEXT_VX(vrem_vx_d, 8) 1860 1861 /* Vector Widening Integer Multiply Instructions */ 1862 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) 1863 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL) 1864 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL) 1865 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL) 1866 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL) 1867 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL) 1868 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) 1869 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, 
DO_MUL) 1870 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) 1871 GEN_VEXT_VV(vwmul_vv_b, 2) 1872 GEN_VEXT_VV(vwmul_vv_h, 4) 1873 GEN_VEXT_VV(vwmul_vv_w, 8) 1874 GEN_VEXT_VV(vwmulu_vv_b, 2) 1875 GEN_VEXT_VV(vwmulu_vv_h, 4) 1876 GEN_VEXT_VV(vwmulu_vv_w, 8) 1877 GEN_VEXT_VV(vwmulsu_vv_b, 2) 1878 GEN_VEXT_VV(vwmulsu_vv_h, 4) 1879 GEN_VEXT_VV(vwmulsu_vv_w, 8) 1880 1881 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) 1882 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) 1883 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL) 1884 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL) 1885 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL) 1886 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL) 1887 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) 1888 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) 1889 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) 1890 GEN_VEXT_VX(vwmul_vx_b, 2) 1891 GEN_VEXT_VX(vwmul_vx_h, 4) 1892 GEN_VEXT_VX(vwmul_vx_w, 8) 1893 GEN_VEXT_VX(vwmulu_vx_b, 2) 1894 GEN_VEXT_VX(vwmulu_vx_h, 4) 1895 GEN_VEXT_VX(vwmulu_vx_w, 8) 1896 GEN_VEXT_VX(vwmulsu_vx_b, 2) 1897 GEN_VEXT_VX(vwmulsu_vx_h, 4) 1898 GEN_VEXT_VX(vwmulsu_vx_w, 8) 1899 1900 /* Vector Single-Width Integer Multiply-Add Instructions */ 1901 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1902 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 1903 { \ 1904 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1905 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1906 TD d = *((TD *)vd + HD(i)); \ 1907 *((TD *)vd + HD(i)) = OP(s2, s1, d); \ 1908 } 1909 1910 #define DO_MACC(N, M, D) (M * N + D) 1911 #define DO_NMSAC(N, M, D) (-(M * N) + D) 1912 #define DO_MADD(N, M, D) (M * D + N) 1913 #define DO_NMSUB(N, M, D) (-(M * D) + N) 1914 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC) 1915 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC) 1916 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC) 1917 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC) 1918 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC) 1919 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC) 1920 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC) 1921 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC) 1922 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD) 1923 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD) 1924 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD) 1925 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD) 1926 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB) 1927 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) 1928 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) 1929 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) 1930 GEN_VEXT_VV(vmacc_vv_b, 1) 1931 GEN_VEXT_VV(vmacc_vv_h, 2) 1932 GEN_VEXT_VV(vmacc_vv_w, 4) 1933 GEN_VEXT_VV(vmacc_vv_d, 8) 1934 GEN_VEXT_VV(vnmsac_vv_b, 1) 1935 GEN_VEXT_VV(vnmsac_vv_h, 2) 1936 GEN_VEXT_VV(vnmsac_vv_w, 4) 1937 GEN_VEXT_VV(vnmsac_vv_d, 8) 1938 GEN_VEXT_VV(vmadd_vv_b, 1) 1939 GEN_VEXT_VV(vmadd_vv_h, 2) 1940 GEN_VEXT_VV(vmadd_vv_w, 4) 1941 GEN_VEXT_VV(vmadd_vv_d, 8) 1942 GEN_VEXT_VV(vnmsub_vv_b, 1) 1943 GEN_VEXT_VV(vnmsub_vv_h, 2) 1944 GEN_VEXT_VV(vnmsub_vv_w, 4) 1945 GEN_VEXT_VV(vnmsub_vv_d, 8) 1946 1947 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 1948 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 1949 { \ 1950 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1951 TD d = *((TD *)vd 
+ HD(i)); \ 1952 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \ 1953 } 1954 1955 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC) 1956 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC) 1957 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC) 1958 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC) 1959 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC) 1960 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC) 1961 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC) 1962 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC) 1963 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD) 1964 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD) 1965 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD) 1966 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD) 1967 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB) 1968 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) 1969 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) 1970 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) 1971 GEN_VEXT_VX(vmacc_vx_b, 1) 1972 GEN_VEXT_VX(vmacc_vx_h, 2) 1973 GEN_VEXT_VX(vmacc_vx_w, 4) 1974 GEN_VEXT_VX(vmacc_vx_d, 8) 1975 GEN_VEXT_VX(vnmsac_vx_b, 1) 1976 GEN_VEXT_VX(vnmsac_vx_h, 2) 1977 GEN_VEXT_VX(vnmsac_vx_w, 4) 1978 GEN_VEXT_VX(vnmsac_vx_d, 8) 1979 GEN_VEXT_VX(vmadd_vx_b, 1) 1980 GEN_VEXT_VX(vmadd_vx_h, 2) 1981 GEN_VEXT_VX(vmadd_vx_w, 4) 1982 GEN_VEXT_VX(vmadd_vx_d, 8) 1983 GEN_VEXT_VX(vnmsub_vx_b, 1) 1984 GEN_VEXT_VX(vnmsub_vx_h, 2) 1985 GEN_VEXT_VX(vnmsub_vx_w, 4) 1986 GEN_VEXT_VX(vnmsub_vx_d, 8) 1987 1988 /* Vector Widening Integer Multiply-Add Instructions */ 1989 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) 1990 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC) 1991 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC) 1992 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC) 1993 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC) 1994 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC) 1995 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) 1996 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) 1997 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) 1998 GEN_VEXT_VV(vwmaccu_vv_b, 2) 1999 GEN_VEXT_VV(vwmaccu_vv_h, 4) 2000 GEN_VEXT_VV(vwmaccu_vv_w, 8) 2001 GEN_VEXT_VV(vwmacc_vv_b, 2) 2002 GEN_VEXT_VV(vwmacc_vv_h, 4) 2003 GEN_VEXT_VV(vwmacc_vv_w, 8) 2004 GEN_VEXT_VV(vwmaccsu_vv_b, 2) 2005 GEN_VEXT_VV(vwmaccsu_vv_h, 4) 2006 GEN_VEXT_VV(vwmaccsu_vv_w, 8) 2007 2008 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) 2009 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) 2010 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC) 2011 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC) 2012 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC) 2013 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC) 2014 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC) 2015 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC) 2016 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC) 2017 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) 2018 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) 2019 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) 2020 GEN_VEXT_VX(vwmaccu_vx_b, 2) 2021 GEN_VEXT_VX(vwmaccu_vx_h, 4) 2022 GEN_VEXT_VX(vwmaccu_vx_w, 8) 2023 GEN_VEXT_VX(vwmacc_vx_b, 2) 2024 GEN_VEXT_VX(vwmacc_vx_h, 4) 2025 GEN_VEXT_VX(vwmacc_vx_w, 8) 2026 GEN_VEXT_VX(vwmaccsu_vx_b, 2) 2027 
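/*
 * For illustration only (not used by the build), a hand-expanded sketch of
 * what the macros above generate for one case, vwmaccu.vx with SEW=8,
 * assuming the usual WOP_UUU_B type list (uint16_t destination and
 * intermediates, uint8_t sources):
 *
 *     static void do_vwmaccu_vx_b(void *vd, target_long s1, void *vs2, int i)
 *     {
 *         uint16_t s2 = *((uint8_t *)vs2 + H1(i));
 *         uint16_t d = *((uint16_t *)vd + H2(i));
 *         *((uint16_t *)vd + H2(i)) = (uint16_t)(uint8_t)s1 * s2 + d;
 *     }
 *
 * i.e. each destination element accumulates the widened product of the
 * narrow vs2 element and the narrow scalar.
 */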
GEN_VEXT_VX(vwmaccsu_vx_h, 4) 2028 GEN_VEXT_VX(vwmaccsu_vx_w, 8) 2029 GEN_VEXT_VX(vwmaccus_vx_b, 2) 2030 GEN_VEXT_VX(vwmaccus_vx_h, 4) 2031 GEN_VEXT_VX(vwmaccus_vx_w, 8) 2032 2033 /* Vector Integer Merge and Move Instructions */ 2034 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \ 2035 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \ 2036 uint32_t desc) \ 2037 { \ 2038 uint32_t vl = env->vl; \ 2039 uint32_t esz = sizeof(ETYPE); \ 2040 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2041 uint32_t vta = vext_vta(desc); \ 2042 uint32_t i; \ 2043 \ 2044 VSTART_CHECK_EARLY_EXIT(env); \ 2045 \ 2046 for (i = env->vstart; i < vl; i++) { \ 2047 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 2048 *((ETYPE *)vd + H(i)) = s1; \ 2049 } \ 2050 env->vstart = 0; \ 2051 /* set tail elements to 1s */ \ 2052 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2053 } 2054 2055 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1) 2056 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2) 2057 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4) 2058 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8) 2059 2060 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \ 2061 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \ 2062 uint32_t desc) \ 2063 { \ 2064 uint32_t vl = env->vl; \ 2065 uint32_t esz = sizeof(ETYPE); \ 2066 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2067 uint32_t vta = vext_vta(desc); \ 2068 uint32_t i; \ 2069 \ 2070 VSTART_CHECK_EARLY_EXIT(env); \ 2071 \ 2072 for (i = env->vstart; i < vl; i++) { \ 2073 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \ 2074 } \ 2075 env->vstart = 0; \ 2076 /* set tail elements to 1s */ \ 2077 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2078 } 2079 2080 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1) 2081 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2) 2082 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4) 2083 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8) 2084 2085 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \ 2086 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 2087 CPURISCVState *env, uint32_t desc) \ 2088 { \ 2089 uint32_t vl = env->vl; \ 2090 uint32_t esz = sizeof(ETYPE); \ 2091 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2092 uint32_t vta = vext_vta(desc); \ 2093 uint32_t i; \ 2094 \ 2095 VSTART_CHECK_EARLY_EXIT(env); \ 2096 \ 2097 for (i = env->vstart; i < vl; i++) { \ 2098 ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1); \ 2099 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \ 2100 } \ 2101 env->vstart = 0; \ 2102 /* set tail elements to 1s */ \ 2103 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2104 } 2105 2106 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1) 2107 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2) 2108 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4) 2109 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8) 2110 2111 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \ 2112 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2113 void *vs2, CPURISCVState *env, uint32_t desc) \ 2114 { \ 2115 uint32_t vl = env->vl; \ 2116 uint32_t esz = sizeof(ETYPE); \ 2117 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2118 uint32_t vta = vext_vta(desc); \ 2119 uint32_t i; \ 2120 \ 2121 VSTART_CHECK_EARLY_EXIT(env); \ 2122 \ 2123 for (i = env->vstart; i < vl; i++) { \ 2124 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 2125 ETYPE d = (!vext_elem_mask(v0, i) ? 
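        /* mask bit clear keeps the old vs2 element,               \
           mask bit set takes the x[rs1] scalar */                 \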
s2 : \ 2126 (ETYPE)(target_long)s1); \ 2127 *((ETYPE *)vd + H(i)) = d; \ 2128 } \ 2129 env->vstart = 0; \ 2130 /* set tail elements to 1s */ \ 2131 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2132 } 2133 2134 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1) 2135 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2) 2136 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4) 2137 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8) 2138 2139 /* 2140 * Vector Fixed-Point Arithmetic Instructions 2141 */ 2142 2143 /* Vector Single-Width Saturating Add and Subtract */ 2144 2145 /* 2146 * As fixed point instructions probably have round mode and saturation, 2147 * define common macros for fixed point here. 2148 */ 2149 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i, 2150 CPURISCVState *env, int vxrm); 2151 2152 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 2153 static inline void \ 2154 do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 2155 CPURISCVState *env, int vxrm) \ 2156 { \ 2157 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 2158 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2159 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \ 2160 } 2161 2162 static inline void 2163 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2, 2164 CPURISCVState *env, 2165 uint32_t vl, uint32_t vm, int vxrm, 2166 opivv2_rm_fn *fn, uint32_t vma, uint32_t esz) 2167 { 2168 VSTART_CHECK_EARLY_EXIT(env); 2169 2170 for (uint32_t i = env->vstart; i < vl; i++) { 2171 if (!vm && !vext_elem_mask(v0, i)) { 2172 /* set masked-off elements to 1s */ 2173 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); 2174 continue; 2175 } 2176 fn(vd, vs1, vs2, i, env, vxrm); 2177 } 2178 env->vstart = 0; 2179 } 2180 2181 static inline void 2182 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, 2183 CPURISCVState *env, 2184 uint32_t desc, 2185 opivv2_rm_fn *fn, uint32_t esz) 2186 { 2187 uint32_t vm = vext_vm(desc); 2188 uint32_t vl = env->vl; 2189 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 2190 uint32_t vta = vext_vta(desc); 2191 uint32_t vma = vext_vma(desc); 2192 2193 switch (env->vxrm) { 2194 case 0: /* rnu */ 2195 vext_vv_rm_1(vd, v0, vs1, vs2, 2196 env, vl, vm, 0, fn, vma, esz); 2197 break; 2198 case 1: /* rne */ 2199 vext_vv_rm_1(vd, v0, vs1, vs2, 2200 env, vl, vm, 1, fn, vma, esz); 2201 break; 2202 case 2: /* rdn */ 2203 vext_vv_rm_1(vd, v0, vs1, vs2, 2204 env, vl, vm, 2, fn, vma, esz); 2205 break; 2206 default: /* rod */ 2207 vext_vv_rm_1(vd, v0, vs1, vs2, 2208 env, vl, vm, 3, fn, vma, esz); 2209 break; 2210 } 2211 /* set tail elements to 1s */ 2212 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 2213 } 2214 2215 /* generate helpers for fixed point instructions with OPIVV format */ 2216 #define GEN_VEXT_VV_RM(NAME, ESZ) \ 2217 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 2218 CPURISCVState *env, uint32_t desc) \ 2219 { \ 2220 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \ 2221 do_##NAME, ESZ); \ 2222 } 2223 2224 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, 2225 uint8_t b) 2226 { 2227 uint8_t res = a + b; 2228 if (res < a) { 2229 res = UINT8_MAX; 2230 env->vxsat = 0x1; 2231 } 2232 return res; 2233 } 2234 2235 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a, 2236 uint16_t b) 2237 { 2238 uint16_t res = a + b; 2239 if (res < a) { 2240 res = UINT16_MAX; 2241 env->vxsat = 0x1; 2242 } 2243 return res; 2244 } 2245 2246 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a, 2247 uint32_t b) 2248 { 2249 uint32_t res = a + b; 2250 
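    /*
     * Unsigned overflow check: the sum wrapped modulo 2**32 exactly when
     * the result is smaller than either operand (illustrative values:
     * 0xFFFFFFFF + 2 wraps to 1, which is < a).
     */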
if (res < a) { 2251 res = UINT32_MAX; 2252 env->vxsat = 0x1; 2253 } 2254 return res; 2255 } 2256 2257 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a, 2258 uint64_t b) 2259 { 2260 uint64_t res = a + b; 2261 if (res < a) { 2262 res = UINT64_MAX; 2263 env->vxsat = 0x1; 2264 } 2265 return res; 2266 } 2267 2268 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8) 2269 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) 2270 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) 2271 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64) 2272 GEN_VEXT_VV_RM(vsaddu_vv_b, 1) 2273 GEN_VEXT_VV_RM(vsaddu_vv_h, 2) 2274 GEN_VEXT_VV_RM(vsaddu_vv_w, 4) 2275 GEN_VEXT_VV_RM(vsaddu_vv_d, 8) 2276 2277 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, 2278 CPURISCVState *env, int vxrm); 2279 2280 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2281 static inline void \ 2282 do_##NAME(void *vd, target_long s1, void *vs2, int i, \ 2283 CPURISCVState *env, int vxrm) \ 2284 { \ 2285 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2286 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \ 2287 } 2288 2289 static inline void 2290 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2, 2291 CPURISCVState *env, 2292 uint32_t vl, uint32_t vm, int vxrm, 2293 opivx2_rm_fn *fn, uint32_t vma, uint32_t esz) 2294 { 2295 VSTART_CHECK_EARLY_EXIT(env); 2296 2297 for (uint32_t i = env->vstart; i < vl; i++) { 2298 if (!vm && !vext_elem_mask(v0, i)) { 2299 /* set masked-off elements to 1s */ 2300 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); 2301 continue; 2302 } 2303 fn(vd, s1, vs2, i, env, vxrm); 2304 } 2305 env->vstart = 0; 2306 } 2307 2308 static inline void 2309 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, 2310 CPURISCVState *env, 2311 uint32_t desc, 2312 opivx2_rm_fn *fn, uint32_t esz) 2313 { 2314 uint32_t vm = vext_vm(desc); 2315 uint32_t vl = env->vl; 2316 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 2317 uint32_t vta = vext_vta(desc); 2318 uint32_t vma = vext_vma(desc); 2319 2320 switch (env->vxrm) { 2321 case 0: /* rnu */ 2322 vext_vx_rm_1(vd, v0, s1, vs2, 2323 env, vl, vm, 0, fn, vma, esz); 2324 break; 2325 case 1: /* rne */ 2326 vext_vx_rm_1(vd, v0, s1, vs2, 2327 env, vl, vm, 1, fn, vma, esz); 2328 break; 2329 case 2: /* rdn */ 2330 vext_vx_rm_1(vd, v0, s1, vs2, 2331 env, vl, vm, 2, fn, vma, esz); 2332 break; 2333 default: /* rod */ 2334 vext_vx_rm_1(vd, v0, s1, vs2, 2335 env, vl, vm, 3, fn, vma, esz); 2336 break; 2337 } 2338 /* set tail elements to 1s */ 2339 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 2340 } 2341 2342 /* generate helpers for fixed point instructions with OPIVX format */ 2343 #define GEN_VEXT_VX_RM(NAME, ESZ) \ 2344 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2345 void *vs2, CPURISCVState *env, \ 2346 uint32_t desc) \ 2347 { \ 2348 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \ 2349 do_##NAME, ESZ); \ 2350 } 2351 2352 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) 2353 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) 2354 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) 2355 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) 2356 GEN_VEXT_VX_RM(vsaddu_vx_b, 1) 2357 GEN_VEXT_VX_RM(vsaddu_vx_h, 2) 2358 GEN_VEXT_VX_RM(vsaddu_vx_w, 4) 2359 GEN_VEXT_VX_RM(vsaddu_vx_d, 8) 2360 2361 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2362 { 2363 int8_t res = a + b; 2364 if ((res ^ a) & (res ^ b) & 
INT8_MIN) { 2365 res = a > 0 ? INT8_MAX : INT8_MIN; 2366 env->vxsat = 0x1; 2367 } 2368 return res; 2369 } 2370 2371 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, 2372 int16_t b) 2373 { 2374 int16_t res = a + b; 2375 if ((res ^ a) & (res ^ b) & INT16_MIN) { 2376 res = a > 0 ? INT16_MAX : INT16_MIN; 2377 env->vxsat = 0x1; 2378 } 2379 return res; 2380 } 2381 2382 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, 2383 int32_t b) 2384 { 2385 int32_t res = a + b; 2386 if ((res ^ a) & (res ^ b) & INT32_MIN) { 2387 res = a > 0 ? INT32_MAX : INT32_MIN; 2388 env->vxsat = 0x1; 2389 } 2390 return res; 2391 } 2392 2393 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, 2394 int64_t b) 2395 { 2396 int64_t res = a + b; 2397 if ((res ^ a) & (res ^ b) & INT64_MIN) { 2398 res = a > 0 ? INT64_MAX : INT64_MIN; 2399 env->vxsat = 0x1; 2400 } 2401 return res; 2402 } 2403 2404 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8) 2405 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) 2406 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) 2407 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) 2408 GEN_VEXT_VV_RM(vsadd_vv_b, 1) 2409 GEN_VEXT_VV_RM(vsadd_vv_h, 2) 2410 GEN_VEXT_VV_RM(vsadd_vv_w, 4) 2411 GEN_VEXT_VV_RM(vsadd_vv_d, 8) 2412 2413 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) 2414 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) 2415 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) 2416 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) 2417 GEN_VEXT_VX_RM(vsadd_vx_b, 1) 2418 GEN_VEXT_VX_RM(vsadd_vx_h, 2) 2419 GEN_VEXT_VX_RM(vsadd_vx_w, 4) 2420 GEN_VEXT_VX_RM(vsadd_vx_d, 8) 2421 2422 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, 2423 uint8_t b) 2424 { 2425 uint8_t res = a - b; 2426 if (res > a) { 2427 res = 0; 2428 env->vxsat = 0x1; 2429 } 2430 return res; 2431 } 2432 2433 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a, 2434 uint16_t b) 2435 { 2436 uint16_t res = a - b; 2437 if (res > a) { 2438 res = 0; 2439 env->vxsat = 0x1; 2440 } 2441 return res; 2442 } 2443 2444 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a, 2445 uint32_t b) 2446 { 2447 uint32_t res = a - b; 2448 if (res > a) { 2449 res = 0; 2450 env->vxsat = 0x1; 2451 } 2452 return res; 2453 } 2454 2455 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a, 2456 uint64_t b) 2457 { 2458 uint64_t res = a - b; 2459 if (res > a) { 2460 res = 0; 2461 env->vxsat = 0x1; 2462 } 2463 return res; 2464 } 2465 2466 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8) 2467 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16) 2468 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) 2469 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64) 2470 GEN_VEXT_VV_RM(vssubu_vv_b, 1) 2471 GEN_VEXT_VV_RM(vssubu_vv_h, 2) 2472 GEN_VEXT_VV_RM(vssubu_vv_w, 4) 2473 GEN_VEXT_VV_RM(vssubu_vv_d, 8) 2474 2475 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) 2476 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) 2477 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) 2478 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) 2479 GEN_VEXT_VX_RM(vssubu_vx_b, 1) 2480 GEN_VEXT_VX_RM(vssubu_vx_h, 2) 2481 GEN_VEXT_VX_RM(vssubu_vx_w, 4) 2482 GEN_VEXT_VX_RM(vssubu_vx_d, 8) 2483 2484 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2485 { 2486 int8_t res = a - b; 2487 if 
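    /*
     * Signed borrow check for the condition that follows: subtraction can
     * only overflow when a and b have different signs, and it did overflow
     * when the result's sign differs from a's.  (The saturating adds above
     * test (res ^ a) & (res ^ b) instead, since addition overflows only
     * when the operands share a sign and the result's sign flips.)
     */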
((res ^ a) & (a ^ b) & INT8_MIN) { 2488 res = a >= 0 ? INT8_MAX : INT8_MIN; 2489 env->vxsat = 0x1; 2490 } 2491 return res; 2492 } 2493 2494 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, 2495 int16_t b) 2496 { 2497 int16_t res = a - b; 2498 if ((res ^ a) & (a ^ b) & INT16_MIN) { 2499 res = a >= 0 ? INT16_MAX : INT16_MIN; 2500 env->vxsat = 0x1; 2501 } 2502 return res; 2503 } 2504 2505 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, 2506 int32_t b) 2507 { 2508 int32_t res = a - b; 2509 if ((res ^ a) & (a ^ b) & INT32_MIN) { 2510 res = a >= 0 ? INT32_MAX : INT32_MIN; 2511 env->vxsat = 0x1; 2512 } 2513 return res; 2514 } 2515 2516 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, 2517 int64_t b) 2518 { 2519 int64_t res = a - b; 2520 if ((res ^ a) & (a ^ b) & INT64_MIN) { 2521 res = a >= 0 ? INT64_MAX : INT64_MIN; 2522 env->vxsat = 0x1; 2523 } 2524 return res; 2525 } 2526 2527 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8) 2528 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16) 2529 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32) 2530 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64) 2531 GEN_VEXT_VV_RM(vssub_vv_b, 1) 2532 GEN_VEXT_VV_RM(vssub_vv_h, 2) 2533 GEN_VEXT_VV_RM(vssub_vv_w, 4) 2534 GEN_VEXT_VV_RM(vssub_vv_d, 8) 2535 2536 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8) 2537 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16) 2538 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32) 2539 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64) 2540 GEN_VEXT_VX_RM(vssub_vx_b, 1) 2541 GEN_VEXT_VX_RM(vssub_vx_h, 2) 2542 GEN_VEXT_VX_RM(vssub_vx_w, 4) 2543 GEN_VEXT_VX_RM(vssub_vx_d, 8) 2544 2545 /* Vector Single-Width Averaging Add and Subtract */ 2546 static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift) 2547 { 2548 uint8_t d = extract64(v, shift, 1); 2549 uint8_t d1; 2550 uint64_t D1, D2; 2551 2552 if (shift == 0 || shift > 64) { 2553 return 0; 2554 } 2555 2556 d1 = extract64(v, shift - 1, 1); 2557 D1 = extract64(v, 0, shift); 2558 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */ 2559 return d1; 2560 } else if (vxrm == 1) { /* round-to-nearest-even */ 2561 if (shift > 1) { 2562 D2 = extract64(v, 0, shift - 1); 2563 return d1 & ((D2 != 0) | d); 2564 } else { 2565 return d1 & d; 2566 } 2567 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */ 2568 return !d & (D1 != 0); 2569 } 2570 return 0; /* round-down (truncate) */ 2571 } 2572 2573 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, 2574 int32_t b) 2575 { 2576 int64_t res = (int64_t)a + b; 2577 uint8_t round = get_round(vxrm, res, 1); 2578 2579 return (res >> 1) + round; 2580 } 2581 2582 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, 2583 int64_t b) 2584 { 2585 int64_t res = a + b; 2586 uint8_t round = get_round(vxrm, res, 1); 2587 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN; 2588 2589 /* With signed overflow, bit 64 is inverse of bit 63. 
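     * res holds the sum modulo 2**64, so bits 62..0 of res >> 1 already
     * equal bits 63..1 of the true 65-bit sum; bit 63 of the result must
     * be the true sum's bit 64 (its sign).  On overflow that bit is the
     * inverse of res's bit 63, so XOR-ing with `over` (then INT64_MIN)
     * restores it before the rounding increment is added.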
*/ 2590 return ((res >> 1) ^ over) + round; 2591 } 2592 2593 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32) 2594 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) 2595 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) 2596 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) 2597 GEN_VEXT_VV_RM(vaadd_vv_b, 1) 2598 GEN_VEXT_VV_RM(vaadd_vv_h, 2) 2599 GEN_VEXT_VV_RM(vaadd_vv_w, 4) 2600 GEN_VEXT_VV_RM(vaadd_vv_d, 8) 2601 2602 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) 2603 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) 2604 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) 2605 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) 2606 GEN_VEXT_VX_RM(vaadd_vx_b, 1) 2607 GEN_VEXT_VX_RM(vaadd_vx_h, 2) 2608 GEN_VEXT_VX_RM(vaadd_vx_w, 4) 2609 GEN_VEXT_VX_RM(vaadd_vx_d, 8) 2610 2611 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm, 2612 uint32_t a, uint32_t b) 2613 { 2614 uint64_t res = (uint64_t)a + b; 2615 uint8_t round = get_round(vxrm, res, 1); 2616 2617 return (res >> 1) + round; 2618 } 2619 2620 static inline uint64_t aaddu64(CPURISCVState *env, int vxrm, 2621 uint64_t a, uint64_t b) 2622 { 2623 uint64_t res = a + b; 2624 uint8_t round = get_round(vxrm, res, 1); 2625 uint64_t over = (uint64_t)(res < a) << 63; 2626 2627 return ((res >> 1) | over) + round; 2628 } 2629 2630 RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32) 2631 RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32) 2632 RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32) 2633 RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64) 2634 GEN_VEXT_VV_RM(vaaddu_vv_b, 1) 2635 GEN_VEXT_VV_RM(vaaddu_vv_h, 2) 2636 GEN_VEXT_VV_RM(vaaddu_vv_w, 4) 2637 GEN_VEXT_VV_RM(vaaddu_vv_d, 8) 2638 2639 RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32) 2640 RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32) 2641 RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32) 2642 RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64) 2643 GEN_VEXT_VX_RM(vaaddu_vx_b, 1) 2644 GEN_VEXT_VX_RM(vaaddu_vx_h, 2) 2645 GEN_VEXT_VX_RM(vaaddu_vx_w, 4) 2646 GEN_VEXT_VX_RM(vaaddu_vx_d, 8) 2647 2648 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, 2649 int32_t b) 2650 { 2651 int64_t res = (int64_t)a - b; 2652 uint8_t round = get_round(vxrm, res, 1); 2653 2654 return (res >> 1) + round; 2655 } 2656 2657 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, 2658 int64_t b) 2659 { 2660 int64_t res = (int64_t)a - b; 2661 uint8_t round = get_round(vxrm, res, 1); 2662 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN; 2663 2664 /* With signed overflow, bit 64 is inverse of bit 63. 
*/ 2665 return ((res >> 1) ^ over) + round; 2666 } 2667 2668 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32) 2669 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32) 2670 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) 2671 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) 2672 GEN_VEXT_VV_RM(vasub_vv_b, 1) 2673 GEN_VEXT_VV_RM(vasub_vv_h, 2) 2674 GEN_VEXT_VV_RM(vasub_vv_w, 4) 2675 GEN_VEXT_VV_RM(vasub_vv_d, 8) 2676 2677 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) 2678 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) 2679 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) 2680 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) 2681 GEN_VEXT_VX_RM(vasub_vx_b, 1) 2682 GEN_VEXT_VX_RM(vasub_vx_h, 2) 2683 GEN_VEXT_VX_RM(vasub_vx_w, 4) 2684 GEN_VEXT_VX_RM(vasub_vx_d, 8) 2685 2686 static inline uint32_t asubu32(CPURISCVState *env, int vxrm, 2687 uint32_t a, uint32_t b) 2688 { 2689 int64_t res = (int64_t)a - b; 2690 uint8_t round = get_round(vxrm, res, 1); 2691 2692 return (res >> 1) + round; 2693 } 2694 2695 static inline uint64_t asubu64(CPURISCVState *env, int vxrm, 2696 uint64_t a, uint64_t b) 2697 { 2698 uint64_t res = (uint64_t)a - b; 2699 uint8_t round = get_round(vxrm, res, 1); 2700 uint64_t over = (uint64_t)(res > a) << 63; 2701 2702 return ((res >> 1) | over) + round; 2703 } 2704 2705 RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32) 2706 RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32) 2707 RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32) 2708 RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64) 2709 GEN_VEXT_VV_RM(vasubu_vv_b, 1) 2710 GEN_VEXT_VV_RM(vasubu_vv_h, 2) 2711 GEN_VEXT_VV_RM(vasubu_vv_w, 4) 2712 GEN_VEXT_VV_RM(vasubu_vv_d, 8) 2713 2714 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32) 2715 RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32) 2716 RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32) 2717 RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64) 2718 GEN_VEXT_VX_RM(vasubu_vx_b, 1) 2719 GEN_VEXT_VX_RM(vasubu_vx_h, 2) 2720 GEN_VEXT_VX_RM(vasubu_vx_w, 4) 2721 GEN_VEXT_VX_RM(vasubu_vx_d, 8) 2722 2723 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */ 2724 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2725 { 2726 uint8_t round; 2727 int16_t res; 2728 2729 res = (int16_t)a * (int16_t)b; 2730 round = get_round(vxrm, res, 7); 2731 res = (res >> 7) + round; 2732 2733 if (res > INT8_MAX) { 2734 env->vxsat = 0x1; 2735 return INT8_MAX; 2736 } else if (res < INT8_MIN) { 2737 env->vxsat = 0x1; 2738 return INT8_MIN; 2739 } else { 2740 return res; 2741 } 2742 } 2743 2744 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2745 { 2746 uint8_t round; 2747 int32_t res; 2748 2749 res = (int32_t)a * (int32_t)b; 2750 round = get_round(vxrm, res, 15); 2751 res = (res >> 15) + round; 2752 2753 if (res > INT16_MAX) { 2754 env->vxsat = 0x1; 2755 return INT16_MAX; 2756 } else if (res < INT16_MIN) { 2757 env->vxsat = 0x1; 2758 return INT16_MIN; 2759 } else { 2760 return res; 2761 } 2762 } 2763 2764 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2765 { 2766 uint8_t round; 2767 int64_t res; 2768 2769 res = (int64_t)a * (int64_t)b; 2770 round = get_round(vxrm, res, 31); 2771 res = (res >> 31) + round; 2772 2773 if (res > INT32_MAX) { 2774 env->vxsat = 0x1; 2775 return INT32_MAX; 2776 } else if (res < INT32_MIN) { 2777 env->vxsat = 0x1; 
2778 return INT32_MIN; 2779 } else { 2780 return res; 2781 } 2782 } 2783 2784 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2785 { 2786 uint8_t round; 2787 uint64_t hi_64, lo_64; 2788 int64_t res; 2789 2790 if (a == INT64_MIN && b == INT64_MIN) { 2791 env->vxsat = 1; 2792 return INT64_MAX; 2793 } 2794 2795 muls64(&lo_64, &hi_64, a, b); 2796 round = get_round(vxrm, lo_64, 63); 2797 /* 2798 * Cannot overflow, as there are always 2799 * 2 sign bits after multiply. 2800 */ 2801 res = (hi_64 << 1) | (lo_64 >> 63); 2802 if (round) { 2803 if (res == INT64_MAX) { 2804 env->vxsat = 1; 2805 } else { 2806 res += 1; 2807 } 2808 } 2809 return res; 2810 } 2811 2812 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8) 2813 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) 2814 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) 2815 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) 2816 GEN_VEXT_VV_RM(vsmul_vv_b, 1) 2817 GEN_VEXT_VV_RM(vsmul_vv_h, 2) 2818 GEN_VEXT_VV_RM(vsmul_vv_w, 4) 2819 GEN_VEXT_VV_RM(vsmul_vv_d, 8) 2820 2821 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) 2822 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) 2823 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) 2824 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) 2825 GEN_VEXT_VX_RM(vsmul_vx_b, 1) 2826 GEN_VEXT_VX_RM(vsmul_vx_h, 2) 2827 GEN_VEXT_VX_RM(vsmul_vx_w, 4) 2828 GEN_VEXT_VX_RM(vsmul_vx_d, 8) 2829 2830 /* Vector Single-Width Scaling Shift Instructions */ 2831 static inline uint8_t 2832 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2833 { 2834 uint8_t round, shift = b & 0x7; 2835 uint8_t res; 2836 2837 round = get_round(vxrm, a, shift); 2838 res = (a >> shift) + round; 2839 return res; 2840 } 2841 static inline uint16_t 2842 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b) 2843 { 2844 uint8_t round, shift = b & 0xf; 2845 2846 round = get_round(vxrm, a, shift); 2847 return (a >> shift) + round; 2848 } 2849 static inline uint32_t 2850 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) 2851 { 2852 uint8_t round, shift = b & 0x1f; 2853 2854 round = get_round(vxrm, a, shift); 2855 return (a >> shift) + round; 2856 } 2857 static inline uint64_t 2858 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b) 2859 { 2860 uint8_t round, shift = b & 0x3f; 2861 2862 round = get_round(vxrm, a, shift); 2863 return (a >> shift) + round; 2864 } 2865 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8) 2866 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) 2867 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) 2868 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) 2869 GEN_VEXT_VV_RM(vssrl_vv_b, 1) 2870 GEN_VEXT_VV_RM(vssrl_vv_h, 2) 2871 GEN_VEXT_VV_RM(vssrl_vv_w, 4) 2872 GEN_VEXT_VV_RM(vssrl_vv_d, 8) 2873 2874 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) 2875 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) 2876 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) 2877 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) 2878 GEN_VEXT_VX_RM(vssrl_vx_b, 1) 2879 GEN_VEXT_VX_RM(vssrl_vx_h, 2) 2880 GEN_VEXT_VX_RM(vssrl_vx_w, 4) 2881 GEN_VEXT_VX_RM(vssrl_vx_d, 8) 2882 2883 static inline int8_t 2884 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2885 { 2886 uint8_t round, shift = b & 0x7; 2887 2888 round = get_round(vxrm, a, shift); 2889 return (a >> shift) + round; 2890 } 2891 static inline int16_t 2892 
vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2893 { 2894 uint8_t round, shift = b & 0xf; 2895 2896 round = get_round(vxrm, a, shift); 2897 return (a >> shift) + round; 2898 } 2899 static inline int32_t 2900 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2901 { 2902 uint8_t round, shift = b & 0x1f; 2903 2904 round = get_round(vxrm, a, shift); 2905 return (a >> shift) + round; 2906 } 2907 static inline int64_t 2908 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2909 { 2910 uint8_t round, shift = b & 0x3f; 2911 2912 round = get_round(vxrm, a, shift); 2913 return (a >> shift) + round; 2914 } 2915 2916 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) 2917 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) 2918 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) 2919 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) 2920 GEN_VEXT_VV_RM(vssra_vv_b, 1) 2921 GEN_VEXT_VV_RM(vssra_vv_h, 2) 2922 GEN_VEXT_VV_RM(vssra_vv_w, 4) 2923 GEN_VEXT_VV_RM(vssra_vv_d, 8) 2924 2925 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) 2926 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) 2927 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32) 2928 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) 2929 GEN_VEXT_VX_RM(vssra_vx_b, 1) 2930 GEN_VEXT_VX_RM(vssra_vx_h, 2) 2931 GEN_VEXT_VX_RM(vssra_vx_w, 4) 2932 GEN_VEXT_VX_RM(vssra_vx_d, 8) 2933 2934 /* Vector Narrowing Fixed-Point Clip Instructions */ 2935 static inline int8_t 2936 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b) 2937 { 2938 uint8_t round, shift = b & 0xf; 2939 int16_t res; 2940 2941 round = get_round(vxrm, a, shift); 2942 res = (a >> shift) + round; 2943 if (res > INT8_MAX) { 2944 env->vxsat = 0x1; 2945 return INT8_MAX; 2946 } else if (res < INT8_MIN) { 2947 env->vxsat = 0x1; 2948 return INT8_MIN; 2949 } else { 2950 return res; 2951 } 2952 } 2953 2954 static inline int16_t 2955 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b) 2956 { 2957 uint8_t round, shift = b & 0x1f; 2958 int32_t res; 2959 2960 round = get_round(vxrm, a, shift); 2961 res = (a >> shift) + round; 2962 if (res > INT16_MAX) { 2963 env->vxsat = 0x1; 2964 return INT16_MAX; 2965 } else if (res < INT16_MIN) { 2966 env->vxsat = 0x1; 2967 return INT16_MIN; 2968 } else { 2969 return res; 2970 } 2971 } 2972 2973 static inline int32_t 2974 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) 2975 { 2976 uint8_t round, shift = b & 0x3f; 2977 int64_t res; 2978 2979 round = get_round(vxrm, a, shift); 2980 res = (a >> shift) + round; 2981 if (res > INT32_MAX) { 2982 env->vxsat = 0x1; 2983 return INT32_MAX; 2984 } else if (res < INT32_MIN) { 2985 env->vxsat = 0x1; 2986 return INT32_MIN; 2987 } else { 2988 return res; 2989 } 2990 } 2991 2992 RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8) 2993 RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16) 2994 RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32) 2995 GEN_VEXT_VV_RM(vnclip_wv_b, 1) 2996 GEN_VEXT_VV_RM(vnclip_wv_h, 2) 2997 GEN_VEXT_VV_RM(vnclip_wv_w, 4) 2998 2999 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8) 3000 RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16) 3001 RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32) 3002 GEN_VEXT_VX_RM(vnclip_wx_b, 1) 3003 GEN_VEXT_VX_RM(vnclip_wx_h, 2) 3004 GEN_VEXT_VX_RM(vnclip_wx_w, 4) 3005 3006 static inline uint8_t 3007 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b) 3008 { 3009 
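    /*
     * Unsigned narrowing clip: shift the 2*SEW-wide source right by the
     * low log2(2*SEW) bits of b (hence the 0xf mask for SEW=8), add the
     * rounding increment selected by vxrm, then saturate to the narrow
     * type's maximum, setting vxsat when saturation occurs.
     */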
uint8_t round, shift = b & 0xf; 3010 uint16_t res; 3011 3012 round = get_round(vxrm, a, shift); 3013 res = (a >> shift) + round; 3014 if (res > UINT8_MAX) { 3015 env->vxsat = 0x1; 3016 return UINT8_MAX; 3017 } else { 3018 return res; 3019 } 3020 } 3021 3022 static inline uint16_t 3023 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b) 3024 { 3025 uint8_t round, shift = b & 0x1f; 3026 uint32_t res; 3027 3028 round = get_round(vxrm, a, shift); 3029 res = (a >> shift) + round; 3030 if (res > UINT16_MAX) { 3031 env->vxsat = 0x1; 3032 return UINT16_MAX; 3033 } else { 3034 return res; 3035 } 3036 } 3037 3038 static inline uint32_t 3039 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b) 3040 { 3041 uint8_t round, shift = b & 0x3f; 3042 uint64_t res; 3043 3044 round = get_round(vxrm, a, shift); 3045 res = (a >> shift) + round; 3046 if (res > UINT32_MAX) { 3047 env->vxsat = 0x1; 3048 return UINT32_MAX; 3049 } else { 3050 return res; 3051 } 3052 } 3053 3054 RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8) 3055 RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16) 3056 RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32) 3057 GEN_VEXT_VV_RM(vnclipu_wv_b, 1) 3058 GEN_VEXT_VV_RM(vnclipu_wv_h, 2) 3059 GEN_VEXT_VV_RM(vnclipu_wv_w, 4) 3060 3061 RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8) 3062 RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16) 3063 RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32) 3064 GEN_VEXT_VX_RM(vnclipu_wx_b, 1) 3065 GEN_VEXT_VX_RM(vnclipu_wx_h, 2) 3066 GEN_VEXT_VX_RM(vnclipu_wx_w, 4) 3067 3068 /* 3069 * Vector Float Point Arithmetic Instructions 3070 */ 3071 /* Vector Single-Width Floating-Point Add/Subtract Instructions */ 3072 #define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3073 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3074 CPURISCVState *env) \ 3075 { \ 3076 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3077 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3078 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \ 3079 } 3080 3081 #define GEN_VEXT_VV_ENV(NAME, ESZ) \ 3082 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 3083 void *vs2, CPURISCVState *env, \ 3084 uint32_t desc) \ 3085 { \ 3086 uint32_t vm = vext_vm(desc); \ 3087 uint32_t vl = env->vl; \ 3088 uint32_t total_elems = \ 3089 vext_get_total_elems(env, desc, ESZ); \ 3090 uint32_t vta = vext_vta(desc); \ 3091 uint32_t vma = vext_vma(desc); \ 3092 uint32_t i; \ 3093 \ 3094 VSTART_CHECK_EARLY_EXIT(env); \ 3095 \ 3096 for (i = env->vstart; i < vl; i++) { \ 3097 if (!vm && !vext_elem_mask(v0, i)) { \ 3098 /* set masked-off elements to 1s */ \ 3099 vext_set_elems_1s(vd, vma, i * ESZ, \ 3100 (i + 1) * ESZ); \ 3101 continue; \ 3102 } \ 3103 do_##NAME(vd, vs1, vs2, i, env); \ 3104 } \ 3105 env->vstart = 0; \ 3106 /* set tail elements to 1s */ \ 3107 vext_set_elems_1s(vd, vta, vl * ESZ, \ 3108 total_elems * ESZ); \ 3109 } 3110 3111 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add) 3112 RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add) 3113 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add) 3114 GEN_VEXT_VV_ENV(vfadd_vv_h, 2) 3115 GEN_VEXT_VV_ENV(vfadd_vv_w, 4) 3116 GEN_VEXT_VV_ENV(vfadd_vv_d, 8) 3117 3118 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3119 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3120 CPURISCVState *env) \ 3121 { \ 3122 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3123 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\ 
3124 } 3125 3126 #define GEN_VEXT_VF(NAME, ESZ) \ 3127 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ 3128 void *vs2, CPURISCVState *env, \ 3129 uint32_t desc) \ 3130 { \ 3131 uint32_t vm = vext_vm(desc); \ 3132 uint32_t vl = env->vl; \ 3133 uint32_t total_elems = \ 3134 vext_get_total_elems(env, desc, ESZ); \ 3135 uint32_t vta = vext_vta(desc); \ 3136 uint32_t vma = vext_vma(desc); \ 3137 uint32_t i; \ 3138 \ 3139 VSTART_CHECK_EARLY_EXIT(env); \ 3140 \ 3141 for (i = env->vstart; i < vl; i++) { \ 3142 if (!vm && !vext_elem_mask(v0, i)) { \ 3143 /* set masked-off elements to 1s */ \ 3144 vext_set_elems_1s(vd, vma, i * ESZ, \ 3145 (i + 1) * ESZ); \ 3146 continue; \ 3147 } \ 3148 do_##NAME(vd, s1, vs2, i, env); \ 3149 } \ 3150 env->vstart = 0; \ 3151 /* set tail elements to 1s */ \ 3152 vext_set_elems_1s(vd, vta, vl * ESZ, \ 3153 total_elems * ESZ); \ 3154 } 3155 3156 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) 3157 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) 3158 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) 3159 GEN_VEXT_VF(vfadd_vf_h, 2) 3160 GEN_VEXT_VF(vfadd_vf_w, 4) 3161 GEN_VEXT_VF(vfadd_vf_d, 8) 3162 3163 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) 3164 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) 3165 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) 3166 GEN_VEXT_VV_ENV(vfsub_vv_h, 2) 3167 GEN_VEXT_VV_ENV(vfsub_vv_w, 4) 3168 GEN_VEXT_VV_ENV(vfsub_vv_d, 8) 3169 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) 3170 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) 3171 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) 3172 GEN_VEXT_VF(vfsub_vf_h, 2) 3173 GEN_VEXT_VF(vfsub_vf_w, 4) 3174 GEN_VEXT_VF(vfsub_vf_d, 8) 3175 3176 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) 3177 { 3178 return float16_sub(b, a, s); 3179 } 3180 3181 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s) 3182 { 3183 return float32_sub(b, a, s); 3184 } 3185 3186 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s) 3187 { 3188 return float64_sub(b, a, s); 3189 } 3190 3191 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) 3192 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) 3193 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) 3194 GEN_VEXT_VF(vfrsub_vf_h, 2) 3195 GEN_VEXT_VF(vfrsub_vf_w, 4) 3196 GEN_VEXT_VF(vfrsub_vf_d, 8) 3197 3198 /* Vector Widening Floating-Point Add/Subtract Instructions */ 3199 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) 3200 { 3201 return float32_add(float16_to_float32(a, true, s), 3202 float16_to_float32(b, true, s), s); 3203 } 3204 3205 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) 3206 { 3207 return float64_add(float32_to_float64(a, s), 3208 float32_to_float64(b, s), s); 3209 3210 } 3211 3212 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) 3213 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) 3214 GEN_VEXT_VV_ENV(vfwadd_vv_h, 4) 3215 GEN_VEXT_VV_ENV(vfwadd_vv_w, 8) 3216 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) 3217 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) 3218 GEN_VEXT_VF(vfwadd_vf_h, 4) 3219 GEN_VEXT_VF(vfwadd_vf_w, 8) 3220 3221 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) 3222 { 3223 return float32_sub(float16_to_float32(a, true, s), 3224 float16_to_float32(b, true, s), s); 3225 } 3226 3227 static uint64_t vfwsub32(uint32_t a, uint32_t b, 
float_status *s) 3228 { 3229 return float64_sub(float32_to_float64(a, s), 3230 float32_to_float64(b, s), s); 3231 3232 } 3233 3234 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) 3235 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) 3236 GEN_VEXT_VV_ENV(vfwsub_vv_h, 4) 3237 GEN_VEXT_VV_ENV(vfwsub_vv_w, 8) 3238 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16) 3239 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) 3240 GEN_VEXT_VF(vfwsub_vf_h, 4) 3241 GEN_VEXT_VF(vfwsub_vf_w, 8) 3242 3243 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) 3244 { 3245 return float32_add(a, float16_to_float32(b, true, s), s); 3246 } 3247 3248 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s) 3249 { 3250 return float64_add(a, float32_to_float64(b, s), s); 3251 } 3252 3253 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) 3254 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) 3255 GEN_VEXT_VV_ENV(vfwadd_wv_h, 4) 3256 GEN_VEXT_VV_ENV(vfwadd_wv_w, 8) 3257 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) 3258 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) 3259 GEN_VEXT_VF(vfwadd_wf_h, 4) 3260 GEN_VEXT_VF(vfwadd_wf_w, 8) 3261 3262 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) 3263 { 3264 return float32_sub(a, float16_to_float32(b, true, s), s); 3265 } 3266 3267 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s) 3268 { 3269 return float64_sub(a, float32_to_float64(b, s), s); 3270 } 3271 3272 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) 3273 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) 3274 GEN_VEXT_VV_ENV(vfwsub_wv_h, 4) 3275 GEN_VEXT_VV_ENV(vfwsub_wv_w, 8) 3276 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) 3277 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) 3278 GEN_VEXT_VF(vfwsub_wf_h, 4) 3279 GEN_VEXT_VF(vfwsub_wf_w, 8) 3280 3281 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */ 3282 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) 3283 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) 3284 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) 3285 GEN_VEXT_VV_ENV(vfmul_vv_h, 2) 3286 GEN_VEXT_VV_ENV(vfmul_vv_w, 4) 3287 GEN_VEXT_VV_ENV(vfmul_vv_d, 8) 3288 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) 3289 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) 3290 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) 3291 GEN_VEXT_VF(vfmul_vf_h, 2) 3292 GEN_VEXT_VF(vfmul_vf_w, 4) 3293 GEN_VEXT_VF(vfmul_vf_d, 8) 3294 3295 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) 3296 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) 3297 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) 3298 GEN_VEXT_VV_ENV(vfdiv_vv_h, 2) 3299 GEN_VEXT_VV_ENV(vfdiv_vv_w, 4) 3300 GEN_VEXT_VV_ENV(vfdiv_vv_d, 8) 3301 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) 3302 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) 3303 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) 3304 GEN_VEXT_VF(vfdiv_vf_h, 2) 3305 GEN_VEXT_VF(vfdiv_vf_w, 4) 3306 GEN_VEXT_VF(vfdiv_vf_d, 8) 3307 3308 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) 3309 { 3310 return float16_div(b, a, s); 3311 } 3312 3313 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s) 3314 { 3315 return float32_div(b, a, s); 3316 } 3317 3318 static uint64_t float64_rdiv(uint64_t a, 
uint64_t b, float_status *s) 3319 { 3320 return float64_div(b, a, s); 3321 } 3322 3323 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) 3324 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) 3325 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) 3326 GEN_VEXT_VF(vfrdiv_vf_h, 2) 3327 GEN_VEXT_VF(vfrdiv_vf_w, 4) 3328 GEN_VEXT_VF(vfrdiv_vf_d, 8) 3329 3330 /* Vector Widening Floating-Point Multiply */ 3331 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) 3332 { 3333 return float32_mul(float16_to_float32(a, true, s), 3334 float16_to_float32(b, true, s), s); 3335 } 3336 3337 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) 3338 { 3339 return float64_mul(float32_to_float64(a, s), 3340 float32_to_float64(b, s), s); 3341 3342 } 3343 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) 3344 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32) 3345 GEN_VEXT_VV_ENV(vfwmul_vv_h, 4) 3346 GEN_VEXT_VV_ENV(vfwmul_vv_w, 8) 3347 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) 3348 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) 3349 GEN_VEXT_VF(vfwmul_vf_h, 4) 3350 GEN_VEXT_VF(vfwmul_vf_w, 8) 3351 3352 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ 3353 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3354 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3355 CPURISCVState *env) \ 3356 { \ 3357 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3358 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3359 TD d = *((TD *)vd + HD(i)); \ 3360 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \ 3361 } 3362 3363 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3364 { 3365 return float16_muladd(a, b, d, 0, s); 3366 } 3367 3368 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3369 { 3370 return float32_muladd(a, b, d, 0, s); 3371 } 3372 3373 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3374 { 3375 return float64_muladd(a, b, d, 0, s); 3376 } 3377 3378 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) 3379 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) 3380 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) 3381 GEN_VEXT_VV_ENV(vfmacc_vv_h, 2) 3382 GEN_VEXT_VV_ENV(vfmacc_vv_w, 4) 3383 GEN_VEXT_VV_ENV(vfmacc_vv_d, 8) 3384 3385 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3386 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3387 CPURISCVState *env) \ 3388 { \ 3389 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3390 TD d = *((TD *)vd + HD(i)); \ 3391 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\ 3392 } 3393 3394 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) 3395 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) 3396 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) 3397 GEN_VEXT_VF(vfmacc_vf_h, 2) 3398 GEN_VEXT_VF(vfmacc_vf_w, 4) 3399 GEN_VEXT_VF(vfmacc_vf_d, 8) 3400 3401 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3402 { 3403 return float16_muladd(a, b, d, float_muladd_negate_c | 3404 float_muladd_negate_product, s); 3405 } 3406 3407 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3408 { 3409 return float32_muladd(a, b, d, float_muladd_negate_c | 3410 float_muladd_negate_product, s); 3411 } 3412 3413 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3414 { 3415 return float64_muladd(a, b, d, float_muladd_negate_c | 3416 
float_muladd_negate_product, s); 3417 } 3418 3419 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16) 3420 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) 3421 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) 3422 GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2) 3423 GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4) 3424 GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8) 3425 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) 3426 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) 3427 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) 3428 GEN_VEXT_VF(vfnmacc_vf_h, 2) 3429 GEN_VEXT_VF(vfnmacc_vf_w, 4) 3430 GEN_VEXT_VF(vfnmacc_vf_d, 8) 3431 3432 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3433 { 3434 return float16_muladd(a, b, d, float_muladd_negate_c, s); 3435 } 3436 3437 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3438 { 3439 return float32_muladd(a, b, d, float_muladd_negate_c, s); 3440 } 3441 3442 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3443 { 3444 return float64_muladd(a, b, d, float_muladd_negate_c, s); 3445 } 3446 3447 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) 3448 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) 3449 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) 3450 GEN_VEXT_VV_ENV(vfmsac_vv_h, 2) 3451 GEN_VEXT_VV_ENV(vfmsac_vv_w, 4) 3452 GEN_VEXT_VV_ENV(vfmsac_vv_d, 8) 3453 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) 3454 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) 3455 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) 3456 GEN_VEXT_VF(vfmsac_vf_h, 2) 3457 GEN_VEXT_VF(vfmsac_vf_w, 4) 3458 GEN_VEXT_VF(vfmsac_vf_d, 8) 3459 3460 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3461 { 3462 return float16_muladd(a, b, d, float_muladd_negate_product, s); 3463 } 3464 3465 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3466 { 3467 return float32_muladd(a, b, d, float_muladd_negate_product, s); 3468 } 3469 3470 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3471 { 3472 return float64_muladd(a, b, d, float_muladd_negate_product, s); 3473 } 3474 3475 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) 3476 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) 3477 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) 3478 GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2) 3479 GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4) 3480 GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8) 3481 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) 3482 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) 3483 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64) 3484 GEN_VEXT_VF(vfnmsac_vf_h, 2) 3485 GEN_VEXT_VF(vfnmsac_vf_w, 4) 3486 GEN_VEXT_VF(vfnmsac_vf_d, 8) 3487 3488 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3489 { 3490 return float16_muladd(d, b, a, 0, s); 3491 } 3492 3493 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3494 { 3495 return float32_muladd(d, b, a, 0, s); 3496 } 3497 3498 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3499 { 3500 return float64_muladd(d, b, a, 0, s); 3501 } 3502 3503 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) 3504 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) 3505 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) 3506 GEN_VEXT_VV_ENV(vfmadd_vv_h, 2) 3507 
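/*
 * Note: OPFVV3 and OPFVF3 invoke the helper as OP(vs2, vs1 or rs1, vd),
 * i.e. (a, b, d) = (vs2, vs1/rs1, vd).  The fmacc-style helpers above
 * compute a * b + d = vs1 * vs2 + vd, whereas the fmadd- and fmsub-style
 * helpers swap the addend and one multiplicand, computing d * b + a =
 * vs1 * vd + vs2; this is what distinguishes the vfmacc and vfmadd
 * operand forms.
 */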
GEN_VEXT_VV_ENV(vfmadd_vv_w, 4) 3508 GEN_VEXT_VV_ENV(vfmadd_vv_d, 8) 3509 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16) 3510 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) 3511 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) 3512 GEN_VEXT_VF(vfmadd_vf_h, 2) 3513 GEN_VEXT_VF(vfmadd_vf_w, 4) 3514 GEN_VEXT_VF(vfmadd_vf_d, 8) 3515 3516 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3517 { 3518 return float16_muladd(d, b, a, float_muladd_negate_c | 3519 float_muladd_negate_product, s); 3520 } 3521 3522 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3523 { 3524 return float32_muladd(d, b, a, float_muladd_negate_c | 3525 float_muladd_negate_product, s); 3526 } 3527 3528 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3529 { 3530 return float64_muladd(d, b, a, float_muladd_negate_c | 3531 float_muladd_negate_product, s); 3532 } 3533 3534 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) 3535 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) 3536 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) 3537 GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2) 3538 GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4) 3539 GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8) 3540 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) 3541 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) 3542 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) 3543 GEN_VEXT_VF(vfnmadd_vf_h, 2) 3544 GEN_VEXT_VF(vfnmadd_vf_w, 4) 3545 GEN_VEXT_VF(vfnmadd_vf_d, 8) 3546 3547 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3548 { 3549 return float16_muladd(d, b, a, float_muladd_negate_c, s); 3550 } 3551 3552 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3553 { 3554 return float32_muladd(d, b, a, float_muladd_negate_c, s); 3555 } 3556 3557 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3558 { 3559 return float64_muladd(d, b, a, float_muladd_negate_c, s); 3560 } 3561 3562 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) 3563 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) 3564 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) 3565 GEN_VEXT_VV_ENV(vfmsub_vv_h, 2) 3566 GEN_VEXT_VV_ENV(vfmsub_vv_w, 4) 3567 GEN_VEXT_VV_ENV(vfmsub_vv_d, 8) 3568 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) 3569 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) 3570 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) 3571 GEN_VEXT_VF(vfmsub_vf_h, 2) 3572 GEN_VEXT_VF(vfmsub_vf_w, 4) 3573 GEN_VEXT_VF(vfmsub_vf_d, 8) 3574 3575 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3576 { 3577 return float16_muladd(d, b, a, float_muladd_negate_product, s); 3578 } 3579 3580 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3581 { 3582 return float32_muladd(d, b, a, float_muladd_negate_product, s); 3583 } 3584 3585 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3586 { 3587 return float64_muladd(d, b, a, float_muladd_negate_product, s); 3588 } 3589 3590 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) 3591 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) 3592 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) 3593 GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2) 3594 GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4) 3595 GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8) 3596 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) 3597 
RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) 3598 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64) 3599 GEN_VEXT_VF(vfnmsub_vf_h, 2) 3600 GEN_VEXT_VF(vfnmsub_vf_w, 4) 3601 GEN_VEXT_VF(vfnmsub_vf_d, 8) 3602 3603 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */ 3604 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3605 { 3606 return float32_muladd(float16_to_float32(a, true, s), 3607 float16_to_float32(b, true, s), d, 0, s); 3608 } 3609 3610 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3611 { 3612 return float64_muladd(float32_to_float64(a, s), 3613 float32_to_float64(b, s), d, 0, s); 3614 } 3615 3616 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) 3617 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32) 3618 GEN_VEXT_VV_ENV(vfwmacc_vv_h, 4) 3619 GEN_VEXT_VV_ENV(vfwmacc_vv_w, 8) 3620 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16) 3621 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32) 3622 GEN_VEXT_VF(vfwmacc_vf_h, 4) 3623 GEN_VEXT_VF(vfwmacc_vf_w, 8) 3624 3625 static uint32_t fwmaccbf16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3626 { 3627 return float32_muladd(bfloat16_to_float32(a, s), 3628 bfloat16_to_float32(b, s), d, 0, s); 3629 } 3630 3631 RVVCALL(OPFVV3, vfwmaccbf16_vv, WOP_UUU_H, H4, H2, H2, fwmaccbf16) 3632 GEN_VEXT_VV_ENV(vfwmaccbf16_vv, 4) 3633 RVVCALL(OPFVF3, vfwmaccbf16_vf, WOP_UUU_H, H4, H2, fwmaccbf16) 3634 GEN_VEXT_VF(vfwmaccbf16_vf, 4) 3635 3636 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3637 { 3638 return float32_muladd(float16_to_float32(a, true, s), 3639 float16_to_float32(b, true, s), d, 3640 float_muladd_negate_c | float_muladd_negate_product, 3641 s); 3642 } 3643 3644 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3645 { 3646 return float64_muladd(float32_to_float64(a, s), float32_to_float64(b, s), 3647 d, float_muladd_negate_c | 3648 float_muladd_negate_product, s); 3649 } 3650 3651 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) 3652 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) 3653 GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 4) 3654 GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 8) 3655 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) 3656 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) 3657 GEN_VEXT_VF(vfwnmacc_vf_h, 4) 3658 GEN_VEXT_VF(vfwnmacc_vf_w, 8) 3659 3660 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3661 { 3662 return float32_muladd(float16_to_float32(a, true, s), 3663 float16_to_float32(b, true, s), d, 3664 float_muladd_negate_c, s); 3665 } 3666 3667 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3668 { 3669 return float64_muladd(float32_to_float64(a, s), 3670 float32_to_float64(b, s), d, 3671 float_muladd_negate_c, s); 3672 } 3673 3674 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) 3675 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) 3676 GEN_VEXT_VV_ENV(vfwmsac_vv_h, 4) 3677 GEN_VEXT_VV_ENV(vfwmsac_vv_w, 8) 3678 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) 3679 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) 3680 GEN_VEXT_VF(vfwmsac_vf_h, 4) 3681 GEN_VEXT_VF(vfwmsac_vf_w, 8) 3682 3683 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3684 { 3685 return float32_muladd(float16_to_float32(a, true, s), 3686 float16_to_float32(b, true, s), d, 3687 
float_muladd_negate_product, s); 3688 } 3689 3690 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3691 { 3692 return float64_muladd(float32_to_float64(a, s), 3693 float32_to_float64(b, s), d, 3694 float_muladd_negate_product, s); 3695 } 3696 3697 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) 3698 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) 3699 GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 4) 3700 GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 8) 3701 RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) 3702 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) 3703 GEN_VEXT_VF(vfwnmsac_vf_h, 4) 3704 GEN_VEXT_VF(vfwnmsac_vf_w, 8) 3705 3706 /* Vector Floating-Point Square-Root Instruction */ 3707 #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 3708 static void do_##NAME(void *vd, void *vs2, int i, \ 3709 CPURISCVState *env) \ 3710 { \ 3711 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3712 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ 3713 } 3714 3715 #define GEN_VEXT_V_ENV(NAME, ESZ) \ 3716 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 3717 CPURISCVState *env, uint32_t desc) \ 3718 { \ 3719 uint32_t vm = vext_vm(desc); \ 3720 uint32_t vl = env->vl; \ 3721 uint32_t total_elems = \ 3722 vext_get_total_elems(env, desc, ESZ); \ 3723 uint32_t vta = vext_vta(desc); \ 3724 uint32_t vma = vext_vma(desc); \ 3725 uint32_t i; \ 3726 \ 3727 VSTART_CHECK_EARLY_EXIT(env); \ 3728 \ 3729 if (vl == 0) { \ 3730 return; \ 3731 } \ 3732 for (i = env->vstart; i < vl; i++) { \ 3733 if (!vm && !vext_elem_mask(v0, i)) { \ 3734 /* set masked-off elements to 1s */ \ 3735 vext_set_elems_1s(vd, vma, i * ESZ, \ 3736 (i + 1) * ESZ); \ 3737 continue; \ 3738 } \ 3739 do_##NAME(vd, vs2, i, env); \ 3740 } \ 3741 env->vstart = 0; \ 3742 vext_set_elems_1s(vd, vta, vl * ESZ, \ 3743 total_elems * ESZ); \ 3744 } 3745 3746 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) 3747 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) 3748 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) 3749 GEN_VEXT_V_ENV(vfsqrt_v_h, 2) 3750 GEN_VEXT_V_ENV(vfsqrt_v_w, 4) 3751 GEN_VEXT_V_ENV(vfsqrt_v_d, 8) 3752 3753 /* 3754 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction 3755 * 3756 * Adapted from riscv-v-spec recip.c: 3757 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3758 */ 3759 static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size) 3760 { 3761 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3762 uint64_t exp = extract64(f, frac_size, exp_size); 3763 uint64_t frac = extract64(f, 0, frac_size); 3764 3765 const uint8_t lookup_table[] = { 3766 52, 51, 50, 48, 47, 46, 44, 43, 3767 42, 41, 40, 39, 38, 36, 35, 34, 3768 33, 32, 31, 30, 30, 29, 28, 27, 3769 26, 25, 24, 23, 23, 22, 21, 20, 3770 19, 19, 18, 17, 16, 16, 15, 14, 3771 14, 13, 12, 12, 11, 10, 10, 9, 3772 9, 8, 7, 7, 6, 6, 5, 4, 3773 4, 3, 3, 2, 2, 1, 1, 0, 3774 127, 125, 123, 121, 119, 118, 116, 114, 3775 113, 111, 109, 108, 106, 105, 103, 102, 3776 100, 99, 97, 96, 95, 93, 92, 91, 3777 90, 88, 87, 86, 85, 84, 83, 82, 3778 80, 79, 78, 77, 76, 75, 74, 73, 3779 72, 71, 70, 70, 69, 68, 67, 66, 3780 65, 64, 63, 63, 62, 61, 60, 59, 3781 59, 58, 57, 56, 56, 55, 54, 53 3782 }; 3783 const int precision = 7; 3784 3785 if (exp == 0 && frac != 0) { /* subnormal */ 3786 /* Normalize the subnormal. 
*/ 3787 while (extract64(frac, frac_size - 1, 1) == 0) { 3788 exp--; 3789 frac <<= 1; 3790 } 3791 3792 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3793 } 3794 3795 int idx = ((exp & 1) << (precision - 1)) | 3796 (frac >> (frac_size - precision + 1)); 3797 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3798 (frac_size - precision); 3799 uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2; 3800 3801 uint64_t val = 0; 3802 val = deposit64(val, 0, frac_size, out_frac); 3803 val = deposit64(val, frac_size, exp_size, out_exp); 3804 val = deposit64(val, frac_size + exp_size, 1, sign); 3805 return val; 3806 } 3807 3808 static float16 frsqrt7_h(float16 f, float_status *s) 3809 { 3810 int exp_size = 5, frac_size = 10; 3811 bool sign = float16_is_neg(f); 3812 3813 /* 3814 * frsqrt7(sNaN) = canonical NaN 3815 * frsqrt7(-inf) = canonical NaN 3816 * frsqrt7(-normal) = canonical NaN 3817 * frsqrt7(-subnormal) = canonical NaN 3818 */ 3819 if (float16_is_signaling_nan(f, s) || 3820 (float16_is_infinity(f) && sign) || 3821 (float16_is_normal(f) && sign) || 3822 (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) { 3823 s->float_exception_flags |= float_flag_invalid; 3824 return float16_default_nan(s); 3825 } 3826 3827 /* frsqrt7(qNaN) = canonical NaN */ 3828 if (float16_is_quiet_nan(f, s)) { 3829 return float16_default_nan(s); 3830 } 3831 3832 /* frsqrt7(+-0) = +-inf */ 3833 if (float16_is_zero(f)) { 3834 s->float_exception_flags |= float_flag_divbyzero; 3835 return float16_set_sign(float16_infinity, sign); 3836 } 3837 3838 /* frsqrt7(+inf) = +0 */ 3839 if (float16_is_infinity(f) && !sign) { 3840 return float16_set_sign(float16_zero, sign); 3841 } 3842 3843 /* +normal, +subnormal */ 3844 uint64_t val = frsqrt7(f, exp_size, frac_size); 3845 return make_float16(val); 3846 } 3847 3848 static float32 frsqrt7_s(float32 f, float_status *s) 3849 { 3850 int exp_size = 8, frac_size = 23; 3851 bool sign = float32_is_neg(f); 3852 3853 /* 3854 * frsqrt7(sNaN) = canonical NaN 3855 * frsqrt7(-inf) = canonical NaN 3856 * frsqrt7(-normal) = canonical NaN 3857 * frsqrt7(-subnormal) = canonical NaN 3858 */ 3859 if (float32_is_signaling_nan(f, s) || 3860 (float32_is_infinity(f) && sign) || 3861 (float32_is_normal(f) && sign) || 3862 (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) { 3863 s->float_exception_flags |= float_flag_invalid; 3864 return float32_default_nan(s); 3865 } 3866 3867 /* frsqrt7(qNaN) = canonical NaN */ 3868 if (float32_is_quiet_nan(f, s)) { 3869 return float32_default_nan(s); 3870 } 3871 3872 /* frsqrt7(+-0) = +-inf */ 3873 if (float32_is_zero(f)) { 3874 s->float_exception_flags |= float_flag_divbyzero; 3875 return float32_set_sign(float32_infinity, sign); 3876 } 3877 3878 /* frsqrt7(+inf) = +0 */ 3879 if (float32_is_infinity(f) && !sign) { 3880 return float32_set_sign(float32_zero, sign); 3881 } 3882 3883 /* +normal, +subnormal */ 3884 uint64_t val = frsqrt7(f, exp_size, frac_size); 3885 return make_float32(val); 3886 } 3887 3888 static float64 frsqrt7_d(float64 f, float_status *s) 3889 { 3890 int exp_size = 11, frac_size = 52; 3891 bool sign = float64_is_neg(f); 3892 3893 /* 3894 * frsqrt7(sNaN) = canonical NaN 3895 * frsqrt7(-inf) = canonical NaN 3896 * frsqrt7(-normal) = canonical NaN 3897 * frsqrt7(-subnormal) = canonical NaN 3898 */ 3899 if (float64_is_signaling_nan(f, s) || 3900 (float64_is_infinity(f) && sign) || 3901 (float64_is_normal(f) && sign) || 3902 (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) { 3903 
s->float_exception_flags |= float_flag_invalid; 3904 return float64_default_nan(s); 3905 } 3906 3907 /* frsqrt7(qNaN) = canonical NaN */ 3908 if (float64_is_quiet_nan(f, s)) { 3909 return float64_default_nan(s); 3910 } 3911 3912 /* frsqrt7(+-0) = +-inf */ 3913 if (float64_is_zero(f)) { 3914 s->float_exception_flags |= float_flag_divbyzero; 3915 return float64_set_sign(float64_infinity, sign); 3916 } 3917 3918 /* frsqrt7(+inf) = +0 */ 3919 if (float64_is_infinity(f) && !sign) { 3920 return float64_set_sign(float64_zero, sign); 3921 } 3922 3923 /* +normal, +subnormal */ 3924 uint64_t val = frsqrt7(f, exp_size, frac_size); 3925 return make_float64(val); 3926 } 3927 3928 RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h) 3929 RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s) 3930 RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d) 3931 GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2) 3932 GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4) 3933 GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8) 3934 3935 /* 3936 * Vector Floating-Point Reciprocal Estimate Instruction 3937 * 3938 * Adapted from riscv-v-spec recip.c: 3939 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3940 */ 3941 static uint64_t frec7(uint64_t f, int exp_size, int frac_size, 3942 float_status *s) 3943 { 3944 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3945 uint64_t exp = extract64(f, frac_size, exp_size); 3946 uint64_t frac = extract64(f, 0, frac_size); 3947 3948 const uint8_t lookup_table[] = { 3949 127, 125, 123, 121, 119, 117, 116, 114, 3950 112, 110, 109, 107, 105, 104, 102, 100, 3951 99, 97, 96, 94, 93, 91, 90, 88, 3952 87, 85, 84, 83, 81, 80, 79, 77, 3953 76, 75, 74, 72, 71, 70, 69, 68, 3954 66, 65, 64, 63, 62, 61, 60, 59, 3955 58, 57, 56, 55, 54, 53, 52, 51, 3956 50, 49, 48, 47, 46, 45, 44, 43, 3957 42, 41, 40, 40, 39, 38, 37, 36, 3958 35, 35, 34, 33, 32, 31, 31, 30, 3959 29, 28, 28, 27, 26, 25, 25, 24, 3960 23, 23, 22, 21, 21, 20, 19, 19, 3961 18, 17, 17, 16, 15, 15, 14, 14, 3962 13, 12, 12, 11, 11, 10, 9, 9, 3963 8, 8, 7, 7, 6, 5, 5, 4, 3964 4, 3, 3, 2, 2, 1, 1, 0 3965 }; 3966 const int precision = 7; 3967 3968 if (exp == 0 && frac != 0) { /* subnormal */ 3969 /* Normalize the subnormal. */ 3970 while (extract64(frac, frac_size - 1, 1) == 0) { 3971 exp--; 3972 frac <<= 1; 3973 } 3974 3975 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3976 3977 if (exp != 0 && exp != UINT64_MAX) { 3978 /* 3979 * Overflow to inf or max value of same sign, 3980 * depending on sign and rounding mode. 3981 */ 3982 s->float_exception_flags |= (float_flag_inexact | 3983 float_flag_overflow); 3984 3985 if ((s->float_rounding_mode == float_round_to_zero) || 3986 ((s->float_rounding_mode == float_round_down) && !sign) || 3987 ((s->float_rounding_mode == float_round_up) && sign)) { 3988 /* Return greatest/negative finite value. */ 3989 return (sign << (exp_size + frac_size)) | 3990 (MAKE_64BIT_MASK(frac_size, exp_size) - 1); 3991 } else { 3992 /* Return +-inf. */ 3993 return (sign << (exp_size + frac_size)) | 3994 MAKE_64BIT_MASK(frac_size, exp_size); 3995 } 3996 } 3997 } 3998 3999 int idx = frac >> (frac_size - precision); 4000 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 4001 (frac_size - precision); 4002 uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp; 4003 4004 if (out_exp == 0 || out_exp == UINT64_MAX) { 4005 /* 4006 * The result is subnormal, but don't raise the underflow exception, 4007 * because there's no additional loss of precision. 
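     * Encode the subnormal result by shifting the fraction right one
     * place and making the leading significand bit explicit; if out_exp
     * has wrapped to UINT64_MAX (one step further below the normal
     * range), shift the fraction right once more and force the exponent
     * field to zero.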
4008 */ 4009 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1); 4010 if (out_exp == UINT64_MAX) { 4011 out_frac >>= 1; 4012 out_exp = 0; 4013 } 4014 } 4015 4016 uint64_t val = 0; 4017 val = deposit64(val, 0, frac_size, out_frac); 4018 val = deposit64(val, frac_size, exp_size, out_exp); 4019 val = deposit64(val, frac_size + exp_size, 1, sign); 4020 return val; 4021 } 4022 4023 static float16 frec7_h(float16 f, float_status *s) 4024 { 4025 int exp_size = 5, frac_size = 10; 4026 bool sign = float16_is_neg(f); 4027 4028 /* frec7(+-inf) = +-0 */ 4029 if (float16_is_infinity(f)) { 4030 return float16_set_sign(float16_zero, sign); 4031 } 4032 4033 /* frec7(+-0) = +-inf */ 4034 if (float16_is_zero(f)) { 4035 s->float_exception_flags |= float_flag_divbyzero; 4036 return float16_set_sign(float16_infinity, sign); 4037 } 4038 4039 /* frec7(sNaN) = canonical NaN */ 4040 if (float16_is_signaling_nan(f, s)) { 4041 s->float_exception_flags |= float_flag_invalid; 4042 return float16_default_nan(s); 4043 } 4044 4045 /* frec7(qNaN) = canonical NaN */ 4046 if (float16_is_quiet_nan(f, s)) { 4047 return float16_default_nan(s); 4048 } 4049 4050 /* +-normal, +-subnormal */ 4051 uint64_t val = frec7(f, exp_size, frac_size, s); 4052 return make_float16(val); 4053 } 4054 4055 static float32 frec7_s(float32 f, float_status *s) 4056 { 4057 int exp_size = 8, frac_size = 23; 4058 bool sign = float32_is_neg(f); 4059 4060 /* frec7(+-inf) = +-0 */ 4061 if (float32_is_infinity(f)) { 4062 return float32_set_sign(float32_zero, sign); 4063 } 4064 4065 /* frec7(+-0) = +-inf */ 4066 if (float32_is_zero(f)) { 4067 s->float_exception_flags |= float_flag_divbyzero; 4068 return float32_set_sign(float32_infinity, sign); 4069 } 4070 4071 /* frec7(sNaN) = canonical NaN */ 4072 if (float32_is_signaling_nan(f, s)) { 4073 s->float_exception_flags |= float_flag_invalid; 4074 return float32_default_nan(s); 4075 } 4076 4077 /* frec7(qNaN) = canonical NaN */ 4078 if (float32_is_quiet_nan(f, s)) { 4079 return float32_default_nan(s); 4080 } 4081 4082 /* +-normal, +-subnormal */ 4083 uint64_t val = frec7(f, exp_size, frac_size, s); 4084 return make_float32(val); 4085 } 4086 4087 static float64 frec7_d(float64 f, float_status *s) 4088 { 4089 int exp_size = 11, frac_size = 52; 4090 bool sign = float64_is_neg(f); 4091 4092 /* frec7(+-inf) = +-0 */ 4093 if (float64_is_infinity(f)) { 4094 return float64_set_sign(float64_zero, sign); 4095 } 4096 4097 /* frec7(+-0) = +-inf */ 4098 if (float64_is_zero(f)) { 4099 s->float_exception_flags |= float_flag_divbyzero; 4100 return float64_set_sign(float64_infinity, sign); 4101 } 4102 4103 /* frec7(sNaN) = canonical NaN */ 4104 if (float64_is_signaling_nan(f, s)) { 4105 s->float_exception_flags |= float_flag_invalid; 4106 return float64_default_nan(s); 4107 } 4108 4109 /* frec7(qNaN) = canonical NaN */ 4110 if (float64_is_quiet_nan(f, s)) { 4111 return float64_default_nan(s); 4112 } 4113 4114 /* +-normal, +-subnormal */ 4115 uint64_t val = frec7(f, exp_size, frac_size, s); 4116 return make_float64(val); 4117 } 4118 4119 RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h) 4120 RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s) 4121 RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d) 4122 GEN_VEXT_V_ENV(vfrec7_v_h, 2) 4123 GEN_VEXT_V_ENV(vfrec7_v_w, 4) 4124 GEN_VEXT_V_ENV(vfrec7_v_d, 8) 4125 4126 /* Vector Floating-Point MIN/MAX Instructions */ 4127 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number) 4128 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, 
float32_minimum_number) 4129 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number) 4130 GEN_VEXT_VV_ENV(vfmin_vv_h, 2) 4131 GEN_VEXT_VV_ENV(vfmin_vv_w, 4) 4132 GEN_VEXT_VV_ENV(vfmin_vv_d, 8) 4133 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number) 4134 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number) 4135 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number) 4136 GEN_VEXT_VF(vfmin_vf_h, 2) 4137 GEN_VEXT_VF(vfmin_vf_w, 4) 4138 GEN_VEXT_VF(vfmin_vf_d, 8) 4139 4140 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number) 4141 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number) 4142 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number) 4143 GEN_VEXT_VV_ENV(vfmax_vv_h, 2) 4144 GEN_VEXT_VV_ENV(vfmax_vv_w, 4) 4145 GEN_VEXT_VV_ENV(vfmax_vv_d, 8) 4146 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number) 4147 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number) 4148 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number) 4149 GEN_VEXT_VF(vfmax_vf_h, 2) 4150 GEN_VEXT_VF(vfmax_vf_w, 4) 4151 GEN_VEXT_VF(vfmax_vf_d, 8) 4152 4153 /* Vector Floating-Point Sign-Injection Instructions */ 4154 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) 4155 { 4156 return deposit64(b, 0, 15, a); 4157 } 4158 4159 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s) 4160 { 4161 return deposit64(b, 0, 31, a); 4162 } 4163 4164 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) 4165 { 4166 return deposit64(b, 0, 63, a); 4167 } 4168 4169 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) 4170 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) 4171 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) 4172 GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2) 4173 GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4) 4174 GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8) 4175 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) 4176 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) 4177 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64) 4178 GEN_VEXT_VF(vfsgnj_vf_h, 2) 4179 GEN_VEXT_VF(vfsgnj_vf_w, 4) 4180 GEN_VEXT_VF(vfsgnj_vf_d, 8) 4181 4182 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) 4183 { 4184 return deposit64(~b, 0, 15, a); 4185 } 4186 4187 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s) 4188 { 4189 return deposit64(~b, 0, 31, a); 4190 } 4191 4192 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s) 4193 { 4194 return deposit64(~b, 0, 63, a); 4195 } 4196 4197 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) 4198 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) 4199 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) 4200 GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2) 4201 GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4) 4202 GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8) 4203 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) 4204 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) 4205 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) 4206 GEN_VEXT_VF(vfsgnjn_vf_h, 2) 4207 GEN_VEXT_VF(vfsgnjn_vf_w, 4) 4208 GEN_VEXT_VF(vfsgnjn_vf_d, 8) 4209 4210 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) 4211 { 4212 return deposit64(b ^ a, 0, 15, a); 4213 } 4214 4215 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s) 4216 { 4217 return deposit64(b ^ a, 0, 31, a); 4218 } 4219 4220 static uint64_t fsgnjx64(uint64_t a, uint64_t b, 
float_status *s) 4221 { 4222 return deposit64(b ^ a, 0, 63, a); 4223 } 4224 4225 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) 4226 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) 4227 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) 4228 GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2) 4229 GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4) 4230 GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8) 4231 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) 4232 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) 4233 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) 4234 GEN_VEXT_VF(vfsgnjx_vf_h, 2) 4235 GEN_VEXT_VF(vfsgnjx_vf_w, 4) 4236 GEN_VEXT_VF(vfsgnjx_vf_d, 8) 4237 4238 /* Vector Floating-Point Compare Instructions */ 4239 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ 4240 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4241 CPURISCVState *env, uint32_t desc) \ 4242 { \ 4243 uint32_t vm = vext_vm(desc); \ 4244 uint32_t vl = env->vl; \ 4245 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \ 4246 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 4247 uint32_t vma = vext_vma(desc); \ 4248 uint32_t i; \ 4249 \ 4250 VSTART_CHECK_EARLY_EXIT(env); \ 4251 \ 4252 for (i = env->vstart; i < vl; i++) { \ 4253 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 4254 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4255 if (!vm && !vext_elem_mask(v0, i)) { \ 4256 /* set masked-off elements to 1s */ \ 4257 if (vma) { \ 4258 vext_set_elem_mask(vd, i, 1); \ 4259 } \ 4260 continue; \ 4261 } \ 4262 vext_set_elem_mask(vd, i, \ 4263 DO_OP(s2, s1, &env->fp_status)); \ 4264 } \ 4265 env->vstart = 0; \ 4266 /* 4267 * mask destination register are always tail-agnostic 4268 * set tail elements to 1s 4269 */ \ 4270 if (vta_all_1s) { \ 4271 for (; i < total_elems; i++) { \ 4272 vext_set_elem_mask(vd, i, 1); \ 4273 } \ 4274 } \ 4275 } 4276 4277 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet) 4278 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet) 4279 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet) 4280 4281 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \ 4282 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4283 CPURISCVState *env, uint32_t desc) \ 4284 { \ 4285 uint32_t vm = vext_vm(desc); \ 4286 uint32_t vl = env->vl; \ 4287 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \ 4288 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 4289 uint32_t vma = vext_vma(desc); \ 4290 uint32_t i; \ 4291 \ 4292 VSTART_CHECK_EARLY_EXIT(env); \ 4293 \ 4294 for (i = env->vstart; i < vl; i++) { \ 4295 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4296 if (!vm && !vext_elem_mask(v0, i)) { \ 4297 /* set masked-off elements to 1s */ \ 4298 if (vma) { \ 4299 vext_set_elem_mask(vd, i, 1); \ 4300 } \ 4301 continue; \ 4302 } \ 4303 vext_set_elem_mask(vd, i, \ 4304 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \ 4305 } \ 4306 env->vstart = 0; \ 4307 /* 4308 * mask destination register are always tail-agnostic 4309 * set tail elements to 1s 4310 */ \ 4311 if (vta_all_1s) { \ 4312 for (; i < total_elems; i++) { \ 4313 vext_set_elem_mask(vd, i, 1); \ 4314 } \ 4315 } \ 4316 } 4317 4318 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet) 4319 GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet) 4320 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet) 4321 4322 static bool vmfne16(uint16_t a, uint16_t b, float_status *s) 4323 { 4324 FloatRelation compare = float16_compare_quiet(a, b, s); 4325 return compare != float_relation_equal; 4326 } 4327 4328 static bool 
vmfne32(uint32_t a, uint32_t b, float_status *s) 4329 { 4330 FloatRelation compare = float32_compare_quiet(a, b, s); 4331 return compare != float_relation_equal; 4332 } 4333 4334 static bool vmfne64(uint64_t a, uint64_t b, float_status *s) 4335 { 4336 FloatRelation compare = float64_compare_quiet(a, b, s); 4337 return compare != float_relation_equal; 4338 } 4339 4340 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16) 4341 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32) 4342 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64) 4343 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16) 4344 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32) 4345 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64) 4346 4347 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt) 4348 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt) 4349 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt) 4350 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt) 4351 GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt) 4352 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt) 4353 4354 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le) 4355 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le) 4356 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le) 4357 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le) 4358 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le) 4359 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le) 4360 4361 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s) 4362 { 4363 FloatRelation compare = float16_compare(a, b, s); 4364 return compare == float_relation_greater; 4365 } 4366 4367 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s) 4368 { 4369 FloatRelation compare = float32_compare(a, b, s); 4370 return compare == float_relation_greater; 4371 } 4372 4373 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s) 4374 { 4375 FloatRelation compare = float64_compare(a, b, s); 4376 return compare == float_relation_greater; 4377 } 4378 4379 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16) 4380 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32) 4381 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64) 4382 4383 static bool vmfge16(uint16_t a, uint16_t b, float_status *s) 4384 { 4385 FloatRelation compare = float16_compare(a, b, s); 4386 return compare == float_relation_greater || 4387 compare == float_relation_equal; 4388 } 4389 4390 static bool vmfge32(uint32_t a, uint32_t b, float_status *s) 4391 { 4392 FloatRelation compare = float32_compare(a, b, s); 4393 return compare == float_relation_greater || 4394 compare == float_relation_equal; 4395 } 4396 4397 static bool vmfge64(uint64_t a, uint64_t b, float_status *s) 4398 { 4399 FloatRelation compare = float64_compare(a, b, s); 4400 return compare == float_relation_greater || 4401 compare == float_relation_equal; 4402 } 4403 4404 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16) 4405 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32) 4406 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64) 4407 4408 /* Vector Floating-Point Classify Instruction */ 4409 target_ulong fclass_h(uint64_t frs1) 4410 { 4411 float16 f = frs1; 4412 bool sign = float16_is_neg(f); 4413 4414 if (float16_is_infinity(f)) { 4415 return sign ? 1 << 0 : 1 << 7; 4416 } else if (float16_is_zero(f)) { 4417 return sign ? 1 << 3 : 1 << 4; 4418 } else if (float16_is_zero_or_denormal(f)) { 4419 return sign ? 
1 << 2 : 1 << 5; 4420 } else if (float16_is_any_nan(f)) { 4421 float_status s = { }; /* for snan_bit_is_one */ 4422 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4423 } else { 4424 return sign ? 1 << 1 : 1 << 6; 4425 } 4426 } 4427 4428 target_ulong fclass_s(uint64_t frs1) 4429 { 4430 float32 f = frs1; 4431 bool sign = float32_is_neg(f); 4432 4433 if (float32_is_infinity(f)) { 4434 return sign ? 1 << 0 : 1 << 7; 4435 } else if (float32_is_zero(f)) { 4436 return sign ? 1 << 3 : 1 << 4; 4437 } else if (float32_is_zero_or_denormal(f)) { 4438 return sign ? 1 << 2 : 1 << 5; 4439 } else if (float32_is_any_nan(f)) { 4440 float_status s = { }; /* for snan_bit_is_one */ 4441 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4442 } else { 4443 return sign ? 1 << 1 : 1 << 6; 4444 } 4445 } 4446 4447 target_ulong fclass_d(uint64_t frs1) 4448 { 4449 float64 f = frs1; 4450 bool sign = float64_is_neg(f); 4451 4452 if (float64_is_infinity(f)) { 4453 return sign ? 1 << 0 : 1 << 7; 4454 } else if (float64_is_zero(f)) { 4455 return sign ? 1 << 3 : 1 << 4; 4456 } else if (float64_is_zero_or_denormal(f)) { 4457 return sign ? 1 << 2 : 1 << 5; 4458 } else if (float64_is_any_nan(f)) { 4459 float_status s = { }; /* for snan_bit_is_one */ 4460 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4461 } else { 4462 return sign ? 1 << 1 : 1 << 6; 4463 } 4464 } 4465 4466 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) 4467 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) 4468 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) 4469 GEN_VEXT_V(vfclass_v_h, 2) 4470 GEN_VEXT_V(vfclass_v_w, 4) 4471 GEN_VEXT_V(vfclass_v_d, 8) 4472 4473 /* Vector Floating-Point Merge Instruction */ 4474 4475 #define GEN_VFMERGE_VF(NAME, ETYPE, H) \ 4476 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4477 CPURISCVState *env, uint32_t desc) \ 4478 { \ 4479 uint32_t vm = vext_vm(desc); \ 4480 uint32_t vl = env->vl; \ 4481 uint32_t esz = sizeof(ETYPE); \ 4482 uint32_t total_elems = \ 4483 vext_get_total_elems(env, desc, esz); \ 4484 uint32_t vta = vext_vta(desc); \ 4485 uint32_t i; \ 4486 \ 4487 VSTART_CHECK_EARLY_EXIT(env); \ 4488 \ 4489 for (i = env->vstart; i < vl; i++) { \ 4490 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4491 *((ETYPE *)vd + H(i)) = \ 4492 (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \ 4493 } \ 4494 env->vstart = 0; \ 4495 /* set tail elements to 1s */ \ 4496 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4497 } 4498 4499 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2) 4500 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4) 4501 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8) 4502 4503 /* Single-Width Floating-Point/Integer Type-Convert Instructions */ 4504 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ 4505 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) 4506 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) 4507 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) 4508 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2) 4509 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4) 4510 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8) 4511 4512 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. 
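 * Out-of-range and NaN inputs are handled by the softfloat conversion
 * routines, which saturate the result and raise the invalid flag.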
*/ 4513 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) 4514 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) 4515 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) 4516 GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2) 4517 GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4) 4518 GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8) 4519 4520 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ 4521 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) 4522 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) 4523 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) 4524 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2) 4525 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4) 4526 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8) 4527 4528 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */ 4529 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) 4530 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) 4531 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) 4532 GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2) 4533 GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4) 4534 GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8) 4535 4536 /* Widening Floating-Point/Integer Type-Convert Instructions */ 4537 /* (TD, T2, TX2) */ 4538 #define WOP_UU_B uint16_t, uint8_t, uint8_t 4539 #define WOP_UU_H uint32_t, uint16_t, uint16_t 4540 #define WOP_UU_W uint64_t, uint32_t, uint32_t 4541 /* 4542 * vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer. 4543 */ 4544 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) 4545 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) 4546 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 4) 4547 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 8) 4548 4549 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */ 4550 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) 4551 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) 4552 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 4) 4553 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 8) 4554 4555 /* 4556 * vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float. 4557 */ 4558 RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16) 4559 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) 4560 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) 4561 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 2) 4562 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 4) 4563 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 8) 4564 4565 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ 4566 RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16) 4567 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) 4568 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) 4569 GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 2) 4570 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 4) 4571 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 8) 4572 4573 /* 4574 * vfwcvt.f.f.v vd, vs2, vm # Convert single-width float to double-width float. 
4575 */ 4576 static uint32_t vfwcvtffv16(uint16_t a, float_status *s) 4577 { 4578 return float16_to_float32(a, true, s); 4579 } 4580 4581 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16) 4582 RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64) 4583 GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 4) 4584 GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 8) 4585 4586 RVVCALL(OPFVV1, vfwcvtbf16_f_f_v, WOP_UU_H, H4, H2, bfloat16_to_float32) 4587 GEN_VEXT_V_ENV(vfwcvtbf16_f_f_v, 4) 4588 4589 /* Narrowing Floating-Point/Integer Type-Convert Instructions */ 4590 /* (TD, T2, TX2) */ 4591 #define NOP_UU_B uint8_t, uint16_t, uint32_t 4592 #define NOP_UU_H uint16_t, uint32_t, uint32_t 4593 #define NOP_UU_W uint32_t, uint64_t, uint64_t 4594 /* vfncvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ 4595 RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8) 4596 RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16) 4597 RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32) 4598 GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1) 4599 GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2) 4600 GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4) 4601 4602 /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */ 4603 RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8) 4604 RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16) 4605 RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32) 4606 GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1) 4607 GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2) 4608 GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4) 4609 4610 /* 4611 * vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float. 4612 */ 4613 RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16) 4614 RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32) 4615 GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2) 4616 GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4) 4617 4618 /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */ 4619 RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16) 4620 RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32) 4621 GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2) 4622 GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4) 4623 4624 /* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. 
*/ 4625 static uint16_t vfncvtffv16(uint32_t a, float_status *s) 4626 { 4627 return float32_to_float16(a, true, s); 4628 } 4629 4630 RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16) 4631 RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32) 4632 GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2) 4633 GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4) 4634 4635 RVVCALL(OPFVV1, vfncvtbf16_f_f_w, NOP_UU_H, H2, H4, float32_to_bfloat16) 4636 GEN_VEXT_V_ENV(vfncvtbf16_f_f_w, 2) 4637 4638 /* 4639 * Vector Reduction Operations 4640 */ 4641 /* Vector Single-Width Integer Reduction Instructions */ 4642 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \ 4643 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4644 void *vs2, CPURISCVState *env, \ 4645 uint32_t desc) \ 4646 { \ 4647 uint32_t vm = vext_vm(desc); \ 4648 uint32_t vl = env->vl; \ 4649 uint32_t esz = sizeof(TD); \ 4650 uint32_t vlenb = simd_maxsz(desc); \ 4651 uint32_t vta = vext_vta(desc); \ 4652 uint32_t i; \ 4653 TD s1 = *((TD *)vs1 + HD(0)); \ 4654 \ 4655 for (i = env->vstart; i < vl; i++) { \ 4656 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4657 if (!vm && !vext_elem_mask(v0, i)) { \ 4658 continue; \ 4659 } \ 4660 s1 = OP(s1, (TD)s2); \ 4661 } \ 4662 *((TD *)vd + HD(0)) = s1; \ 4663 env->vstart = 0; \ 4664 /* set tail elements to 1s */ \ 4665 vext_set_elems_1s(vd, vta, esz, vlenb); \ 4666 } 4667 4668 /* vd[0] = sum(vs1[0], vs2[*]) */ 4669 GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD) 4670 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD) 4671 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD) 4672 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD) 4673 4674 /* vd[0] = maxu(vs1[0], vs2[*]) */ 4675 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX) 4676 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX) 4677 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX) 4678 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX) 4679 4680 /* vd[0] = max(vs1[0], vs2[*]) */ 4681 GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX) 4682 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX) 4683 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX) 4684 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX) 4685 4686 /* vd[0] = minu(vs1[0], vs2[*]) */ 4687 GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN) 4688 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN) 4689 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN) 4690 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN) 4691 4692 /* vd[0] = min(vs1[0], vs2[*]) */ 4693 GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN) 4694 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN) 4695 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN) 4696 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN) 4697 4698 /* vd[0] = and(vs1[0], vs2[*]) */ 4699 GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND) 4700 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND) 4701 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND) 4702 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND) 4703 4704 /* vd[0] = or(vs1[0], vs2[*]) */ 4705 GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR) 4706 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR) 4707 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR) 4708 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR) 4709 4710 /* vd[0] = xor(vs1[0], vs2[*]) 
*/ 4711 GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR) 4712 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR) 4713 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR) 4714 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR) 4715 4716 /* Vector Widening Integer Reduction Instructions */ 4717 /* signed sum reduction into double-width accumulator */ 4718 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD) 4719 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD) 4720 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD) 4721 4722 /* Unsigned sum reduction into double-width accumulator */ 4723 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD) 4724 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD) 4725 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD) 4726 4727 /* Vector Single-Width Floating-Point Reduction Instructions */ 4728 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \ 4729 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4730 void *vs2, CPURISCVState *env, \ 4731 uint32_t desc) \ 4732 { \ 4733 uint32_t vm = vext_vm(desc); \ 4734 uint32_t vl = env->vl; \ 4735 uint32_t esz = sizeof(TD); \ 4736 uint32_t vlenb = simd_maxsz(desc); \ 4737 uint32_t vta = vext_vta(desc); \ 4738 uint32_t i; \ 4739 TD s1 = *((TD *)vs1 + HD(0)); \ 4740 \ 4741 for (i = env->vstart; i < vl; i++) { \ 4742 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4743 if (!vm && !vext_elem_mask(v0, i)) { \ 4744 continue; \ 4745 } \ 4746 s1 = OP(s1, (TD)s2, &env->fp_status); \ 4747 } \ 4748 *((TD *)vd + HD(0)) = s1; \ 4749 env->vstart = 0; \ 4750 /* set tail elements to 1s */ \ 4751 vext_set_elems_1s(vd, vta, esz, vlenb); \ 4752 } 4753 4754 /* Unordered sum */ 4755 GEN_VEXT_FRED(vfredusum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 4756 GEN_VEXT_FRED(vfredusum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4757 GEN_VEXT_FRED(vfredusum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4758 4759 /* Ordered sum */ 4760 GEN_VEXT_FRED(vfredosum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 4761 GEN_VEXT_FRED(vfredosum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4762 GEN_VEXT_FRED(vfredosum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4763 4764 /* Maximum value */ 4765 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, 4766 float16_maximum_number) 4767 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, 4768 float32_maximum_number) 4769 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, 4770 float64_maximum_number) 4771 4772 /* Minimum value */ 4773 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, 4774 float16_minimum_number) 4775 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, 4776 float32_minimum_number) 4777 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, 4778 float64_minimum_number) 4779 4780 /* Vector Widening Floating-Point Add Instructions */ 4781 static uint32_t fwadd16(uint32_t a, uint16_t b, float_status *s) 4782 { 4783 return float32_add(a, float16_to_float32(b, true, s), s); 4784 } 4785 4786 static uint64_t fwadd32(uint64_t a, uint32_t b, float_status *s) 4787 { 4788 return float64_add(a, float32_to_float64(b, s), s); 4789 } 4790 4791 /* Vector Widening Floating-Point Reduction Instructions */ 4792 /* Ordered/unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */ 4793 GEN_VEXT_FRED(vfwredusum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16) 4794 GEN_VEXT_FRED(vfwredusum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32) 4795 GEN_VEXT_FRED(vfwredosum_vs_h, uint32_t, uint16_t, H4, H2, 
fwadd16) 4796 GEN_VEXT_FRED(vfwredosum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32) 4797 4798 /* 4799 * Vector Mask Operations 4800 */ 4801 /* Vector Mask-Register Logical Instructions */ 4802 #define GEN_VEXT_MASK_VV(NAME, OP) \ 4803 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4804 void *vs2, CPURISCVState *env, \ 4805 uint32_t desc) \ 4806 { \ 4807 uint32_t vl = env->vl; \ 4808 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;\ 4809 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 4810 uint32_t i; \ 4811 int a, b; \ 4812 \ 4813 VSTART_CHECK_EARLY_EXIT(env); \ 4814 \ 4815 for (i = env->vstart; i < vl; i++) { \ 4816 a = vext_elem_mask(vs1, i); \ 4817 b = vext_elem_mask(vs2, i); \ 4818 vext_set_elem_mask(vd, i, OP(b, a)); \ 4819 } \ 4820 env->vstart = 0; \ 4821 /* 4822 * mask destination register are always tail-agnostic 4823 * set tail elements to 1s 4824 */ \ 4825 if (vta_all_1s) { \ 4826 for (; i < total_elems; i++) { \ 4827 vext_set_elem_mask(vd, i, 1); \ 4828 } \ 4829 } \ 4830 } 4831 4832 #define DO_NAND(N, M) (!(N & M)) 4833 #define DO_ANDNOT(N, M) (N & !M) 4834 #define DO_NOR(N, M) (!(N | M)) 4835 #define DO_ORNOT(N, M) (N | !M) 4836 #define DO_XNOR(N, M) (!(N ^ M)) 4837 4838 GEN_VEXT_MASK_VV(vmand_mm, DO_AND) 4839 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND) 4840 GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT) 4841 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR) 4842 GEN_VEXT_MASK_VV(vmor_mm, DO_OR) 4843 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR) 4844 GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT) 4845 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR) 4846 4847 /* Vector count population in mask vcpop */ 4848 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env, 4849 uint32_t desc) 4850 { 4851 target_ulong cnt = 0; 4852 uint32_t vm = vext_vm(desc); 4853 uint32_t vl = env->vl; 4854 int i; 4855 4856 for (i = env->vstart; i < vl; i++) { 4857 if (vm || vext_elem_mask(v0, i)) { 4858 if (vext_elem_mask(vs2, i)) { 4859 cnt++; 4860 } 4861 } 4862 } 4863 env->vstart = 0; 4864 return cnt; 4865 } 4866 4867 /* vfirst find-first-set mask bit */ 4868 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env, 4869 uint32_t desc) 4870 { 4871 uint32_t vm = vext_vm(desc); 4872 uint32_t vl = env->vl; 4873 int i; 4874 4875 for (i = env->vstart; i < vl; i++) { 4876 if (vm || vext_elem_mask(v0, i)) { 4877 if (vext_elem_mask(vs2, i)) { 4878 return i; 4879 } 4880 } 4881 } 4882 env->vstart = 0; 4883 return -1LL; 4884 } 4885 4886 enum set_mask_type { 4887 ONLY_FIRST = 1, 4888 INCLUDE_FIRST, 4889 BEFORE_FIRST, 4890 }; 4891 4892 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, 4893 uint32_t desc, enum set_mask_type type) 4894 { 4895 uint32_t vm = vext_vm(desc); 4896 uint32_t vl = env->vl; 4897 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; 4898 uint32_t vta_all_1s = vext_vta_all_1s(desc); 4899 uint32_t vma = vext_vma(desc); 4900 int i; 4901 bool first_mask_bit = false; 4902 4903 for (i = env->vstart; i < vl; i++) { 4904 if (!vm && !vext_elem_mask(v0, i)) { 4905 /* set masked-off elements to 1s */ 4906 if (vma) { 4907 vext_set_elem_mask(vd, i, 1); 4908 } 4909 continue; 4910 } 4911 /* write a zero to all following active elements */ 4912 if (first_mask_bit) { 4913 vext_set_elem_mask(vd, i, 0); 4914 continue; 4915 } 4916 if (vext_elem_mask(vs2, i)) { 4917 first_mask_bit = true; 4918 if (type == BEFORE_FIRST) { 4919 vext_set_elem_mask(vd, i, 0); 4920 } else { 4921 vext_set_elem_mask(vd, i, 1); 4922 } 4923 } else { 4924 if (type == ONLY_FIRST) { 4925 vext_set_elem_mask(vd, i, 0); 4926 } else { 4927 
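                /* elements before the first set source bit are 1s for vmsbf/vmsif */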

/* Vector Iota Instruction */
#define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \
void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \
                  uint32_t desc) \
{ \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t esz = sizeof(ETYPE); \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
    uint32_t vta = vext_vta(desc); \
    uint32_t vma = vext_vma(desc); \
    uint32_t sum = 0; \
    int i; \
 \
    for (i = env->vstart; i < vl; i++) { \
        if (!vm && !vext_elem_mask(v0, i)) { \
            /* set masked-off elements to 1s */ \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
            continue; \
        } \
        *((ETYPE *)vd + H(i)) = sum; \
        if (vext_elem_mask(vs2, i)) { \
            sum++; \
        } \
    } \
    env->vstart = 0; \
    /* set tail elements to 1s */ \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}

GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1)
GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)
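
/*
 * viota writes, for each active element, the number of mask bits set in vs2
 * below that position (an exclusive prefix sum), e.g. with all elements
 * active:
 *   vs2 = 1 0 1 1 0  ->  vd = 0 1 1 2 3
 */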

/* Vector Element Index Instruction */
#define GEN_VEXT_VID_V(NAME, ETYPE, H) \
void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t esz = sizeof(ETYPE); \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
    uint32_t vta = vext_vta(desc); \
    uint32_t vma = vext_vma(desc); \
    int i; \
 \
    VSTART_CHECK_EARLY_EXIT(env); \
 \
    for (i = env->vstart; i < vl; i++) { \
        if (!vm && !vext_elem_mask(v0, i)) { \
            /* set masked-off elements to 1s */ \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
            continue; \
        } \
        *((ETYPE *)vd + H(i)) = i; \
    } \
    env->vstart = 0; \
    /* set tail elements to 1s */ \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}

GEN_VEXT_VID_V(vid_v_b, uint8_t, H1)
GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)

/*
 * Vector Permutation Instructions
 */

/* Vector Slide Instructions */
#define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t esz = sizeof(ETYPE); \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
    uint32_t vta = vext_vta(desc); \
    uint32_t vma = vext_vma(desc); \
    target_ulong offset = s1, i_min, i; \
 \
    VSTART_CHECK_EARLY_EXIT(env); \
 \
    i_min = MAX(env->vstart, offset); \
    for (i = i_min; i < vl; i++) { \
        if (!vm && !vext_elem_mask(v0, i)) { \
            /* set masked-off elements to 1s */ \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
            continue; \
        } \
        *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \
    } \
    env->vstart = 0; \
    /* set tail elements to 1s */ \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}

/* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)

#define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t esz = sizeof(ETYPE); \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
    uint32_t vta = vext_vta(desc); \
    uint32_t vma = vext_vma(desc); \
    target_ulong i_max, i_min, i; \
 \
    VSTART_CHECK_EARLY_EXIT(env); \
 \
    i_min = MIN(s1 < vlmax ? vlmax - s1 : 0, vl); \
    i_max = MAX(i_min, env->vstart); \
    for (i = env->vstart; i < i_max; ++i) { \
        if (!vm && !vext_elem_mask(v0, i)) { \
            /* set masked-off elements to 1s */ \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
            continue; \
        } \
        *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1)); \
    } \
 \
    for (i = i_max; i < vl; ++i) { \
        if (vm || vext_elem_mask(v0, i)) { \
            *((ETYPE *)vd + H(i)) = 0; \
        } \
    } \
 \
    env->vstart = 0; \
    /* set tail elements to 1s */ \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}

/* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)
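
/*
 * In the slidedown helper above, i_max is the first destination index whose
 * source index i + rs1 would reach or exceed VLMAX; active elements from
 * i_max up to vl are therefore written as zero.
 */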

#define GEN_VEXT_VSLIE1UP(BITWIDTH, H) \
static void vslide1up_##BITWIDTH(void *vd, void *v0, uint64_t s1, \
                                 void *vs2, CPURISCVState *env, \
                                 uint32_t desc) \
{ \
    typedef uint##BITWIDTH##_t ETYPE; \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t esz = sizeof(ETYPE); \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
    uint32_t vta = vext_vta(desc); \
    uint32_t vma = vext_vma(desc); \
    uint32_t i; \
 \
    VSTART_CHECK_EARLY_EXIT(env); \
 \
    for (i = env->vstart; i < vl; i++) { \
        if (!vm && !vext_elem_mask(v0, i)) { \
            /* set masked-off elements to 1s */ \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
            continue; \
        } \
        if (i == 0) { \
            *((ETYPE *)vd + H(i)) = s1; \
        } else { \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \
        } \
    } \
    env->vstart = 0; \
    /* set tail elements to 1s */ \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}

GEN_VEXT_VSLIE1UP(8, H1)
GEN_VEXT_VSLIE1UP(16, H2)
GEN_VEXT_VSLIE1UP(32, H4)
GEN_VEXT_VSLIE1UP(64, H8)

#define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
}

/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)

#define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H) \
static void vslide1down_##BITWIDTH(void *vd, void *v0, uint64_t s1, \
                                   void *vs2, CPURISCVState *env, \
                                   uint32_t desc) \
{ \
    typedef uint##BITWIDTH##_t ETYPE; \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t esz = sizeof(ETYPE); \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
    uint32_t vta = vext_vta(desc); \
    uint32_t vma = vext_vma(desc); \
    uint32_t i; \
 \
    VSTART_CHECK_EARLY_EXIT(env); \
 \
    for (i = env->vstart; i < vl; i++) { \
        if (!vm && !vext_elem_mask(v0, i)) { \
            /* set masked-off elements to 1s */ \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
            continue; \
        } \
        if (i == vl - 1) { \
            *((ETYPE *)vd + H(i)) = s1; \
        } else { \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \
        } \
    } \
    env->vstart = 0; \
    /* set tail elements to 1s */ \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}

GEN_VEXT_VSLIDE1DOWN(8, H1)
GEN_VEXT_VSLIDE1DOWN(16, H2)
GEN_VEXT_VSLIDE1DOWN(32, H4)
GEN_VEXT_VSLIDE1DOWN(64, H8)

#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
}

/* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)

/* Vector Floating-Point Slide Instructions */
#define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH) \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
}

/* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)

#define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH) \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
}

/* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)

/* Vector Register Gather Instruction */
#define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2))); \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t esz = sizeof(TS2); \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
    uint32_t vta = vext_vta(desc); \
    uint32_t vma = vext_vma(desc); \
    uint64_t index; \
    uint32_t i; \
 \
    VSTART_CHECK_EARLY_EXIT(env); \
 \
    for (i = env->vstart; i < vl; i++) { \
        if (!vm && !vext_elem_mask(v0, i)) { \
            /* set masked-off elements to 1s */ \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
            continue; \
        } \
        index = *((TS1 *)vs1 + HS1(i)); \
        if (index >= vlmax) { \
            *((TS2 *)vd + HS2(i)) = 0; \
        } else { \
            *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index)); \
        } \
    } \
    env->vstart = 0; \
    /* set tail elements to 1s */ \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}

/* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1)
GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)

GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)
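
/*
 * For the vrgatherei16 variants above, the index elements in vs1 are always
 * 16 bits wide (TS1/HS1) while the data elements in vs2/vd use the current
 * SEW (TS2/HS2), which is why index and data use separate types and element
 * orderings.
 */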

#define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t esz = sizeof(ETYPE); \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
    uint32_t vta = vext_vta(desc); \
    uint32_t vma = vext_vma(desc); \
    uint64_t index = s1; \
    uint32_t i; \
 \
    VSTART_CHECK_EARLY_EXIT(env); \
 \
    for (i = env->vstart; i < vl; i++) { \
        if (!vm && !vext_elem_mask(v0, i)) { \
            /* set masked-off elements to 1s */ \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
            continue; \
        } \
        if (index >= vlmax) { \
            *((ETYPE *)vd + H(i)) = 0; \
        } else { \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \
        } \
    } \
    env->vstart = 0; \
    /* set tail elements to 1s */ \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}

/* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[x[rs1]] */
GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1)
GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)

/* Vector Compress Instruction */
#define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vl = env->vl; \
    uint32_t esz = sizeof(ETYPE); \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
    uint32_t vta = vext_vta(desc); \
    uint32_t num = 0, i; \
 \
    for (i = env->vstart; i < vl; i++) { \
        if (!vext_elem_mask(vs1, i)) { \
            continue; \
        } \
        *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i)); \
        num++; \
    } \
    env->vstart = 0; \
    /* set tail elements to 1s */ \
    vext_set_elems_1s(vd, vta, num * esz, total_elems * esz); \
}

/* Compress into vd elements of vs2 where vs1 is enabled */
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)
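
/*
 * Example with vl = 4: for vs1 = 1 0 1 1 and vs2 = {a, b, c, d}, the helpers
 * above pack the selected elements at the bottom of vd, giving
 * vd = {a, c, d, ...} with num = 3; elements from num upward are treated as
 * tail elements.
 */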

/* Vector Whole Register Move */
void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
{
    /* EEW = SEW */
    uint32_t maxsz = simd_maxsz(desc);
    uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
    uint32_t startb = env->vstart * sewb;
    uint32_t i = startb;

    if (startb >= maxsz) {
        env->vstart = 0;
        return;
    }

    if (HOST_BIG_ENDIAN && i % 8 != 0) {
        uint32_t j = ROUND_UP(i, 8);
        memcpy((uint8_t *)vd + H1(j - 1),
               (uint8_t *)vs2 + H1(j - 1),
               j - i);
        i = j;
    }

    memcpy((uint8_t *)vd + H1(i),
           (uint8_t *)vs2 + H1(i),
           maxsz - i);

    env->vstart = 0;
}

/* Vector Integer Extension */
#define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1) \
void HELPER(NAME)(void *vd, void *v0, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vl = env->vl; \
    uint32_t vm = vext_vm(desc); \
    uint32_t esz = sizeof(ETYPE); \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
    uint32_t vta = vext_vta(desc); \
    uint32_t vma = vext_vma(desc); \
    uint32_t i; \
 \
    VSTART_CHECK_EARLY_EXIT(env); \
 \
    for (i = env->vstart; i < vl; i++) { \
        if (!vm && !vext_elem_mask(v0, i)) { \
            /* set masked-off elements to 1s */ \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
            continue; \
        } \
        *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i)); \
    } \
    env->vstart = 0; \
    /* set tail elements to 1s */ \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}

GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1)
GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1)
GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1)

GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1)
GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1)
GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1)
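
/*
 * The widening itself is done by the plain C assignment in the macro above:
 * DTYPE selects zero- or sign-extension, e.g. a vs2 byte of 0x80 becomes
 * 0x00000080 for vzext_vf4_w and 0xffffff80 for vsext_vf4_w.
 */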