198f40dd2SKiran Ostrolenk /* 298f40dd2SKiran Ostrolenk * RISC-V Vector Extension Internals 398f40dd2SKiran Ostrolenk * 498f40dd2SKiran Ostrolenk * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved. 598f40dd2SKiran Ostrolenk * 698f40dd2SKiran Ostrolenk * This program is free software; you can redistribute it and/or modify it 798f40dd2SKiran Ostrolenk * under the terms and conditions of the GNU General Public License, 898f40dd2SKiran Ostrolenk * version 2 or later, as published by the Free Software Foundation. 998f40dd2SKiran Ostrolenk * 1098f40dd2SKiran Ostrolenk * This program is distributed in the hope it will be useful, but WITHOUT 1198f40dd2SKiran Ostrolenk * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 1298f40dd2SKiran Ostrolenk * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 1398f40dd2SKiran Ostrolenk * more details. 1498f40dd2SKiran Ostrolenk * 1598f40dd2SKiran Ostrolenk * You should have received a copy of the GNU General Public License along with 1698f40dd2SKiran Ostrolenk * this program. If not, see <http://www.gnu.org/licenses/>. 1798f40dd2SKiran Ostrolenk */ 1898f40dd2SKiran Ostrolenk 1998f40dd2SKiran Ostrolenk #ifndef TARGET_RISCV_VECTOR_INTERNALS_H 2098f40dd2SKiran Ostrolenk #define TARGET_RISCV_VECTOR_INTERNALS_H 2198f40dd2SKiran Ostrolenk 2298f40dd2SKiran Ostrolenk #include "qemu/bitops.h" 23*04480a0eSPhilippe Mathieu-Daudé #include "hw/registerfields.h" 2498f40dd2SKiran Ostrolenk #include "cpu.h" 2598f40dd2SKiran Ostrolenk #include "tcg/tcg-gvec-desc.h" 2698f40dd2SKiran Ostrolenk #include "internals.h" 2798f40dd2SKiran Ostrolenk 28df4252b2SDaniel Henrique Barboza #define VSTART_CHECK_EARLY_EXIT(env) do { \ 29df4252b2SDaniel Henrique Barboza if (env->vstart >= env->vl) { \ 30df4252b2SDaniel Henrique Barboza env->vstart = 0; \ 31df4252b2SDaniel Henrique Barboza return; \ 32df4252b2SDaniel Henrique Barboza } \ 33df4252b2SDaniel Henrique Barboza } while (0) 34df4252b2SDaniel Henrique Barboza 3598f40dd2SKiran Ostrolenk static inline uint32_t vext_nf(uint32_t desc) 3698f40dd2SKiran Ostrolenk { 3798f40dd2SKiran Ostrolenk return FIELD_EX32(simd_data(desc), VDATA, NF); 3898f40dd2SKiran Ostrolenk } 3998f40dd2SKiran Ostrolenk 4098f40dd2SKiran Ostrolenk /* 4198f40dd2SKiran Ostrolenk * Note that vector data is stored in host-endian 64-bit chunks, 4298f40dd2SKiran Ostrolenk * so addressing units smaller than that needs a host-endian fixup. 4398f40dd2SKiran Ostrolenk */ 4498f40dd2SKiran Ostrolenk #if HOST_BIG_ENDIAN 4598f40dd2SKiran Ostrolenk #define H1(x) ((x) ^ 7) 4698f40dd2SKiran Ostrolenk #define H1_2(x) ((x) ^ 6) 4798f40dd2SKiran Ostrolenk #define H1_4(x) ((x) ^ 4) 4898f40dd2SKiran Ostrolenk #define H2(x) ((x) ^ 3) 4998f40dd2SKiran Ostrolenk #define H4(x) ((x) ^ 1) 5098f40dd2SKiran Ostrolenk #define H8(x) ((x)) 5198f40dd2SKiran Ostrolenk #else 5298f40dd2SKiran Ostrolenk #define H1(x) (x) 5398f40dd2SKiran Ostrolenk #define H1_2(x) (x) 5498f40dd2SKiran Ostrolenk #define H1_4(x) (x) 5598f40dd2SKiran Ostrolenk #define H2(x) (x) 5698f40dd2SKiran Ostrolenk #define H4(x) (x) 5798f40dd2SKiran Ostrolenk #define H8(x) (x) 5898f40dd2SKiran Ostrolenk #endif 5998f40dd2SKiran Ostrolenk 6098f40dd2SKiran Ostrolenk /* 6198f40dd2SKiran Ostrolenk * Encode LMUL to lmul as following: 6298f40dd2SKiran Ostrolenk * LMUL vlmul lmul 6398f40dd2SKiran Ostrolenk * 1 000 0 6498f40dd2SKiran Ostrolenk * 2 001 1 6598f40dd2SKiran Ostrolenk * 4 010 2 6698f40dd2SKiran Ostrolenk * 8 011 3 6798f40dd2SKiran Ostrolenk * - 100 - 6898f40dd2SKiran Ostrolenk * 1/8 101 -3 6998f40dd2SKiran Ostrolenk * 1/4 110 -2 7098f40dd2SKiran Ostrolenk * 1/2 111 -1 7198f40dd2SKiran Ostrolenk */ 7298f40dd2SKiran Ostrolenk static inline int32_t vext_lmul(uint32_t desc) 7398f40dd2SKiran Ostrolenk { 7498f40dd2SKiran Ostrolenk return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3); 7598f40dd2SKiran Ostrolenk } 7698f40dd2SKiran Ostrolenk 7798f40dd2SKiran Ostrolenk static inline uint32_t vext_vm(uint32_t desc) 7898f40dd2SKiran Ostrolenk { 7998f40dd2SKiran Ostrolenk return FIELD_EX32(simd_data(desc), VDATA, VM); 8098f40dd2SKiran Ostrolenk } 8198f40dd2SKiran Ostrolenk 8298f40dd2SKiran Ostrolenk static inline uint32_t vext_vma(uint32_t desc) 8398f40dd2SKiran Ostrolenk { 8498f40dd2SKiran Ostrolenk return FIELD_EX32(simd_data(desc), VDATA, VMA); 8598f40dd2SKiran Ostrolenk } 8698f40dd2SKiran Ostrolenk 8798f40dd2SKiran Ostrolenk static inline uint32_t vext_vta(uint32_t desc) 8898f40dd2SKiran Ostrolenk { 8998f40dd2SKiran Ostrolenk return FIELD_EX32(simd_data(desc), VDATA, VTA); 9098f40dd2SKiran Ostrolenk } 9198f40dd2SKiran Ostrolenk 9298f40dd2SKiran Ostrolenk static inline uint32_t vext_vta_all_1s(uint32_t desc) 9398f40dd2SKiran Ostrolenk { 9498f40dd2SKiran Ostrolenk return FIELD_EX32(simd_data(desc), VDATA, VTA_ALL_1S); 9598f40dd2SKiran Ostrolenk } 9698f40dd2SKiran Ostrolenk 9798f40dd2SKiran Ostrolenk /* 9898f40dd2SKiran Ostrolenk * Earlier designs (pre-0.9) had a varying number of bits 9998f40dd2SKiran Ostrolenk * per mask value (MLEN). In the 0.9 design, MLEN=1. 10098f40dd2SKiran Ostrolenk * (Section 4.5) 10198f40dd2SKiran Ostrolenk */ 10298f40dd2SKiran Ostrolenk static inline int vext_elem_mask(void *v0, int index) 10398f40dd2SKiran Ostrolenk { 10498f40dd2SKiran Ostrolenk int idx = index / 64; 10598f40dd2SKiran Ostrolenk int pos = index % 64; 10698f40dd2SKiran Ostrolenk return (((uint64_t *)v0)[idx] >> pos) & 1; 10798f40dd2SKiran Ostrolenk } 10898f40dd2SKiran Ostrolenk 10998f40dd2SKiran Ostrolenk /* 11098f40dd2SKiran Ostrolenk * Get number of total elements, including prestart, body and tail elements. 11198f40dd2SKiran Ostrolenk * Note that when LMUL < 1, the tail includes the elements past VLMAX that 11298f40dd2SKiran Ostrolenk * are held in the same vector register. 11398f40dd2SKiran Ostrolenk */ 11498f40dd2SKiran Ostrolenk static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc, 11598f40dd2SKiran Ostrolenk uint32_t esz) 11698f40dd2SKiran Ostrolenk { 11798f40dd2SKiran Ostrolenk uint32_t vlenb = simd_maxsz(desc); 11898f40dd2SKiran Ostrolenk uint32_t sew = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW); 11998f40dd2SKiran Ostrolenk int8_t emul = ctzl(esz) - ctzl(sew) + vext_lmul(desc) < 0 ? 0 : 12098f40dd2SKiran Ostrolenk ctzl(esz) - ctzl(sew) + vext_lmul(desc); 12198f40dd2SKiran Ostrolenk return (vlenb << emul) / esz; 12298f40dd2SKiran Ostrolenk } 12398f40dd2SKiran Ostrolenk 12498f40dd2SKiran Ostrolenk /* set agnostic elements to 1s */ 12598f40dd2SKiran Ostrolenk void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt, 12698f40dd2SKiran Ostrolenk uint32_t tot); 12798f40dd2SKiran Ostrolenk 12898f40dd2SKiran Ostrolenk /* expand macro args before macro */ 12998f40dd2SKiran Ostrolenk #define RVVCALL(macro, ...) macro(__VA_ARGS__) 13098f40dd2SKiran Ostrolenk 1312152e48bSKiran Ostrolenk /* (TD, T2, TX2) */ 1322152e48bSKiran Ostrolenk #define OP_UU_B uint8_t, uint8_t, uint8_t 1332152e48bSKiran Ostrolenk #define OP_UU_H uint16_t, uint16_t, uint16_t 1342152e48bSKiran Ostrolenk #define OP_UU_W uint32_t, uint32_t, uint32_t 1352152e48bSKiran Ostrolenk #define OP_UU_D uint64_t, uint64_t, uint64_t 1362152e48bSKiran Ostrolenk 13798f40dd2SKiran Ostrolenk /* (TD, T1, T2, TX1, TX2) */ 13898f40dd2SKiran Ostrolenk #define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t 13998f40dd2SKiran Ostrolenk #define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t 14098f40dd2SKiran Ostrolenk #define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t 14198f40dd2SKiran Ostrolenk #define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t 14298f40dd2SKiran Ostrolenk 1432152e48bSKiran Ostrolenk #define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 1442152e48bSKiran Ostrolenk static void do_##NAME(void *vd, void *vs2, int i) \ 1452152e48bSKiran Ostrolenk { \ 1462152e48bSKiran Ostrolenk TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1472152e48bSKiran Ostrolenk *((TD *)vd + HD(i)) = OP(s2); \ 1482152e48bSKiran Ostrolenk } 1492152e48bSKiran Ostrolenk 1502152e48bSKiran Ostrolenk #define GEN_VEXT_V(NAME, ESZ) \ 1512152e48bSKiran Ostrolenk void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 1522152e48bSKiran Ostrolenk CPURISCVState *env, uint32_t desc) \ 1532152e48bSKiran Ostrolenk { \ 1542152e48bSKiran Ostrolenk uint32_t vm = vext_vm(desc); \ 1552152e48bSKiran Ostrolenk uint32_t vl = env->vl; \ 1562152e48bSKiran Ostrolenk uint32_t total_elems = \ 1572152e48bSKiran Ostrolenk vext_get_total_elems(env, desc, ESZ); \ 1582152e48bSKiran Ostrolenk uint32_t vta = vext_vta(desc); \ 1592152e48bSKiran Ostrolenk uint32_t vma = vext_vma(desc); \ 1602152e48bSKiran Ostrolenk uint32_t i; \ 1612152e48bSKiran Ostrolenk \ 162df4252b2SDaniel Henrique Barboza VSTART_CHECK_EARLY_EXIT(env); \ 163df4252b2SDaniel Henrique Barboza \ 1642152e48bSKiran Ostrolenk for (i = env->vstart; i < vl; i++) { \ 1652152e48bSKiran Ostrolenk if (!vm && !vext_elem_mask(v0, i)) { \ 1662152e48bSKiran Ostrolenk /* set masked-off elements to 1s */ \ 1672152e48bSKiran Ostrolenk vext_set_elems_1s(vd, vma, i * ESZ, \ 1682152e48bSKiran Ostrolenk (i + 1) * ESZ); \ 1692152e48bSKiran Ostrolenk continue; \ 1702152e48bSKiran Ostrolenk } \ 1712152e48bSKiran Ostrolenk do_##NAME(vd, vs2, i); \ 1722152e48bSKiran Ostrolenk } \ 1732152e48bSKiran Ostrolenk env->vstart = 0; \ 1742152e48bSKiran Ostrolenk /* set tail elements to 1s */ \ 1752152e48bSKiran Ostrolenk vext_set_elems_1s(vd, vta, vl * ESZ, \ 1762152e48bSKiran Ostrolenk total_elems * ESZ); \ 1772152e48bSKiran Ostrolenk } 1782152e48bSKiran Ostrolenk 17998f40dd2SKiran Ostrolenk /* operation of two vector elements */ 18098f40dd2SKiran Ostrolenk typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i); 18198f40dd2SKiran Ostrolenk 18298f40dd2SKiran Ostrolenk #define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 18398f40dd2SKiran Ostrolenk static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 18498f40dd2SKiran Ostrolenk { \ 18598f40dd2SKiran Ostrolenk TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 18698f40dd2SKiran Ostrolenk TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 18798f40dd2SKiran Ostrolenk *((TD *)vd + HD(i)) = OP(s2, s1); \ 18898f40dd2SKiran Ostrolenk } 18998f40dd2SKiran Ostrolenk 19098f40dd2SKiran Ostrolenk void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, 19198f40dd2SKiran Ostrolenk CPURISCVState *env, uint32_t desc, 19298f40dd2SKiran Ostrolenk opivv2_fn *fn, uint32_t esz); 19398f40dd2SKiran Ostrolenk 19498f40dd2SKiran Ostrolenk /* generate the helpers for OPIVV */ 19598f40dd2SKiran Ostrolenk #define GEN_VEXT_VV(NAME, ESZ) \ 19698f40dd2SKiran Ostrolenk void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 19798f40dd2SKiran Ostrolenk void *vs2, CPURISCVState *env, \ 19898f40dd2SKiran Ostrolenk uint32_t desc) \ 19998f40dd2SKiran Ostrolenk { \ 20098f40dd2SKiran Ostrolenk do_vext_vv(vd, v0, vs1, vs2, env, desc, \ 20198f40dd2SKiran Ostrolenk do_##NAME, ESZ); \ 20298f40dd2SKiran Ostrolenk } 20398f40dd2SKiran Ostrolenk 20498f40dd2SKiran Ostrolenk typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i); 20598f40dd2SKiran Ostrolenk 20698f40dd2SKiran Ostrolenk /* 20798f40dd2SKiran Ostrolenk * (T1)s1 gives the real operator type. 20898f40dd2SKiran Ostrolenk * (TX1)(T1)s1 expands the operator type of widen or narrow operations. 20998f40dd2SKiran Ostrolenk */ 21098f40dd2SKiran Ostrolenk #define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 21198f40dd2SKiran Ostrolenk static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 21298f40dd2SKiran Ostrolenk { \ 21398f40dd2SKiran Ostrolenk TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 21498f40dd2SKiran Ostrolenk *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \ 21598f40dd2SKiran Ostrolenk } 21698f40dd2SKiran Ostrolenk 21798f40dd2SKiran Ostrolenk void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, 21898f40dd2SKiran Ostrolenk CPURISCVState *env, uint32_t desc, 21998f40dd2SKiran Ostrolenk opivx2_fn fn, uint32_t esz); 22098f40dd2SKiran Ostrolenk 22198f40dd2SKiran Ostrolenk /* generate the helpers for OPIVX */ 22298f40dd2SKiran Ostrolenk #define GEN_VEXT_VX(NAME, ESZ) \ 22398f40dd2SKiran Ostrolenk void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 22498f40dd2SKiran Ostrolenk void *vs2, CPURISCVState *env, \ 22598f40dd2SKiran Ostrolenk uint32_t desc) \ 22698f40dd2SKiran Ostrolenk { \ 22798f40dd2SKiran Ostrolenk do_vext_vx(vd, v0, s1, vs2, env, desc, \ 22898f40dd2SKiran Ostrolenk do_##NAME, ESZ); \ 22998f40dd2SKiran Ostrolenk } 23098f40dd2SKiran Ostrolenk 2312152e48bSKiran Ostrolenk /* Three of the widening shortening macros: */ 2322152e48bSKiran Ostrolenk /* (TD, T1, T2, TX1, TX2) */ 2332152e48bSKiran Ostrolenk #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t 2342152e48bSKiran Ostrolenk #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t 2352152e48bSKiran Ostrolenk #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t 2362152e48bSKiran Ostrolenk 23798f40dd2SKiran Ostrolenk #endif /* TARGET_RISCV_VECTOR_INTERNALS_H */ 238