/*
 * RISC-V Vector Extension Internals
 *
 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef TARGET_RISCV_VECTOR_INTERNALS_H
#define TARGET_RISCV_VECTOR_INTERNALS_H

#include "qemu/osdep.h"
#include "qemu/bitops.h"
#include "cpu.h"
#include "tcg/tcg-gvec-desc.h"
#include "internals.h"

/* Extract the NF field from the VDATA bits of the simd descriptor. */
static inline uint32_t vext_nf(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, NF);
}

/*
 * Note that vector data is stored in host-endian 64-bit chunks,
 * so addressing units smaller than that needs a host-endian fixup.
 * H<size>(x) converts an element index of the given byte width into
 * the index actually used within the 64-bit chunk; on little-endian
 * hosts the layout already matches, so the macros are identity.
 */
#if HOST_BIG_ENDIAN
#define H1(x)   ((x) ^ 7)
#define H1_2(x) ((x) ^ 6)
#define H1_4(x) ((x) ^ 4)
#define H2(x)   ((x) ^ 3)
#define H4(x)   ((x) ^ 1)
#define H8(x)   ((x))
#else
#define H1(x)   (x)
#define H1_2(x) (x)
#define H1_4(x) (x)
#define H2(x)   (x)
#define H4(x)   (x)
#define H8(x)   (x)
#endif

/*
 * Encode LMUL to lmul as following:
 *     LMUL    vlmul    lmul
 *      1       000       0
 *      2       001       1
 *      4       010       2
59*98f40dd2SKiran Ostrolenk * 8 011 3 60*98f40dd2SKiran Ostrolenk * - 100 - 61*98f40dd2SKiran Ostrolenk * 1/8 101 -3 62*98f40dd2SKiran Ostrolenk * 1/4 110 -2 63*98f40dd2SKiran Ostrolenk * 1/2 111 -1 64*98f40dd2SKiran Ostrolenk */ 65*98f40dd2SKiran Ostrolenk static inline int32_t vext_lmul(uint32_t desc) 66*98f40dd2SKiran Ostrolenk { 67*98f40dd2SKiran Ostrolenk return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3); 68*98f40dd2SKiran Ostrolenk } 69*98f40dd2SKiran Ostrolenk 70*98f40dd2SKiran Ostrolenk static inline uint32_t vext_vm(uint32_t desc) 71*98f40dd2SKiran Ostrolenk { 72*98f40dd2SKiran Ostrolenk return FIELD_EX32(simd_data(desc), VDATA, VM); 73*98f40dd2SKiran Ostrolenk } 74*98f40dd2SKiran Ostrolenk 75*98f40dd2SKiran Ostrolenk static inline uint32_t vext_vma(uint32_t desc) 76*98f40dd2SKiran Ostrolenk { 77*98f40dd2SKiran Ostrolenk return FIELD_EX32(simd_data(desc), VDATA, VMA); 78*98f40dd2SKiran Ostrolenk } 79*98f40dd2SKiran Ostrolenk 80*98f40dd2SKiran Ostrolenk static inline uint32_t vext_vta(uint32_t desc) 81*98f40dd2SKiran Ostrolenk { 82*98f40dd2SKiran Ostrolenk return FIELD_EX32(simd_data(desc), VDATA, VTA); 83*98f40dd2SKiran Ostrolenk } 84*98f40dd2SKiran Ostrolenk 85*98f40dd2SKiran Ostrolenk static inline uint32_t vext_vta_all_1s(uint32_t desc) 86*98f40dd2SKiran Ostrolenk { 87*98f40dd2SKiran Ostrolenk return FIELD_EX32(simd_data(desc), VDATA, VTA_ALL_1S); 88*98f40dd2SKiran Ostrolenk } 89*98f40dd2SKiran Ostrolenk 90*98f40dd2SKiran Ostrolenk /* 91*98f40dd2SKiran Ostrolenk * Earlier designs (pre-0.9) had a varying number of bits 92*98f40dd2SKiran Ostrolenk * per mask value (MLEN). In the 0.9 design, MLEN=1. 
 * (Section 4.5)
 */
/* Return mask bit INDEX of mask register v0 (one bit per element). */
static inline int vext_elem_mask(void *v0, int index)
{
    int idx = index / 64;   /* which 64-bit chunk holds the bit */
    int pos = index % 64;   /* bit position within that chunk */
    return (((uint64_t *)v0)[idx] >> pos) & 1;
}

/*
 * Get number of total elements, including prestart, body and tail elements.
 * Note that when LMUL < 1, the tail includes the elements past VLMAX that
 * are held in the same vector register.
 */
static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc,
                                            uint32_t esz)
{
    uint32_t vlenb = simd_maxsz(desc);
    uint32_t sew = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
    /*
     * emul = log2(esz / sew) + lmul, clamped to a minimum of 0
     * (ctzl of a power of two is its log2).
     */
    int8_t emul = ctzl(esz) - ctzl(sew) + vext_lmul(desc) < 0 ? 0 :
                  ctzl(esz) - ctzl(sew) + vext_lmul(desc);
    return (vlenb << emul) / esz;
}

/* set agnostic elements to 1s */
void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt,
                       uint32_t tot);

/* expand macro args before macro */
#define RVVCALL(macro, ...)  macro(__VA_ARGS__)

/* (TD, T1, T2, TX1, TX2) */
#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t

/* operation of two vector elements */
typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);

/*
 * Define do_NAME: load element i of vs1 and vs2 (via the H* index
 * fixups HS1/HS2), apply OP, and store the result into element i of vd.
 */
#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)    \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i)    \
{                                                               \
    TX1 s1 = *((T1 *)vs1 + HS1(i));                             \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                             \
    *((TD *)vd + HD(i)) = OP(s2, s1);                           \
}

/* Out-of-line element loop driver shared by all OPIVV helpers. */
void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
                CPURISCVState *env, uint32_t desc,
                opivv2_fn *fn, uint32_t esz);

/* generate the helpers for OPIVV */
#define GEN_VEXT_VV(NAME, ESZ)                    \
void HELPER(NAME)(void *vd, void *v0, void *vs1,  \
                  void *vs2, CPURISCVState *env,  \
                  uint32_t desc)                  \
{                                                 \
    do_vext_vv(vd, v0, vs1, vs2, env, desc,       \
               do_##NAME, ESZ);                   \
}

/* operation of a scalar operand and a vector element */
typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);

/*
 * (T1)s1 gives the real operator type.
 * (TX1)(T1)s1 expands the operator type of widen or narrow operations.
 */
#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
{                                                                   \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1);                      \
}

/* Out-of-line element loop driver shared by all OPIVX helpers. */
void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
                CPURISCVState *env, uint32_t desc,
                opivx2_fn fn, uint32_t esz);

/* generate the helpers for OPIVX */
#define GEN_VEXT_VX(NAME, ESZ)                         \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
                  void *vs2, CPURISCVState *env,       \
                  uint32_t desc)                       \
{                                                      \
    do_vext_vx(vd, v0, s1, vs2, env, desc,             \
               do_##NAME, ESZ);                        \
}

#endif /* TARGET_RISCV_VECTOR_INTERNALS_H */