198f40dd2SKiran Ostrolenk /* 298f40dd2SKiran Ostrolenk * RISC-V Vector Extension Internals 398f40dd2SKiran Ostrolenk * 498f40dd2SKiran Ostrolenk * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved. 598f40dd2SKiran Ostrolenk * 698f40dd2SKiran Ostrolenk * This program is free software; you can redistribute it and/or modify it 798f40dd2SKiran Ostrolenk * under the terms and conditions of the GNU General Public License, 898f40dd2SKiran Ostrolenk * version 2 or later, as published by the Free Software Foundation. 998f40dd2SKiran Ostrolenk * 1098f40dd2SKiran Ostrolenk * This program is distributed in the hope it will be useful, but WITHOUT 1198f40dd2SKiran Ostrolenk * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 1298f40dd2SKiran Ostrolenk * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 1398f40dd2SKiran Ostrolenk * more details. 1498f40dd2SKiran Ostrolenk * 1598f40dd2SKiran Ostrolenk * You should have received a copy of the GNU General Public License along with 1698f40dd2SKiran Ostrolenk * this program. If not, see <http://www.gnu.org/licenses/>. 1798f40dd2SKiran Ostrolenk */ 1898f40dd2SKiran Ostrolenk 1998f40dd2SKiran Ostrolenk #ifndef TARGET_RISCV_VECTOR_INTERNALS_H 2098f40dd2SKiran Ostrolenk #define TARGET_RISCV_VECTOR_INTERNALS_H 2198f40dd2SKiran Ostrolenk 2298f40dd2SKiran Ostrolenk #include "qemu/osdep.h" 2398f40dd2SKiran Ostrolenk #include "qemu/bitops.h" 2498f40dd2SKiran Ostrolenk #include "cpu.h" 2598f40dd2SKiran Ostrolenk #include "tcg/tcg-gvec-desc.h" 2698f40dd2SKiran Ostrolenk #include "internals.h" 2798f40dd2SKiran Ostrolenk 2898f40dd2SKiran Ostrolenk static inline uint32_t vext_nf(uint32_t desc) 2998f40dd2SKiran Ostrolenk { 3098f40dd2SKiran Ostrolenk return FIELD_EX32(simd_data(desc), VDATA, NF); 3198f40dd2SKiran Ostrolenk } 3298f40dd2SKiran Ostrolenk 3398f40dd2SKiran Ostrolenk /* 3498f40dd2SKiran Ostrolenk * Note that vector data is stored in host-endian 64-bit chunks, 3598f40dd2SKiran Ostrolenk * so addressing units smaller than that needs a host-endian fixup. 3698f40dd2SKiran Ostrolenk */ 3798f40dd2SKiran Ostrolenk #if HOST_BIG_ENDIAN 3898f40dd2SKiran Ostrolenk #define H1(x) ((x) ^ 7) 3998f40dd2SKiran Ostrolenk #define H1_2(x) ((x) ^ 6) 4098f40dd2SKiran Ostrolenk #define H1_4(x) ((x) ^ 4) 4198f40dd2SKiran Ostrolenk #define H2(x) ((x) ^ 3) 4298f40dd2SKiran Ostrolenk #define H4(x) ((x) ^ 1) 4398f40dd2SKiran Ostrolenk #define H8(x) ((x)) 4498f40dd2SKiran Ostrolenk #else 4598f40dd2SKiran Ostrolenk #define H1(x) (x) 4698f40dd2SKiran Ostrolenk #define H1_2(x) (x) 4798f40dd2SKiran Ostrolenk #define H1_4(x) (x) 4898f40dd2SKiran Ostrolenk #define H2(x) (x) 4998f40dd2SKiran Ostrolenk #define H4(x) (x) 5098f40dd2SKiran Ostrolenk #define H8(x) (x) 5198f40dd2SKiran Ostrolenk #endif 5298f40dd2SKiran Ostrolenk 5398f40dd2SKiran Ostrolenk /* 5498f40dd2SKiran Ostrolenk * Encode LMUL to lmul as following: 5598f40dd2SKiran Ostrolenk * LMUL vlmul lmul 5698f40dd2SKiran Ostrolenk * 1 000 0 5798f40dd2SKiran Ostrolenk * 2 001 1 5898f40dd2SKiran Ostrolenk * 4 010 2 5998f40dd2SKiran Ostrolenk * 8 011 3 6098f40dd2SKiran Ostrolenk * - 100 - 6198f40dd2SKiran Ostrolenk * 1/8 101 -3 6298f40dd2SKiran Ostrolenk * 1/4 110 -2 6398f40dd2SKiran Ostrolenk * 1/2 111 -1 6498f40dd2SKiran Ostrolenk */ 6598f40dd2SKiran Ostrolenk static inline int32_t vext_lmul(uint32_t desc) 6698f40dd2SKiran Ostrolenk { 6798f40dd2SKiran Ostrolenk return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3); 6898f40dd2SKiran Ostrolenk } 6998f40dd2SKiran Ostrolenk 7098f40dd2SKiran Ostrolenk static inline uint32_t vext_vm(uint32_t desc) 7198f40dd2SKiran Ostrolenk { 7298f40dd2SKiran Ostrolenk return FIELD_EX32(simd_data(desc), VDATA, VM); 7398f40dd2SKiran Ostrolenk } 7498f40dd2SKiran Ostrolenk 7598f40dd2SKiran Ostrolenk static inline uint32_t vext_vma(uint32_t desc) 7698f40dd2SKiran Ostrolenk { 7798f40dd2SKiran Ostrolenk return FIELD_EX32(simd_data(desc), VDATA, VMA); 7898f40dd2SKiran Ostrolenk } 7998f40dd2SKiran Ostrolenk 8098f40dd2SKiran Ostrolenk static inline uint32_t vext_vta(uint32_t desc) 8198f40dd2SKiran Ostrolenk { 8298f40dd2SKiran Ostrolenk return FIELD_EX32(simd_data(desc), VDATA, VTA); 8398f40dd2SKiran Ostrolenk } 8498f40dd2SKiran Ostrolenk 8598f40dd2SKiran Ostrolenk static inline uint32_t vext_vta_all_1s(uint32_t desc) 8698f40dd2SKiran Ostrolenk { 8798f40dd2SKiran Ostrolenk return FIELD_EX32(simd_data(desc), VDATA, VTA_ALL_1S); 8898f40dd2SKiran Ostrolenk } 8998f40dd2SKiran Ostrolenk 9098f40dd2SKiran Ostrolenk /* 9198f40dd2SKiran Ostrolenk * Earlier designs (pre-0.9) had a varying number of bits 9298f40dd2SKiran Ostrolenk * per mask value (MLEN). In the 0.9 design, MLEN=1. 9398f40dd2SKiran Ostrolenk * (Section 4.5) 9498f40dd2SKiran Ostrolenk */ 9598f40dd2SKiran Ostrolenk static inline int vext_elem_mask(void *v0, int index) 9698f40dd2SKiran Ostrolenk { 9798f40dd2SKiran Ostrolenk int idx = index / 64; 9898f40dd2SKiran Ostrolenk int pos = index % 64; 9998f40dd2SKiran Ostrolenk return (((uint64_t *)v0)[idx] >> pos) & 1; 10098f40dd2SKiran Ostrolenk } 10198f40dd2SKiran Ostrolenk 10298f40dd2SKiran Ostrolenk /* 10398f40dd2SKiran Ostrolenk * Get number of total elements, including prestart, body and tail elements. 10498f40dd2SKiran Ostrolenk * Note that when LMUL < 1, the tail includes the elements past VLMAX that 10598f40dd2SKiran Ostrolenk * are held in the same vector register. 10698f40dd2SKiran Ostrolenk */ 10798f40dd2SKiran Ostrolenk static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc, 10898f40dd2SKiran Ostrolenk uint32_t esz) 10998f40dd2SKiran Ostrolenk { 11098f40dd2SKiran Ostrolenk uint32_t vlenb = simd_maxsz(desc); 11198f40dd2SKiran Ostrolenk uint32_t sew = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW); 11298f40dd2SKiran Ostrolenk int8_t emul = ctzl(esz) - ctzl(sew) + vext_lmul(desc) < 0 ? 0 : 11398f40dd2SKiran Ostrolenk ctzl(esz) - ctzl(sew) + vext_lmul(desc); 11498f40dd2SKiran Ostrolenk return (vlenb << emul) / esz; 11598f40dd2SKiran Ostrolenk } 11698f40dd2SKiran Ostrolenk 11798f40dd2SKiran Ostrolenk /* set agnostic elements to 1s */ 11898f40dd2SKiran Ostrolenk void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt, 11998f40dd2SKiran Ostrolenk uint32_t tot); 12098f40dd2SKiran Ostrolenk 12198f40dd2SKiran Ostrolenk /* expand macro args before macro */ 12298f40dd2SKiran Ostrolenk #define RVVCALL(macro, ...) macro(__VA_ARGS__) 12398f40dd2SKiran Ostrolenk 124*2152e48bSKiran Ostrolenk /* (TD, T2, TX2) */ 125*2152e48bSKiran Ostrolenk #define OP_UU_B uint8_t, uint8_t, uint8_t 126*2152e48bSKiran Ostrolenk #define OP_UU_H uint16_t, uint16_t, uint16_t 127*2152e48bSKiran Ostrolenk #define OP_UU_W uint32_t, uint32_t, uint32_t 128*2152e48bSKiran Ostrolenk #define OP_UU_D uint64_t, uint64_t, uint64_t 129*2152e48bSKiran Ostrolenk 13098f40dd2SKiran Ostrolenk /* (TD, T1, T2, TX1, TX2) */ 13198f40dd2SKiran Ostrolenk #define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t 13298f40dd2SKiran Ostrolenk #define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t 13398f40dd2SKiran Ostrolenk #define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t 13498f40dd2SKiran Ostrolenk #define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t 13598f40dd2SKiran Ostrolenk 136*2152e48bSKiran Ostrolenk #define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 137*2152e48bSKiran Ostrolenk static void do_##NAME(void *vd, void *vs2, int i) \ 138*2152e48bSKiran Ostrolenk { \ 139*2152e48bSKiran Ostrolenk TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 140*2152e48bSKiran Ostrolenk *((TD *)vd + HD(i)) = OP(s2); \ 141*2152e48bSKiran Ostrolenk } 142*2152e48bSKiran Ostrolenk 143*2152e48bSKiran Ostrolenk #define GEN_VEXT_V(NAME, ESZ) \ 144*2152e48bSKiran Ostrolenk void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 145*2152e48bSKiran Ostrolenk CPURISCVState *env, uint32_t desc) \ 146*2152e48bSKiran Ostrolenk { \ 147*2152e48bSKiran Ostrolenk uint32_t vm = vext_vm(desc); \ 148*2152e48bSKiran Ostrolenk uint32_t vl = env->vl; \ 149*2152e48bSKiran Ostrolenk uint32_t total_elems = \ 150*2152e48bSKiran Ostrolenk vext_get_total_elems(env, desc, ESZ); \ 151*2152e48bSKiran Ostrolenk uint32_t vta = vext_vta(desc); \ 152*2152e48bSKiran Ostrolenk uint32_t vma = vext_vma(desc); \ 153*2152e48bSKiran Ostrolenk uint32_t i; \ 154*2152e48bSKiran Ostrolenk \ 155*2152e48bSKiran Ostrolenk for (i = env->vstart; i < vl; i++) { \ 156*2152e48bSKiran Ostrolenk if (!vm && !vext_elem_mask(v0, i)) { \ 157*2152e48bSKiran Ostrolenk /* set masked-off elements to 1s */ \ 158*2152e48bSKiran Ostrolenk vext_set_elems_1s(vd, vma, i * ESZ, \ 159*2152e48bSKiran Ostrolenk (i + 1) * ESZ); \ 160*2152e48bSKiran Ostrolenk continue; \ 161*2152e48bSKiran Ostrolenk } \ 162*2152e48bSKiran Ostrolenk do_##NAME(vd, vs2, i); \ 163*2152e48bSKiran Ostrolenk } \ 164*2152e48bSKiran Ostrolenk env->vstart = 0; \ 165*2152e48bSKiran Ostrolenk /* set tail elements to 1s */ \ 166*2152e48bSKiran Ostrolenk vext_set_elems_1s(vd, vta, vl * ESZ, \ 167*2152e48bSKiran Ostrolenk total_elems * ESZ); \ 168*2152e48bSKiran Ostrolenk } 169*2152e48bSKiran Ostrolenk 17098f40dd2SKiran Ostrolenk /* operation of two vector elements */ 17198f40dd2SKiran Ostrolenk typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i); 17298f40dd2SKiran Ostrolenk 17398f40dd2SKiran Ostrolenk #define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 17498f40dd2SKiran Ostrolenk static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 17598f40dd2SKiran Ostrolenk { \ 17698f40dd2SKiran Ostrolenk TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 17798f40dd2SKiran Ostrolenk TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 17898f40dd2SKiran Ostrolenk *((TD *)vd + HD(i)) = OP(s2, s1); \ 17998f40dd2SKiran Ostrolenk } 18098f40dd2SKiran Ostrolenk 18198f40dd2SKiran Ostrolenk void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, 18298f40dd2SKiran Ostrolenk CPURISCVState *env, uint32_t desc, 18398f40dd2SKiran Ostrolenk opivv2_fn *fn, uint32_t esz); 18498f40dd2SKiran Ostrolenk 18598f40dd2SKiran Ostrolenk /* generate the helpers for OPIVV */ 18698f40dd2SKiran Ostrolenk #define GEN_VEXT_VV(NAME, ESZ) \ 18798f40dd2SKiran Ostrolenk void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 18898f40dd2SKiran Ostrolenk void *vs2, CPURISCVState *env, \ 18998f40dd2SKiran Ostrolenk uint32_t desc) \ 19098f40dd2SKiran Ostrolenk { \ 19198f40dd2SKiran Ostrolenk do_vext_vv(vd, v0, vs1, vs2, env, desc, \ 19298f40dd2SKiran Ostrolenk do_##NAME, ESZ); \ 19398f40dd2SKiran Ostrolenk } 19498f40dd2SKiran Ostrolenk 19598f40dd2SKiran Ostrolenk typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i); 19698f40dd2SKiran Ostrolenk 19798f40dd2SKiran Ostrolenk /* 19898f40dd2SKiran Ostrolenk * (T1)s1 gives the real operator type. 19998f40dd2SKiran Ostrolenk * (TX1)(T1)s1 expands the operator type of widen or narrow operations. 20098f40dd2SKiran Ostrolenk */ 20198f40dd2SKiran Ostrolenk #define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 20298f40dd2SKiran Ostrolenk static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 20398f40dd2SKiran Ostrolenk { \ 20498f40dd2SKiran Ostrolenk TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 20598f40dd2SKiran Ostrolenk *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \ 20698f40dd2SKiran Ostrolenk } 20798f40dd2SKiran Ostrolenk 20898f40dd2SKiran Ostrolenk void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, 20998f40dd2SKiran Ostrolenk CPURISCVState *env, uint32_t desc, 21098f40dd2SKiran Ostrolenk opivx2_fn fn, uint32_t esz); 21198f40dd2SKiran Ostrolenk 21298f40dd2SKiran Ostrolenk /* generate the helpers for OPIVX */ 21398f40dd2SKiran Ostrolenk #define GEN_VEXT_VX(NAME, ESZ) \ 21498f40dd2SKiran Ostrolenk void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 21598f40dd2SKiran Ostrolenk void *vs2, CPURISCVState *env, \ 21698f40dd2SKiran Ostrolenk uint32_t desc) \ 21798f40dd2SKiran Ostrolenk { \ 21898f40dd2SKiran Ostrolenk do_vext_vx(vd, v0, s1, vs2, env, desc, \ 21998f40dd2SKiran Ostrolenk do_##NAME, ESZ); \ 22098f40dd2SKiran Ostrolenk } 22198f40dd2SKiran Ostrolenk 222*2152e48bSKiran Ostrolenk /* Three of the widening shortening macros: */ 223*2152e48bSKiran Ostrolenk /* (TD, T1, T2, TX1, TX2) */ 224*2152e48bSKiran Ostrolenk #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t 225*2152e48bSKiran Ostrolenk #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t 226*2152e48bSKiran Ostrolenk #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t 227*2152e48bSKiran Ostrolenk 22898f40dd2SKiran Ostrolenk #endif /* TARGET_RISCV_VECTOR_INTERNALS_H */ 229