1 /*
2 * RISC-V Vector Extension Helpers for QEMU.
3 *
4 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2 or later, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19 #include "qemu/osdep.h"
20 #include "qemu/host-utils.h"
21 #include "qemu/bitops.h"
22 #include "cpu.h"
23 #include "exec/memop.h"
24 #include "accel/tcg/cpu-ldst.h"
25 #include "accel/tcg/probe.h"
26 #include "exec/page-protection.h"
27 #include "exec/helper-proto.h"
28 #include "exec/tlb-flags.h"
29 #include "exec/target_page.h"
30 #include "exec/tswap.h"
31 #include "fpu/softfloat.h"
32 #include "tcg/tcg-gvec-desc.h"
33 #include "internals.h"
34 #include "vector_internals.h"
35 #include <math.h>
36
37 target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
38 target_ulong s2)
39 {
40 int vlmax, vl;
41 RISCVCPU *cpu = env_archcpu(env);
42 uint64_t vlmul = FIELD_EX64(s2, VTYPE, VLMUL);
43 uint8_t vsew = FIELD_EX64(s2, VTYPE, VSEW);
44 uint16_t sew = 8 << vsew;
45 uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
46 int xlen = riscv_cpu_xlen(env);
47 bool vill = (s2 >> (xlen - 1)) & 0x1;
48 target_ulong reserved = s2 &
49 MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
50 xlen - 1 - R_VTYPE_RESERVED_SHIFT);
51 uint16_t vlen = cpu->cfg.vlenb << 3;
52 int8_t lmul;
53
54 if (vlmul & 4) {
55 /*
56 * Fractional LMUL, check:
57 *
58 * VLEN * LMUL >= SEW
59 * VLEN >> (8 - vlmul) >= sew
60 * (vlenb << 3) >> (8 - vlmul) >= sew
61 */
62 if (vlmul == 4 || (vlen >> (8 - vlmul)) < sew) {
63 vill = true;
64 }
65 }
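    /*
     * Worked example (illustrative values, not mandated by the code): with
     * VLEN = 128 (vlenb = 16) and vlmul = 6 (LMUL = 1/4), the check above
     * evaluates (16 << 3) >> (8 - 6) = 32, so SEW up to 32 bits is legal
     * while SEW = 64 sets vill.  vlmul = 4 is a reserved encoding and
     * always sets vill.
     */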
66
67 if ((sew > cpu->cfg.elen) || vill || (ediv != 0) || (reserved != 0)) {
68 /* only set vill bit. */
69 env->vill = 1;
70 env->vtype = 0;
71 env->vl = 0;
72 env->vstart = 0;
73 return 0;
74 }
75
76 /* lmul encoded as in DisasContext::lmul */
77 lmul = sextract32(FIELD_EX64(s2, VTYPE, VLMUL), 0, 3);
78 vlmax = vext_get_vlmax(cpu->cfg.vlenb, vsew, lmul);
79 if (s1 <= vlmax) {
80 vl = s1;
81 } else if (s1 < 2 * vlmax && cpu->cfg.rvv_vl_half_avl) {
82 vl = (s1 + 1) >> 1;
83 } else {
84 vl = vlmax;
85 }
86 env->vl = vl;
87 env->vtype = s2;
88 env->vstart = 0;
89 env->vill = 0;
90 return vl;
91 }
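/*
 * Worked example for the vl computation above (illustrative values): with
 * vlenb = 16 (VLEN = 128), vsew = 2 (SEW = 32) and lmul = 1 (LMUL = 2),
 * vext_get_vlmax() yields VLMAX = 8.  An AVL of s1 = 5 then gives vl = 5,
 * s1 = 20 gives vl = VLMAX = 8, and with rvv_vl_half_avl enabled an AVL of
 * 13 (< 2 * VLMAX) gives vl = (13 + 1) >> 1 = 7.
 */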
92
93 /*
94 * Get the maximum number of elements that can be operated on.
95 *
96 * log2_esz: log2 of element size in bytes.
97 */
98 static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz)
99 {
100 /*
101 * As simd_desc supports at most 2048 bytes, the max vlen is 1024 bits,
102 * so vlen in bytes (vlenb) is encoded as maxsz.
103 */
104 uint32_t vlenb = simd_maxsz(desc);
105
106 /* Return VLMAX */
107 int scale = vext_lmul(desc) - log2_esz;
108 return scale < 0 ? vlenb >> -scale : vlenb << scale;
109 }
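/*
 * Worked example (illustrative values): with vlenb = 16 encoded as maxsz,
 * LMUL = 4 (vext_lmul() == 2) and 32-bit elements (log2_esz = 2), scale is
 * 0 and VLMAX = 16 << 0 = 16; with LMUL = 1/2 (vext_lmul() == -1) and the
 * same element size, scale is -3 and VLMAX = 16 >> 3 = 2.
 */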
110
111 /*
112 * This function checks watchpoints before the real load operation.
113 *
114 * In system mode, the TLB API probe_access is enough for the watchpoint check.
115 * In user mode, there is currently no watchpoint support.
116 *
117 * It will trigger an exception if there is no mapping in the TLB
118 * and the page table walk can't fill the TLB entry. Then the guest
119 * software can return here after processing the exception, or never return.
120 *
121 * This function can also be used when direct access to probe_access_flags is
122 * needed in order to access the flags. If a pointer to a flags operand is
123 * provided, the function calls probe_access_flags instead, using nonfault
124 * and updating host and flags.
125 */
126 static void probe_pages(CPURISCVState *env, target_ulong addr, target_ulong len,
127 uintptr_t ra, MMUAccessType access_type, int mmu_index,
128 void **host, int *flags, bool nonfault)
129 {
130 target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
131 target_ulong curlen = MIN(pagelen, len);
132
133 if (flags != NULL) {
134 *flags = probe_access_flags(env, adjust_addr(env, addr), curlen,
135 access_type, mmu_index, nonfault, host, ra);
136 } else {
137 probe_access(env, adjust_addr(env, addr), curlen, access_type,
138 mmu_index, ra);
139 }
140
141 if (len > curlen) {
142 addr += curlen;
143 curlen = len - curlen;
144 if (flags != NULL) {
145 *flags = probe_access_flags(env, adjust_addr(env, addr), curlen,
146 access_type, mmu_index, nonfault,
147 host, ra);
148 } else {
149 probe_access(env, adjust_addr(env, addr), curlen, access_type,
150 mmu_index, ra);
151 }
152 }
153 }
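/*
 * Worked example (illustrative values, assuming a 4 KiB target page): a
 * probe at addr = 0xffc with len = 16 gives pagelen = -(0xffc | ~0xfff) = 4,
 * so the first probe covers the 4 bytes up to the page boundary and the
 * second probe covers the remaining 12 bytes starting at 0x1000.
 */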
154
155
156 static inline void vext_set_elem_mask(void *v0, int index,
157 uint8_t value)
158 {
159 int idx = index / 64;
160 int pos = index % 64;
161 uint64_t old = ((uint64_t *)v0)[idx];
162 ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
163 }
164
165 /* elements operations for load and store */
166 typedef void vext_ldst_elem_fn_tlb(CPURISCVState *env, abi_ptr addr,
167 uint32_t idx, void *vd, uintptr_t retaddr);
168 typedef void vext_ldst_elem_fn_host(void *vd, uint32_t idx, void *host);
169
170 #define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF) \
171 static inline QEMU_ALWAYS_INLINE \
172 void NAME##_tlb(CPURISCVState *env, abi_ptr addr, \
173 uint32_t idx, void *vd, uintptr_t retaddr) \
174 { \
175 ETYPE *cur = ((ETYPE *)vd + H(idx)); \
176 *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr); \
177 } \
178 \
179 static inline QEMU_ALWAYS_INLINE \
180 void NAME##_host(void *vd, uint32_t idx, void *host) \
181 { \
182 ETYPE *cur = ((ETYPE *)vd + H(idx)); \
183 *cur = (ETYPE)LDSUF##_p(host); \
184 }
185
186 GEN_VEXT_LD_ELEM(lde_b, uint8_t, H1, ldub)
187 GEN_VEXT_LD_ELEM(lde_h, uint16_t, H2, lduw)
188 GEN_VEXT_LD_ELEM(lde_w, uint32_t, H4, ldl)
189 GEN_VEXT_LD_ELEM(lde_d, uint64_t, H8, ldq)
190
191 #define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF) \
192 static inline QEMU_ALWAYS_INLINE \
193 void NAME##_tlb(CPURISCVState *env, abi_ptr addr, \
194 uint32_t idx, void *vd, uintptr_t retaddr) \
195 { \
196 ETYPE data = *((ETYPE *)vd + H(idx)); \
197 cpu_##STSUF##_data_ra(env, addr, data, retaddr); \
198 } \
199 \
200 static inline QEMU_ALWAYS_INLINE \
201 void NAME##_host(void *vd, uint32_t idx, void *host) \
202 { \
203 ETYPE data = *((ETYPE *)vd + H(idx)); \
204 STSUF##_p(host, data); \
205 }
206
207 GEN_VEXT_ST_ELEM(ste_b, uint8_t, H1, stb)
208 GEN_VEXT_ST_ELEM(ste_h, uint16_t, H2, stw)
209 GEN_VEXT_ST_ELEM(ste_w, uint32_t, H4, stl)
210 GEN_VEXT_ST_ELEM(ste_d, uint64_t, H8, stq)
211
212 static inline QEMU_ALWAYS_INLINE void
213 vext_continuous_ldst_tlb(CPURISCVState *env, vext_ldst_elem_fn_tlb *ldst_tlb,
214 void *vd, uint32_t evl, target_ulong addr,
215 uint32_t reg_start, uintptr_t ra, uint32_t esz,
216 bool is_load)
217 {
218 uint32_t i;
219 for (i = env->vstart; i < evl; env->vstart = ++i, addr += esz) {
220 ldst_tlb(env, adjust_addr(env, addr), i, vd, ra);
221 }
222 }
223
224 static inline QEMU_ALWAYS_INLINE void
225 vext_continuous_ldst_host(CPURISCVState *env, vext_ldst_elem_fn_host *ldst_host,
226 void *vd, uint32_t evl, uint32_t reg_start, void *host,
227 uint32_t esz, bool is_load)
228 {
229 #if HOST_BIG_ENDIAN
230 for (; reg_start < evl; reg_start++, host += esz) {
231 ldst_host(vd, reg_start, host);
232 }
233 #else
234 if (esz == 1) {
235 uint32_t byte_offset = reg_start * esz;
236 uint32_t size = (evl - reg_start) * esz;
237
238 if (is_load) {
239 memcpy(vd + byte_offset, host, size);
240 } else {
241 memcpy(host, vd + byte_offset, size);
242 }
243 } else {
244 for (; reg_start < evl; reg_start++, host += esz) {
245 ldst_host(vd, reg_start, host);
246 }
247 }
248 #endif
249 }
250
251 static void vext_set_tail_elems_1s(target_ulong vl, void *vd,
252 uint32_t desc, uint32_t nf,
253 uint32_t esz, uint32_t max_elems)
254 {
255 uint32_t vta = vext_vta(desc);
256 int k;
257
258 if (vta == 0) {
259 return;
260 }
261
262 for (k = 0; k < nf; ++k) {
263 vext_set_elems_1s(vd, vta, (k * max_elems + vl) * esz,
264 (k * max_elems + max_elems) * esz);
265 }
266 }
267
268 /*
269 * stride: access vector elements from strided memory
270 */
271 static void
272 vext_ldst_stride(void *vd, void *v0, target_ulong base, target_ulong stride,
273 CPURISCVState *env, uint32_t desc, uint32_t vm,
274 vext_ldst_elem_fn_tlb *ldst_elem, uint32_t log2_esz,
275 uintptr_t ra)
276 {
277 uint32_t i, k;
278 uint32_t nf = vext_nf(desc);
279 uint32_t max_elems = vext_max_elems(desc, log2_esz);
280 uint32_t esz = 1 << log2_esz;
281 uint32_t vma = vext_vma(desc);
282
283 VSTART_CHECK_EARLY_EXIT(env, env->vl);
284
285 for (i = env->vstart; i < env->vl; env->vstart = ++i) {
286 k = 0;
287 while (k < nf) {
288 if (!vm && !vext_elem_mask(v0, i)) {
289 /* set masked-off elements to 1s */
290 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
291 (i + k * max_elems + 1) * esz);
292 k++;
293 continue;
294 }
295 target_ulong addr = base + stride * i + (k << log2_esz);
296 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
297 k++;
298 }
299 }
300 env->vstart = 0;
301
302 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
303 }
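/*
 * Worked example (illustrative values): for a strided 32-bit load with
 * stride = 12 and nf = 1, element i is read from base + 12 * i, i.e.
 * offsets 0, 12, 24, ...  For a two-field segment access (nf = 2), the
 * second field of element i comes from base + 12 * i + 4 (k = 1, esz = 4).
 */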
304
305 #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \
306 void HELPER(NAME)(void *vd, void * v0, target_ulong base, \
307 target_ulong stride, CPURISCVState *env, \
308 uint32_t desc) \
309 { \
310 uint32_t vm = vext_vm(desc); \
311 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \
312 ctzl(sizeof(ETYPE)), GETPC()); \
313 }
314
315 GEN_VEXT_LD_STRIDE(vlse8_v, int8_t, lde_b_tlb)
316 GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h_tlb)
317 GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w_tlb)
318 GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d_tlb)
319
320 #define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN) \
321 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
322 target_ulong stride, CPURISCVState *env, \
323 uint32_t desc) \
324 { \
325 uint32_t vm = vext_vm(desc); \
326 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \
327 ctzl(sizeof(ETYPE)), GETPC()); \
328 }
329
330 GEN_VEXT_ST_STRIDE(vsse8_v, int8_t, ste_b_tlb)
331 GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h_tlb)
332 GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w_tlb)
333 GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d_tlb)
334
335 /*
336 * unit-stride: access elements stored contiguously in memory
337 */
338
339 /* unmasked unit-stride load and store operation */
340 static inline QEMU_ALWAYS_INLINE void
341 vext_page_ldst_us(CPURISCVState *env, void *vd, target_ulong addr,
342 uint32_t elems, uint32_t nf, uint32_t max_elems,
343 uint32_t log2_esz, bool is_load, int mmu_index,
344 vext_ldst_elem_fn_tlb *ldst_tlb,
345 vext_ldst_elem_fn_host *ldst_host, uintptr_t ra)
346 {
347 void *host;
348 int i, k, flags;
349 uint32_t esz = 1 << log2_esz;
350 uint32_t size = (elems * nf) << log2_esz;
351 uint32_t evl = env->vstart + elems;
352 MMUAccessType access_type = is_load ? MMU_DATA_LOAD : MMU_DATA_STORE;
353
354 /* Check page permission/pmp/watchpoint/etc. */
355 probe_pages(env, addr, size, ra, access_type, mmu_index, &host, &flags,
356 true);
357
358 if (flags == 0) {
359 if (nf == 1) {
360 vext_continuous_ldst_host(env, ldst_host, vd, evl, env->vstart,
361 host, esz, is_load);
362 } else {
363 for (i = env->vstart; i < evl; ++i) {
364 k = 0;
365 while (k < nf) {
366 ldst_host(vd, i + k * max_elems, host);
367 host += esz;
368 k++;
369 }
370 }
371 }
372 env->vstart += elems;
373 } else {
374 if (nf == 1) {
375 vext_continuous_ldst_tlb(env, ldst_tlb, vd, evl, addr, env->vstart,
376 ra, esz, is_load);
377 } else {
378 /* load bytes from guest memory */
379 for (i = env->vstart; i < evl; env->vstart = ++i) {
380 k = 0;
381 while (k < nf) {
382 ldst_tlb(env, adjust_addr(env, addr), i + k * max_elems,
383 vd, ra);
384 addr += esz;
385 k++;
386 }
387 }
388 }
389 }
390 }
391
392 static inline QEMU_ALWAYS_INLINE void
393 vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
394 vext_ldst_elem_fn_tlb *ldst_tlb,
395 vext_ldst_elem_fn_host *ldst_host, uint32_t log2_esz,
396 uint32_t evl, uintptr_t ra, bool is_load)
397 {
398 uint32_t k;
399 target_ulong page_split, elems, addr;
400 uint32_t nf = vext_nf(desc);
401 uint32_t max_elems = vext_max_elems(desc, log2_esz);
402 uint32_t esz = 1 << log2_esz;
403 uint32_t msize = nf * esz;
404 int mmu_index = riscv_env_mmu_index(env, false);
405
406 VSTART_CHECK_EARLY_EXIT(env, evl);
407
408 #if defined(CONFIG_USER_ONLY)
409 /*
410 * For data sizes <= 6 bytes we get better performance by simply calling
411 * vext_continuous_ldst_tlb
412 */
413 if (nf == 1 && (evl << log2_esz) <= 6) {
414 addr = base + (env->vstart << log2_esz);
415 vext_continuous_ldst_tlb(env, ldst_tlb, vd, evl, addr, env->vstart, ra,
416 esz, is_load);
417
418 env->vstart = 0;
419 vext_set_tail_elems_1s(evl, vd, desc, nf, esz, max_elems);
420 return;
421 }
422 #endif
423
424 /* Calculate the page range of first page */
425 addr = base + ((env->vstart * nf) << log2_esz);
426 page_split = -(addr | TARGET_PAGE_MASK);
427 /* Get number of elements */
428 elems = page_split / msize;
429 if (unlikely(env->vstart + elems >= evl)) {
430 elems = evl - env->vstart;
431 }
432
433 /* Load/store elements in the first page */
434 if (likely(elems)) {
435 vext_page_ldst_us(env, vd, addr, elems, nf, max_elems, log2_esz,
436 is_load, mmu_index, ldst_tlb, ldst_host, ra);
437 }
438
439 /* Load/store elements in the second page */
440 if (unlikely(env->vstart < evl)) {
441 /* Cross page element */
442 if (unlikely(page_split % msize)) {
443 for (k = 0; k < nf; k++) {
444 addr = base + ((env->vstart * nf + k) << log2_esz);
445 ldst_tlb(env, adjust_addr(env, addr),
446 env->vstart + k * max_elems, vd, ra);
447 }
448 env->vstart++;
449 }
450
451 addr = base + ((env->vstart * nf) << log2_esz);
452 /* Get number of elements of second page */
453 elems = evl - env->vstart;
454
455 /* Load/store elements in the second page */
456 vext_page_ldst_us(env, vd, addr, elems, nf, max_elems, log2_esz,
457 is_load, mmu_index, ldst_tlb, ldst_host, ra);
458 }
459
460 env->vstart = 0;
461 vext_set_tail_elems_1s(evl, vd, desc, nf, esz, max_elems);
462 }
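/*
 * Worked example (illustrative values, assuming a 4 KiB page): with nf = 1,
 * 32-bit elements and vstart = 0, a unit-stride access at base = 0xff8 has
 * page_split = 8, so elems = 8 / 4 = 2 elements are handled on the first
 * page; 8 % 4 == 0 means no element straddles the boundary, and the
 * remaining evl - 2 elements are handled on the second page.
 */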
463
464 /*
465 * masked unit-stride load and store operations are handled as a special
466 * case of the strided access, with stride = NF * sizeof(ETYPE)
467 */
468
469 #define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN_TLB, LOAD_FN_HOST) \
470 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
471 CPURISCVState *env, uint32_t desc) \
472 { \
473 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \
474 vext_ldst_stride(vd, v0, base, stride, env, desc, false, \
475 LOAD_FN_TLB, ctzl(sizeof(ETYPE)), GETPC()); \
476 } \
477 \
478 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
479 CPURISCVState *env, uint32_t desc) \
480 { \
481 vext_ldst_us(vd, base, env, desc, LOAD_FN_TLB, LOAD_FN_HOST, \
482 ctzl(sizeof(ETYPE)), env->vl, GETPC(), true); \
483 }
484
485 GEN_VEXT_LD_US(vle8_v, int8_t, lde_b_tlb, lde_b_host)
486 GEN_VEXT_LD_US(vle16_v, int16_t, lde_h_tlb, lde_h_host)
487 GEN_VEXT_LD_US(vle32_v, int32_t, lde_w_tlb, lde_w_host)
488 GEN_VEXT_LD_US(vle64_v, int64_t, lde_d_tlb, lde_d_host)
489
490 #define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN_TLB, STORE_FN_HOST) \
491 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
492 CPURISCVState *env, uint32_t desc) \
493 { \
494 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \
495 vext_ldst_stride(vd, v0, base, stride, env, desc, false, \
496 STORE_FN_TLB, ctzl(sizeof(ETYPE)), GETPC()); \
497 } \
498 \
499 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
500 CPURISCVState *env, uint32_t desc) \
501 { \
502 vext_ldst_us(vd, base, env, desc, STORE_FN_TLB, STORE_FN_HOST, \
503 ctzl(sizeof(ETYPE)), env->vl, GETPC(), false); \
504 }
505
506 GEN_VEXT_ST_US(vse8_v, int8_t, ste_b_tlb, ste_b_host)
507 GEN_VEXT_ST_US(vse16_v, int16_t, ste_h_tlb, ste_h_host)
508 GEN_VEXT_ST_US(vse32_v, int32_t, ste_w_tlb, ste_w_host)
509 GEN_VEXT_ST_US(vse64_v, int64_t, ste_d_tlb, ste_d_host)
510
511 /*
512 * unit stride mask load and store, EEW = 1
513 */
514 void HELPER(vlm_v)(void *vd, void *v0, target_ulong base,
515 CPURISCVState *env, uint32_t desc)
516 {
517 /* evl = ceil(vl/8) */
518 uint8_t evl = (env->vl + 7) >> 3;
519 vext_ldst_us(vd, base, env, desc, lde_b_tlb, lde_b_host,
520 0, evl, GETPC(), true);
521 }
522
523 void HELPER(vsm_v)(void *vd, void *v0, target_ulong base,
524 CPURISCVState *env, uint32_t desc)
525 {
526 /* evl = ceil(vl/8) */
527 uint8_t evl = (env->vl + 7) >> 3;
528 vext_ldst_us(vd, base, env, desc, ste_b_tlb, ste_b_host,
529 0, evl, GETPC(), false);
530 }
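/*
 * Worked example: with vl = 20 the mask occupies ceil(20 / 8) = 3 bytes,
 * so vlm.v/vsm.v transfer evl = (20 + 7) >> 3 = 3 byte-sized elements,
 * independent of the SEW currently programmed in vtype.
 */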
531
532 /*
533 * index: access vector elements from indexed memory
534 */
535 typedef target_ulong vext_get_index_addr(target_ulong base,
536 uint32_t idx, void *vs2);
537
538 #define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \
539 static target_ulong NAME(target_ulong base, \
540 uint32_t idx, void *vs2) \
541 { \
542 return (base + *((ETYPE *)vs2 + H(idx))); \
543 }
544
545 GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t, H1)
546 GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
547 GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
548 GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)
549
550 static inline void
551 vext_ldst_index(void *vd, void *v0, target_ulong base,
552 void *vs2, CPURISCVState *env, uint32_t desc,
553 vext_get_index_addr get_index_addr,
554 vext_ldst_elem_fn_tlb *ldst_elem,
555 uint32_t log2_esz, uintptr_t ra)
556 {
557 uint32_t i, k;
558 uint32_t nf = vext_nf(desc);
559 uint32_t vm = vext_vm(desc);
560 uint32_t max_elems = vext_max_elems(desc, log2_esz);
561 uint32_t esz = 1 << log2_esz;
562 uint32_t vma = vext_vma(desc);
563
564 VSTART_CHECK_EARLY_EXIT(env, env->vl);
565
566 /* load bytes from guest memory */
567 for (i = env->vstart; i < env->vl; env->vstart = ++i) {
568 k = 0;
569 while (k < nf) {
570 if (!vm && !vext_elem_mask(v0, i)) {
571 /* set masked-off elements to 1s */
572 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
573 (i + k * max_elems + 1) * esz);
574 k++;
575 continue;
576 }
577 abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz);
578 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
579 k++;
580 }
581 }
582 env->vstart = 0;
583
584 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
585 }
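/*
 * Worked example (illustrative values): for a 32-bit indexed access with
 * 16-bit indices, element i is read from base + (uint16_t)vs2[i]; with
 * base = 0x1000 and indices {0, 8, 2}, the elements come from 0x1000,
 * 0x1008 and 0x1002.  The index EEW only affects the address computation,
 * not the width of the data element.
 */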
586
587 #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \
588 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
589 void *vs2, CPURISCVState *env, uint32_t desc) \
590 { \
591 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
592 LOAD_FN, ctzl(sizeof(ETYPE)), GETPC()); \
593 }
594
595 GEN_VEXT_LD_INDEX(vlxei8_8_v, int8_t, idx_b, lde_b_tlb)
596 GEN_VEXT_LD_INDEX(vlxei8_16_v, int16_t, idx_b, lde_h_tlb)
597 GEN_VEXT_LD_INDEX(vlxei8_32_v, int32_t, idx_b, lde_w_tlb)
598 GEN_VEXT_LD_INDEX(vlxei8_64_v, int64_t, idx_b, lde_d_tlb)
599 GEN_VEXT_LD_INDEX(vlxei16_8_v, int8_t, idx_h, lde_b_tlb)
600 GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h_tlb)
601 GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w_tlb)
602 GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d_tlb)
603 GEN_VEXT_LD_INDEX(vlxei32_8_v, int8_t, idx_w, lde_b_tlb)
604 GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h_tlb)
605 GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w_tlb)
606 GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d_tlb)
607 GEN_VEXT_LD_INDEX(vlxei64_8_v, int8_t, idx_d, lde_b_tlb)
608 GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h_tlb)
609 GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w_tlb)
610 GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d_tlb)
611
612 #define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN) \
613 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
614 void *vs2, CPURISCVState *env, uint32_t desc) \
615 { \
616 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
617 STORE_FN, ctzl(sizeof(ETYPE)), \
618 GETPC()); \
619 }
620
621 GEN_VEXT_ST_INDEX(vsxei8_8_v, int8_t, idx_b, ste_b_tlb)
622 GEN_VEXT_ST_INDEX(vsxei8_16_v, int16_t, idx_b, ste_h_tlb)
623 GEN_VEXT_ST_INDEX(vsxei8_32_v, int32_t, idx_b, ste_w_tlb)
624 GEN_VEXT_ST_INDEX(vsxei8_64_v, int64_t, idx_b, ste_d_tlb)
625 GEN_VEXT_ST_INDEX(vsxei16_8_v, int8_t, idx_h, ste_b_tlb)
626 GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h_tlb)
627 GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w_tlb)
628 GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d_tlb)
629 GEN_VEXT_ST_INDEX(vsxei32_8_v, int8_t, idx_w, ste_b_tlb)
630 GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h_tlb)
631 GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w_tlb)
632 GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d_tlb)
633 GEN_VEXT_ST_INDEX(vsxei64_8_v, int8_t, idx_d, ste_b_tlb)
634 GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h_tlb)
635 GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w_tlb)
636 GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d_tlb)
637
638 /*
639 * unit-stride fault-only-first load instructions
640 */
641 static inline void
642 vext_ldff(void *vd, void *v0, target_ulong base, CPURISCVState *env,
643 uint32_t desc, vext_ldst_elem_fn_tlb *ldst_tlb,
644 vext_ldst_elem_fn_host *ldst_host, uint32_t log2_esz, uintptr_t ra)
645 {
646 uint32_t i, k, vl = 0;
647 uint32_t nf = vext_nf(desc);
648 uint32_t vm = vext_vm(desc);
649 uint32_t max_elems = vext_max_elems(desc, log2_esz);
650 uint32_t esz = 1 << log2_esz;
651 uint32_t msize = nf * esz;
652 uint32_t vma = vext_vma(desc);
653 target_ulong addr, addr_probe, addr_i, offset, remain, page_split, elems;
654 int mmu_index = riscv_env_mmu_index(env, false);
655 int flags, probe_flags;
656 void *host;
657
658 VSTART_CHECK_EARLY_EXIT(env, env->vl);
659
660 addr = base + ((env->vstart * nf) << log2_esz);
661 page_split = -(addr | TARGET_PAGE_MASK);
662 /* Get number of elements */
663 elems = page_split / msize;
664 if (unlikely(env->vstart + elems >= env->vl)) {
665 elems = env->vl - env->vstart;
666 }
667
668 /* Check page permission/pmp/watchpoint/etc. */
669 probe_pages(env, addr, elems * msize, ra, MMU_DATA_LOAD, mmu_index, &host,
670 &flags, true);
671
672 /* If we are crossing a page check also the second page. */
673 if (env->vl > elems) {
674 addr_probe = addr + (elems << log2_esz);
675 probe_pages(env, addr_probe, elems * msize, ra, MMU_DATA_LOAD,
676 mmu_index, &host, &probe_flags, true);
677 flags |= probe_flags;
678 }
679
680 if (flags & ~TLB_WATCHPOINT) {
681 /* probe every access */
682 for (i = env->vstart; i < env->vl; i++) {
683 if (!vm && !vext_elem_mask(v0, i)) {
684 continue;
685 }
686 addr_i = adjust_addr(env, base + i * (nf << log2_esz));
687 if (i == 0) {
688 /* Allow fault on first element. */
689 probe_pages(env, addr_i, nf << log2_esz, ra, MMU_DATA_LOAD,
690 mmu_index, &host, NULL, false);
691 } else {
692 remain = nf << log2_esz;
693 while (remain > 0) {
694 offset = -(addr_i | TARGET_PAGE_MASK);
695
696 /* Probe nonfault on subsequent elements. */
697 probe_pages(env, addr_i, offset, 0, MMU_DATA_LOAD,
698 mmu_index, &host, &flags, true);
699
700 /*
701 * Stop if invalid (unmapped) or mmio (transaction may
702 * fail). Do not stop if watchpoint, as the spec says that
703 * first-fault should continue to access the same
704 * elements regardless of any watchpoint.
705 */
706 if (flags & ~TLB_WATCHPOINT) {
707 vl = i;
708 goto ProbeSuccess;
709 }
710 if (remain <= offset) {
711 break;
712 }
713 remain -= offset;
714 addr_i = adjust_addr(env, addr_i + offset);
715 }
716 }
717 }
718 }
719 ProbeSuccess:
720 /* load bytes from guest memory */
721 if (vl != 0) {
722 env->vl = vl;
723 }
724
725 if (env->vstart < env->vl) {
726 if (vm) {
727 /* Load/store elements in the first page */
728 if (likely(elems)) {
729 vext_page_ldst_us(env, vd, addr, elems, nf, max_elems,
730 log2_esz, true, mmu_index, ldst_tlb,
731 ldst_host, ra);
732 }
733
734 /* Load/store elements in the second page */
735 if (unlikely(env->vstart < env->vl)) {
736 /* Cross page element */
737 if (unlikely(page_split % msize)) {
738 for (k = 0; k < nf; k++) {
739 addr = base + ((env->vstart * nf + k) << log2_esz);
740 ldst_tlb(env, adjust_addr(env, addr),
741 env->vstart + k * max_elems, vd, ra);
742 }
743 env->vstart++;
744 }
745
746 addr = base + ((env->vstart * nf) << log2_esz);
747 /* Get number of elements of second page */
748 elems = env->vl - env->vstart;
749
750 /* Load/store elements in the second page */
751 vext_page_ldst_us(env, vd, addr, elems, nf, max_elems,
752 log2_esz, true, mmu_index, ldst_tlb,
753 ldst_host, ra);
754 }
755 } else {
756 for (i = env->vstart; i < env->vl; i++) {
757 k = 0;
758 while (k < nf) {
759 if (!vext_elem_mask(v0, i)) {
760 /* set masked-off elements to 1s */
761 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
762 (i + k * max_elems + 1) * esz);
763 k++;
764 continue;
765 }
766 addr = base + ((i * nf + k) << log2_esz);
767 ldst_tlb(env, adjust_addr(env, addr), i + k * max_elems,
768 vd, ra);
769 k++;
770 }
771 }
772 }
773 }
774 env->vstart = 0;
775
776 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
777 }
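/*
 * Worked example of the fault-only-first trimming above (illustrative):
 * for a vle32ff.v with vl = 8 where element 3 is the first one to touch an
 * unmapped page, the nonfault probes set vl = 3 and only elements 0..2 are
 * loaded; a fault on element 0 itself is still raised, as fault-only-first
 * requires.
 */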
778
779 #define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN_TLB, LOAD_FN_HOST) \
780 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
781 CPURISCVState *env, uint32_t desc) \
782 { \
783 vext_ldff(vd, v0, base, env, desc, LOAD_FN_TLB, \
784 LOAD_FN_HOST, ctzl(sizeof(ETYPE)), GETPC()); \
785 }
786
787 GEN_VEXT_LDFF(vle8ff_v, int8_t, lde_b_tlb, lde_b_host)
788 GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h_tlb, lde_h_host)
789 GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w_tlb, lde_w_host)
790 GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d_tlb, lde_d_host)
791
792 #define DO_SWAP(N, M) (M)
793 #define DO_AND(N, M) (N & M)
794 #define DO_XOR(N, M) (N ^ M)
795 #define DO_OR(N, M) (N | M)
796 #define DO_ADD(N, M) (N + M)
797
798 /* Signed/unsigned min/max (semantics follow the operand types) */
799 #define DO_MAX(N, M) ((N) >= (M) ? (N) : (M))
800 #define DO_MIN(N, M) ((N) >= (M) ? (M) : (N))
801
802 /*
803 * load and store whole register instructions
804 */
805 static inline QEMU_ALWAYS_INLINE void
806 vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
807 vext_ldst_elem_fn_tlb *ldst_tlb,
808 vext_ldst_elem_fn_host *ldst_host, uint32_t log2_esz,
809 uintptr_t ra, bool is_load)
810 {
811 target_ulong page_split, elems, addr;
812 uint32_t nf = vext_nf(desc);
813 uint32_t vlenb = riscv_cpu_cfg(env)->vlenb;
814 uint32_t max_elems = vlenb >> log2_esz;
815 uint32_t evl = nf * max_elems;
816 uint32_t esz = 1 << log2_esz;
817 int mmu_index = riscv_env_mmu_index(env, false);
818
819 /* Calculate the page range of first page */
820 addr = base + (env->vstart << log2_esz);
821 page_split = -(addr | TARGET_PAGE_MASK);
822 /* Get number of elements */
823 elems = page_split / esz;
824 if (unlikely(env->vstart + elems >= evl)) {
825 elems = evl - env->vstart;
826 }
827
828 /* Load/store elements in the first page */
829 if (likely(elems)) {
830 vext_page_ldst_us(env, vd, addr, elems, 1, max_elems, log2_esz,
831 is_load, mmu_index, ldst_tlb, ldst_host, ra);
832 }
833
834 /* Load/store elements in the second page */
835 if (unlikely(env->vstart < evl)) {
836 /* Cross page element */
837 if (unlikely(page_split % esz)) {
838 addr = base + (env->vstart << log2_esz);
839 ldst_tlb(env, adjust_addr(env, addr), env->vstart, vd, ra);
840 env->vstart++;
841 }
842
843 addr = base + (env->vstart << log2_esz);
844 /* Get number of elements of second page */
845 elems = evl - env->vstart;
846
847 /* Load/store elements in the second page */
848 vext_page_ldst_us(env, vd, addr, elems, 1, max_elems, log2_esz,
849 is_load, mmu_index, ldst_tlb, ldst_host, ra);
850 }
851
852 env->vstart = 0;
853 }
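/*
 * Worked example (illustrative values): vl2re32.v with vlenb = 16 moves
 * nf * (vlenb >> 2) = 2 * 4 = 8 32-bit elements regardless of vl or vtype,
 * split across at most two pages in the same way as the unit-stride path.
 */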
854
855 #define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN_TLB, LOAD_FN_HOST) \
856 void HELPER(NAME)(void *vd, target_ulong base, CPURISCVState *env, \
857 uint32_t desc) \
858 { \
859 vext_ldst_whole(vd, base, env, desc, LOAD_FN_TLB, LOAD_FN_HOST, \
860 ctzl(sizeof(ETYPE)), GETPC(), true); \
861 }
862
863 GEN_VEXT_LD_WHOLE(vl1re8_v, int8_t, lde_b_tlb, lde_b_host)
864 GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h_tlb, lde_h_host)
865 GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w_tlb, lde_w_host)
866 GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d_tlb, lde_d_host)
867 GEN_VEXT_LD_WHOLE(vl2re8_v, int8_t, lde_b_tlb, lde_b_host)
868 GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h_tlb, lde_h_host)
869 GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w_tlb, lde_w_host)
870 GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d_tlb, lde_d_host)
871 GEN_VEXT_LD_WHOLE(vl4re8_v, int8_t, lde_b_tlb, lde_b_host)
872 GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h_tlb, lde_h_host)
873 GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w_tlb, lde_w_host)
874 GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d_tlb, lde_d_host)
875 GEN_VEXT_LD_WHOLE(vl8re8_v, int8_t, lde_b_tlb, lde_b_host)
876 GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h_tlb, lde_h_host)
877 GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w_tlb, lde_w_host)
878 GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d_tlb, lde_d_host)
879
880 #define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN_TLB, STORE_FN_HOST) \
881 void HELPER(NAME)(void *vd, target_ulong base, CPURISCVState *env, \
882 uint32_t desc) \
883 { \
884 vext_ldst_whole(vd, base, env, desc, STORE_FN_TLB, STORE_FN_HOST, \
885 ctzl(sizeof(ETYPE)), GETPC(), false); \
886 }
887
888 GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b_tlb, ste_b_host)
889 GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b_tlb, ste_b_host)
890 GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b_tlb, ste_b_host)
891 GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b_tlb, ste_b_host)
892
893 /*
894 * Vector Integer Arithmetic Instructions
895 */
896
897 /* (TD, T1, T2, TX1, TX2) */
898 #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
899 #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
900 #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
901 #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
902 #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
903 #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
904 #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
905 #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
906 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
907 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
908 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
909 #define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
910 #define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
911 #define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
912 #define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
913 #define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
914 #define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
915 #define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
916 #define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
917 #define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
918 #define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
919 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
920 #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t
921
922 #define DO_SUB(N, M) (N - M)
923 #define DO_RSUB(N, M) (M - N)
924
925 RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
926 RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
927 RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
928 RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
929 RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
930 RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
931 RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
932 RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)
933
934 GEN_VEXT_VV(vadd_vv_b, 1)
935 GEN_VEXT_VV(vadd_vv_h, 2)
936 GEN_VEXT_VV(vadd_vv_w, 4)
937 GEN_VEXT_VV(vadd_vv_d, 8)
938 GEN_VEXT_VV(vsub_vv_b, 1)
939 GEN_VEXT_VV(vsub_vv_h, 2)
940 GEN_VEXT_VV(vsub_vv_w, 4)
941 GEN_VEXT_VV(vsub_vv_d, 8)
942
943
944 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
945 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
946 RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
947 RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
948 RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
949 RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
950 RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
951 RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
952 RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
953 RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
954 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
955 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)
956
957 GEN_VEXT_VX(vadd_vx_b, 1)
958 GEN_VEXT_VX(vadd_vx_h, 2)
959 GEN_VEXT_VX(vadd_vx_w, 4)
960 GEN_VEXT_VX(vadd_vx_d, 8)
961 GEN_VEXT_VX(vsub_vx_b, 1)
962 GEN_VEXT_VX(vsub_vx_h, 2)
963 GEN_VEXT_VX(vsub_vx_w, 4)
964 GEN_VEXT_VX(vsub_vx_d, 8)
965 GEN_VEXT_VX(vrsub_vx_b, 1)
966 GEN_VEXT_VX(vrsub_vx_h, 2)
967 GEN_VEXT_VX(vrsub_vx_w, 4)
968 GEN_VEXT_VX(vrsub_vx_d, 8)
969
970 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
971 {
972 intptr_t oprsz = simd_oprsz(desc);
973 intptr_t i;
974
975 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
976 *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
977 }
978 }
979
980 void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
981 {
982 intptr_t oprsz = simd_oprsz(desc);
983 intptr_t i;
984
985 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
986 *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
987 }
988 }
989
990 void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
991 {
992 intptr_t oprsz = simd_oprsz(desc);
993 intptr_t i;
994
995 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
996 *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
997 }
998 }
999
1000 void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
1001 {
1002 intptr_t oprsz = simd_oprsz(desc);
1003 intptr_t i;
1004
1005 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1006 *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
1007 }
1008 }
1009
1010 /* Vector Widening Integer Add/Subtract */
1011 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
1012 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
1013 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
1014 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
1015 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
1016 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
1017 #define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
1018 #define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
1019 #define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
1020 #define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t
1021 #define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t
1022 #define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t
1023 RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
1024 RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
1025 RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
1026 RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
1027 RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
1028 RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
1029 RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
1030 RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
1031 RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
1032 RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
1033 RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
1034 RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
1035 RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
1036 RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
1037 RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
1038 RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
1039 RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
1040 RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
1041 RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
1042 RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
1043 RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
1044 RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
1045 RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
1046 RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
1047 GEN_VEXT_VV(vwaddu_vv_b, 2)
1048 GEN_VEXT_VV(vwaddu_vv_h, 4)
1049 GEN_VEXT_VV(vwaddu_vv_w, 8)
1050 GEN_VEXT_VV(vwsubu_vv_b, 2)
1051 GEN_VEXT_VV(vwsubu_vv_h, 4)
1052 GEN_VEXT_VV(vwsubu_vv_w, 8)
1053 GEN_VEXT_VV(vwadd_vv_b, 2)
1054 GEN_VEXT_VV(vwadd_vv_h, 4)
1055 GEN_VEXT_VV(vwadd_vv_w, 8)
1056 GEN_VEXT_VV(vwsub_vv_b, 2)
1057 GEN_VEXT_VV(vwsub_vv_h, 4)
1058 GEN_VEXT_VV(vwsub_vv_w, 8)
1059 GEN_VEXT_VV(vwaddu_wv_b, 2)
1060 GEN_VEXT_VV(vwaddu_wv_h, 4)
1061 GEN_VEXT_VV(vwaddu_wv_w, 8)
1062 GEN_VEXT_VV(vwsubu_wv_b, 2)
1063 GEN_VEXT_VV(vwsubu_wv_h, 4)
1064 GEN_VEXT_VV(vwsubu_wv_w, 8)
1065 GEN_VEXT_VV(vwadd_wv_b, 2)
1066 GEN_VEXT_VV(vwadd_wv_h, 4)
1067 GEN_VEXT_VV(vwadd_wv_w, 8)
1068 GEN_VEXT_VV(vwsub_wv_b, 2)
1069 GEN_VEXT_VV(vwsub_wv_h, 4)
1070 GEN_VEXT_VV(vwsub_wv_w, 8)
1071
1072 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
1073 RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
1074 RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
1075 RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
1076 RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
1077 RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
1078 RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
1079 RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
1080 RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
1081 RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
1082 RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
1083 RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
1084 RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
1085 RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
1086 RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
1087 RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
1088 RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
1089 RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
1090 RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
1091 RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
1092 RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
1093 RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
1094 RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
1095 RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
1096 GEN_VEXT_VX(vwaddu_vx_b, 2)
1097 GEN_VEXT_VX(vwaddu_vx_h, 4)
1098 GEN_VEXT_VX(vwaddu_vx_w, 8)
1099 GEN_VEXT_VX(vwsubu_vx_b, 2)
1100 GEN_VEXT_VX(vwsubu_vx_h, 4)
1101 GEN_VEXT_VX(vwsubu_vx_w, 8)
1102 GEN_VEXT_VX(vwadd_vx_b, 2)
1103 GEN_VEXT_VX(vwadd_vx_h, 4)
1104 GEN_VEXT_VX(vwadd_vx_w, 8)
1105 GEN_VEXT_VX(vwsub_vx_b, 2)
1106 GEN_VEXT_VX(vwsub_vx_h, 4)
1107 GEN_VEXT_VX(vwsub_vx_w, 8)
1108 GEN_VEXT_VX(vwaddu_wx_b, 2)
1109 GEN_VEXT_VX(vwaddu_wx_h, 4)
1110 GEN_VEXT_VX(vwaddu_wx_w, 8)
1111 GEN_VEXT_VX(vwsubu_wx_b, 2)
1112 GEN_VEXT_VX(vwsubu_wx_h, 4)
1113 GEN_VEXT_VX(vwsubu_wx_w, 8)
1114 GEN_VEXT_VX(vwadd_wx_b, 2)
1115 GEN_VEXT_VX(vwadd_wx_h, 4)
1116 GEN_VEXT_VX(vwadd_wx_w, 8)
1117 GEN_VEXT_VX(vwsub_wx_b, 2)
1118 GEN_VEXT_VX(vwsub_wx_h, 4)
1119 GEN_VEXT_VX(vwsub_wx_w, 8)
1120
1121 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
1122 #define DO_VADC(N, M, C) (N + M + C)
1123 #define DO_VSBC(N, M, C) (N - M - C)
1124
1125 #define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP) \
1126 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1127 CPURISCVState *env, uint32_t desc) \
1128 { \
1129 uint32_t vl = env->vl; \
1130 uint32_t esz = sizeof(ETYPE); \
1131 uint32_t total_elems = \
1132 vext_get_total_elems(env, desc, esz); \
1133 uint32_t vta = vext_vta(desc); \
1134 uint32_t i; \
1135 \
1136 VSTART_CHECK_EARLY_EXIT(env, vl); \
1137 \
1138 for (i = env->vstart; i < vl; i++) { \
1139 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1140 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1141 ETYPE carry = vext_elem_mask(v0, i); \
1142 \
1143 *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \
1144 } \
1145 env->vstart = 0; \
1146 /* set tail elements to 1s */ \
1147 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
1148 }
1149
1150 GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC)
1151 GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
1152 GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
1153 GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)
1154
1155 GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC)
1156 GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
1157 GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
1158 GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)
1159
1160 #define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP) \
1161 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
1162 CPURISCVState *env, uint32_t desc) \
1163 { \
1164 uint32_t vl = env->vl; \
1165 uint32_t esz = sizeof(ETYPE); \
1166 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
1167 uint32_t vta = vext_vta(desc); \
1168 uint32_t i; \
1169 \
1170 VSTART_CHECK_EARLY_EXIT(env, vl); \
1171 \
1172 for (i = env->vstart; i < vl; i++) { \
1173 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1174 ETYPE carry = vext_elem_mask(v0, i); \
1175 \
1176 *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
1177 } \
1178 env->vstart = 0; \
1179 /* set tail elements to 1s */ \
1180 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
1181 }
1182
1183 GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC)
1184 GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
1185 GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
1186 GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)
1187
1188 GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC)
1189 GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
1190 GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
1191 GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)
1192
1193 #define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \
1194 (__typeof(N))(N + M) < N)
1195 #define DO_MSBC(N, M, C) (C ? N <= M : N < M)
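/*
 * Carry/borrow detection above relies only on unsigned wrap-around.  As an
 * illustrative 8-bit example: 200 + 100 wraps to 44 and 44 < 200, so
 * DO_MADC reports a carry-out; 50 - 60 borrows because 50 < 60, so DO_MSBC
 * reports a borrow.  When C is set, the comparisons become
 * (N + M + 1) <= N and N <= M so the incoming carry/borrow is accounted for.
 */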
1196
1197 #define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \
1198 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1199 CPURISCVState *env, uint32_t desc) \
1200 { \
1201 uint32_t vl = env->vl; \
1202 uint32_t vm = vext_vm(desc); \
1203 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \
1204 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
1205 uint32_t i; \
1206 \
1207 VSTART_CHECK_EARLY_EXIT(env, vl); \
1208 \
1209 for (i = env->vstart; i < vl; i++) { \
1210 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1211 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1212 ETYPE carry = !vm && vext_elem_mask(v0, i); \
1213 vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \
1214 } \
1215 env->vstart = 0; \
1216 /*
1217 * mask destination registers are always tail-agnostic
1218 * set tail elements to 1s
1219 */ \
1220 if (vta_all_1s) { \
1221 for (; i < total_elems; i++) { \
1222 vext_set_elem_mask(vd, i, 1); \
1223 } \
1224 } \
1225 }
1226
1227 GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC)
1228 GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
1229 GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
1230 GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)
1231
1232 GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC)
1233 GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
1234 GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
1235 GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)
1236
1237 #define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \
1238 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
1239 void *vs2, CPURISCVState *env, uint32_t desc) \
1240 { \
1241 uint32_t vl = env->vl; \
1242 uint32_t vm = vext_vm(desc); \
1243 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \
1244 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
1245 uint32_t i; \
1246 \
1247 VSTART_CHECK_EARLY_EXIT(env, vl); \
1248 \
1249 for (i = env->vstart; i < vl; i++) { \
1250 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1251 ETYPE carry = !vm && vext_elem_mask(v0, i); \
1252 vext_set_elem_mask(vd, i, \
1253 DO_OP(s2, (ETYPE)(target_long)s1, carry)); \
1254 } \
1255 env->vstart = 0; \
1256 /*
1257 * mask destination registers are always tail-agnostic
1258 * set tail elements to 1s
1259 */ \
1260 if (vta_all_1s) { \
1261 for (; i < total_elems; i++) { \
1262 vext_set_elem_mask(vd, i, 1); \
1263 } \
1264 } \
1265 }
1266
1267 GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC)
1268 GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
1269 GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
1270 GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)
1271
1272 GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC)
1273 GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
1274 GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
1275 GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)
1276
1277 /* Vector Bitwise Logical Instructions */
1278 RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
1279 RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
1280 RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
1281 RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
1282 RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
1283 RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
1284 RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
1285 RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
1286 RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
1287 RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
1288 RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
1289 RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
1290 GEN_VEXT_VV(vand_vv_b, 1)
1291 GEN_VEXT_VV(vand_vv_h, 2)
1292 GEN_VEXT_VV(vand_vv_w, 4)
1293 GEN_VEXT_VV(vand_vv_d, 8)
1294 GEN_VEXT_VV(vor_vv_b, 1)
1295 GEN_VEXT_VV(vor_vv_h, 2)
1296 GEN_VEXT_VV(vor_vv_w, 4)
1297 GEN_VEXT_VV(vor_vv_d, 8)
1298 GEN_VEXT_VV(vxor_vv_b, 1)
1299 GEN_VEXT_VV(vxor_vv_h, 2)
1300 GEN_VEXT_VV(vxor_vv_w, 4)
1301 GEN_VEXT_VV(vxor_vv_d, 8)
1302
1303 RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
1304 RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
1305 RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
1306 RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
1307 RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
1308 RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
1309 RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
1310 RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
1311 RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
1312 RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
1313 RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
1314 RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
1315 GEN_VEXT_VX(vand_vx_b, 1)
1316 GEN_VEXT_VX(vand_vx_h, 2)
1317 GEN_VEXT_VX(vand_vx_w, 4)
1318 GEN_VEXT_VX(vand_vx_d, 8)
1319 GEN_VEXT_VX(vor_vx_b, 1)
1320 GEN_VEXT_VX(vor_vx_h, 2)
1321 GEN_VEXT_VX(vor_vx_w, 4)
1322 GEN_VEXT_VX(vor_vx_d, 8)
1323 GEN_VEXT_VX(vxor_vx_b, 1)
1324 GEN_VEXT_VX(vxor_vx_h, 2)
1325 GEN_VEXT_VX(vxor_vx_w, 4)
1326 GEN_VEXT_VX(vxor_vx_d, 8)
1327
1328 /* Vector Single-Width Bit Shift Instructions */
1329 #define DO_SLL(N, M) (N << (M))
1330 #define DO_SRL(N, M) (N >> (M))
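/*
 * Note: the vsra helpers below instantiate DO_SRL with a signed source
 * type, so ">>" becomes an arithmetic right shift (QEMU relies on the
 * compilers it supports implementing signed right shift that way).  The
 * MASK argument passed to the generators limits the shift amount to
 * log2(SEW) bits, e.g. 0x1f for 32-bit elements.
 */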
1331
1332 /* generate the helpers for shift instructions with two vector operands */
1333 #define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK) \
1334 void HELPER(NAME)(void *vd, void *v0, void *vs1, \
1335 void *vs2, CPURISCVState *env, uint32_t desc) \
1336 { \
1337 uint32_t vm = vext_vm(desc); \
1338 uint32_t vl = env->vl; \
1339 uint32_t esz = sizeof(TS1); \
1340 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
1341 uint32_t vta = vext_vta(desc); \
1342 uint32_t vma = vext_vma(desc); \
1343 uint32_t i; \
1344 \
1345 VSTART_CHECK_EARLY_EXIT(env, vl); \
1346 \
1347 for (i = env->vstart; i < vl; i++) { \
1348 if (!vm && !vext_elem_mask(v0, i)) { \
1349 /* set masked-off elements to 1s */ \
1350 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
1351 continue; \
1352 } \
1353 TS1 s1 = *((TS1 *)vs1 + HS1(i)); \
1354 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
1355 *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \
1356 } \
1357 env->vstart = 0; \
1358 /* set tail elements to 1s */ \
1359 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
1360 }
1361
1362 GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7)
1363 GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
1364 GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
1365 GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)
1366
1367 GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1368 GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1369 GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1370 GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
1371
1372 GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7)
1373 GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
1374 GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1375 GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)
1376
1377 /*
1378 * generate the helpers for shift instructions with one vector and one scalar
1379 */
1380 #define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \
1381 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
1382 void *vs2, CPURISCVState *env, \
1383 uint32_t desc) \
1384 { \
1385 uint32_t vm = vext_vm(desc); \
1386 uint32_t vl = env->vl; \
1387 uint32_t esz = sizeof(TD); \
1388 uint32_t total_elems = \
1389 vext_get_total_elems(env, desc, esz); \
1390 uint32_t vta = vext_vta(desc); \
1391 uint32_t vma = vext_vma(desc); \
1392 uint32_t i; \
1393 \
1394 VSTART_CHECK_EARLY_EXIT(env, vl); \
1395 \
1396 for (i = env->vstart; i < vl; i++) { \
1397 if (!vm && !vext_elem_mask(v0, i)) { \
1398 /* set masked-off elements to 1s */ \
1399 vext_set_elems_1s(vd, vma, i * esz, \
1400 (i + 1) * esz); \
1401 continue; \
1402 } \
1403 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
1404 *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \
1405 } \
1406 env->vstart = 0; \
1407 /* set tail elements to 1s */ \
1408 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);\
1409 }
1410
1411 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
1412 GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
1413 GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
1414 GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)
1415
1416 GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1417 GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1418 GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1419 GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
1420
1421 GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7)
1422 GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
1423 GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1424 GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)
1425
1426 /* Vector Narrowing Integer Right Shift Instructions */
1427 GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
1428 GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1429 GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1430 GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf)
1431 GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1432 GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1433 GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
1434 GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1435 GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1436 GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf)
1437 GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1438 GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1439
1440 /* Vector Integer Comparison Instructions */
1441 #define DO_MSEQ(N, M) (N == M)
1442 #define DO_MSNE(N, M) (N != M)
1443 #define DO_MSLT(N, M) (N < M)
1444 #define DO_MSLE(N, M) (N <= M)
1445 #define DO_MSGT(N, M) (N > M)
1446
1447 #define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \
1448 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1449 CPURISCVState *env, uint32_t desc) \
1450 { \
1451 uint32_t vm = vext_vm(desc); \
1452 uint32_t vl = env->vl; \
1453 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \
1454 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
1455 uint32_t vma = vext_vma(desc); \
1456 uint32_t i; \
1457 \
1458 VSTART_CHECK_EARLY_EXIT(env, vl); \
1459 \
1460 for (i = env->vstart; i < vl; i++) { \
1461 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1462 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1463 if (!vm && !vext_elem_mask(v0, i)) { \
1464 /* set masked-off elements to 1s */ \
1465 if (vma) { \
1466 vext_set_elem_mask(vd, i, 1); \
1467 } \
1468 continue; \
1469 } \
1470 vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \
1471 } \
1472 env->vstart = 0; \
1473 /*
1474 * mask destination registers are always tail-agnostic
1475 * set tail elements to 1s
1476 */ \
1477 if (vta_all_1s) { \
1478 for (; i < total_elems; i++) { \
1479 vext_set_elem_mask(vd, i, 1); \
1480 } \
1481 } \
1482 }
1483
1484 GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ)
1485 GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
1486 GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
1487 GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)
1488
1489 GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE)
1490 GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
1491 GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
1492 GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)
1493
1494 GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT)
1495 GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
1496 GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
1497 GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)
1498
1499 GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT)
1500 GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
1501 GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
1502 GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)
1503
1504 GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE)
1505 GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
1506 GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
1507 GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)
1508
1509 GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE)
1510 GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
1511 GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
1512 GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)
1513
1514 #define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \
1515 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
1516 CPURISCVState *env, uint32_t desc) \
1517 { \
1518 uint32_t vm = vext_vm(desc); \
1519 uint32_t vl = env->vl; \
1520 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \
1521 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
1522 uint32_t vma = vext_vma(desc); \
1523 uint32_t i; \
1524 \
1525 VSTART_CHECK_EARLY_EXIT(env, vl); \
1526 \
1527 for (i = env->vstart; i < vl; i++) { \
1528 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1529 if (!vm && !vext_elem_mask(v0, i)) { \
1530 /* set masked-off elements to 1s */ \
1531 if (vma) { \
1532 vext_set_elem_mask(vd, i, 1); \
1533 } \
1534 continue; \
1535 } \
1536 vext_set_elem_mask(vd, i, \
1537 DO_OP(s2, (ETYPE)(target_long)s1)); \
1538 } \
1539 env->vstart = 0; \
1540 /*
1541      * mask destination registers are always tail-agnostic
1542 * set tail elements to 1s
1543 */ \
1544 if (vta_all_1s) { \
1545 for (; i < total_elems; i++) { \
1546 vext_set_elem_mask(vd, i, 1); \
1547 } \
1548 } \
1549 }
1550
1551 GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ)
1552 GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
1553 GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
1554 GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)
1555
1556 GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE)
1557 GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
1558 GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
1559 GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)
1560
1561 GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT)
1562 GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
1563 GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
1564 GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)
1565
1566 GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT)
1567 GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
1568 GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
1569 GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)
1570
1571 GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE)
1572 GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
1573 GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
1574 GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)
1575
1576 GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE)
1577 GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
1578 GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
1579 GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)
1580
1581 GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT)
1582 GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
1583 GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
1584 GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)
1585
1586 GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT)
1587 GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
1588 GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
1589 GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)
1590
1591 /* Vector Integer Min/Max Instructions */
1592 RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
1593 RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
1594 RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
1595 RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
1596 RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
1597 RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
1598 RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
1599 RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
1600 RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
1601 RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
1602 RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
1603 RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
1604 RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
1605 RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
1606 RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
1607 RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
1608 GEN_VEXT_VV(vminu_vv_b, 1)
1609 GEN_VEXT_VV(vminu_vv_h, 2)
1610 GEN_VEXT_VV(vminu_vv_w, 4)
1611 GEN_VEXT_VV(vminu_vv_d, 8)
1612 GEN_VEXT_VV(vmin_vv_b, 1)
1613 GEN_VEXT_VV(vmin_vv_h, 2)
1614 GEN_VEXT_VV(vmin_vv_w, 4)
1615 GEN_VEXT_VV(vmin_vv_d, 8)
1616 GEN_VEXT_VV(vmaxu_vv_b, 1)
1617 GEN_VEXT_VV(vmaxu_vv_h, 2)
1618 GEN_VEXT_VV(vmaxu_vv_w, 4)
1619 GEN_VEXT_VV(vmaxu_vv_d, 8)
1620 GEN_VEXT_VV(vmax_vv_b, 1)
1621 GEN_VEXT_VV(vmax_vv_h, 2)
1622 GEN_VEXT_VV(vmax_vv_w, 4)
1623 GEN_VEXT_VV(vmax_vv_d, 8)
1624
1625 RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
1626 RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
1627 RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
1628 RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
1629 RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
1630 RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
1631 RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
1632 RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
1633 RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
1634 RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
1635 RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
1636 RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
1637 RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
1638 RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
1639 RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
1640 RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
1641 GEN_VEXT_VX(vminu_vx_b, 1)
1642 GEN_VEXT_VX(vminu_vx_h, 2)
1643 GEN_VEXT_VX(vminu_vx_w, 4)
1644 GEN_VEXT_VX(vminu_vx_d, 8)
1645 GEN_VEXT_VX(vmin_vx_b, 1)
1646 GEN_VEXT_VX(vmin_vx_h, 2)
1647 GEN_VEXT_VX(vmin_vx_w, 4)
1648 GEN_VEXT_VX(vmin_vx_d, 8)
1649 GEN_VEXT_VX(vmaxu_vx_b, 1)
1650 GEN_VEXT_VX(vmaxu_vx_h, 2)
1651 GEN_VEXT_VX(vmaxu_vx_w, 4)
1652 GEN_VEXT_VX(vmaxu_vx_d, 8)
1653 GEN_VEXT_VX(vmax_vx_b, 1)
1654 GEN_VEXT_VX(vmax_vx_h, 2)
1655 GEN_VEXT_VX(vmax_vx_w, 4)
1656 GEN_VEXT_VX(vmax_vx_d, 8)
1657
1658 /* Vector Single-Width Integer Multiply Instructions */
1659 #define DO_MUL(N, M) (N * M)
1660 RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
1661 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
1662 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
1663 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
1664 GEN_VEXT_VV(vmul_vv_b, 1)
1665 GEN_VEXT_VV(vmul_vv_h, 2)
1666 GEN_VEXT_VV(vmul_vv_w, 4)
1667 GEN_VEXT_VV(vmul_vv_d, 8)
1668
1669 static int8_t do_mulh_b(int8_t s2, int8_t s1)
1670 {
1671 return (int16_t)s2 * (int16_t)s1 >> 8;
1672 }
1673
1674 static int16_t do_mulh_h(int16_t s2, int16_t s1)
1675 {
1676 return (int32_t)s2 * (int32_t)s1 >> 16;
1677 }
1678
1679 static int32_t do_mulh_w(int32_t s2, int32_t s1)
1680 {
1681 return (int64_t)s2 * (int64_t)s1 >> 32;
1682 }
1683
1684 static int64_t do_mulh_d(int64_t s2, int64_t s1)
1685 {
1686 uint64_t hi_64, lo_64;
1687
1688 muls64(&lo_64, &hi_64, s1, s2);
1689 return hi_64;
1690 }
1691
1692 static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
1693 {
1694 return (uint16_t)s2 * (uint16_t)s1 >> 8;
1695 }
1696
1697 static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
1698 {
1699 return (uint32_t)s2 * (uint32_t)s1 >> 16;
1700 }
1701
1702 static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
1703 {
1704 return (uint64_t)s2 * (uint64_t)s1 >> 32;
1705 }
1706
1707 static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1)
1708 {
1709 uint64_t hi_64, lo_64;
1710
1711 mulu64(&lo_64, &hi_64, s2, s1);
1712 return hi_64;
1713 }
1714
1715 static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
1716 {
1717 return (int16_t)s2 * (uint16_t)s1 >> 8;
1718 }
1719
1720 static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
1721 {
1722 return (int32_t)s2 * (uint32_t)s1 >> 16;
1723 }
1724
1725 static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
1726 {
1727 return (int64_t)s2 * (uint64_t)s1 >> 32;
1728 }
1729
1730 /*
1731 * Let A = signed operand,
1732 * B = unsigned operand
1733 * P = mulu64(A, B), unsigned product
1734 *
1735 * LET X = 2 ** 64 - A, 2's complement of A
1736 * SP = signed product
1737 * THEN
1738 * IF A < 0
1739 * SP = -X * B
1740 * = -(2 ** 64 - A) * B
1741 * = A * B - 2 ** 64 * B
1742 * = P - 2 ** 64 * B
1743 * ELSE
1744 * SP = P
1745 * THEN
1746 * HI_P -= (A < 0 ? B : 0)
1747 */
1748
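/*
 * Worked example of the adjustment above: A = -1, B = 2.
 * mulu64() sees A as 2^64 - 1, so P = 2^65 - 2, i.e. HI_P = 1 and
 * LO_P = 2^64 - 2.  Since A < 0, HI_P -= B gives 1 - 2 = -1, which is
 * indeed the high half of the signed 128-bit product -2.
 */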
1749 static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
1750 {
1751 uint64_t hi_64, lo_64;
1752
1753 mulu64(&lo_64, &hi_64, s2, s1);
1754
1755 hi_64 -= s2 < 0 ? s1 : 0;
1756 return hi_64;
1757 }
1758
1759 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b)
1760 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h)
1761 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w)
1762 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d)
1763 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b)
1764 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h)
1765 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w)
1766 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d)
1767 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
1768 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
1769 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
1770 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
1771 GEN_VEXT_VV(vmulh_vv_b, 1)
1772 GEN_VEXT_VV(vmulh_vv_h, 2)
1773 GEN_VEXT_VV(vmulh_vv_w, 4)
1774 GEN_VEXT_VV(vmulh_vv_d, 8)
1775 GEN_VEXT_VV(vmulhu_vv_b, 1)
1776 GEN_VEXT_VV(vmulhu_vv_h, 2)
1777 GEN_VEXT_VV(vmulhu_vv_w, 4)
1778 GEN_VEXT_VV(vmulhu_vv_d, 8)
1779 GEN_VEXT_VV(vmulhsu_vv_b, 1)
1780 GEN_VEXT_VV(vmulhsu_vv_h, 2)
1781 GEN_VEXT_VV(vmulhsu_vv_w, 4)
1782 GEN_VEXT_VV(vmulhsu_vv_d, 8)
1783
1784 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
1785 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
1786 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL)
1787 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL)
1788 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b)
1789 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h)
1790 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w)
1791 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d)
1792 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b)
1793 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h)
1794 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w)
1795 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d)
1796 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
1797 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
1798 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
1799 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
1800 GEN_VEXT_VX(vmul_vx_b, 1)
1801 GEN_VEXT_VX(vmul_vx_h, 2)
1802 GEN_VEXT_VX(vmul_vx_w, 4)
1803 GEN_VEXT_VX(vmul_vx_d, 8)
1804 GEN_VEXT_VX(vmulh_vx_b, 1)
1805 GEN_VEXT_VX(vmulh_vx_h, 2)
1806 GEN_VEXT_VX(vmulh_vx_w, 4)
1807 GEN_VEXT_VX(vmulh_vx_d, 8)
1808 GEN_VEXT_VX(vmulhu_vx_b, 1)
1809 GEN_VEXT_VX(vmulhu_vx_h, 2)
1810 GEN_VEXT_VX(vmulhu_vx_w, 4)
1811 GEN_VEXT_VX(vmulhu_vx_d, 8)
1812 GEN_VEXT_VX(vmulhsu_vx_b, 1)
1813 GEN_VEXT_VX(vmulhsu_vx_h, 2)
1814 GEN_VEXT_VX(vmulhsu_vx_w, 4)
1815 GEN_VEXT_VX(vmulhsu_vx_d, 8)
1816
1817 /* Vector Integer Divide Instructions */
1818 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M)
1819 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M)
1820 #define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : \
1821 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
1822 #define DO_REM(N, M) (unlikely(M == 0) ? N : \
1823 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)
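/*
 * "N == -N" is true only for zero and for the most negative value of N's
 * type, so combined with "M == -1" it detects the overflowing INT_MIN / -1
 * case.  The results follow the RISC-V rules: division by zero returns all
 * ones for DIV(U) and the dividend for REM(U); the signed overflow case
 * returns INT_MIN with remainder 0.
 */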
1824
1825 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU)
1826 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU)
1827 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU)
1828 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU)
1829 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV)
1830 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV)
1831 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV)
1832 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV)
1833 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU)
1834 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU)
1835 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU)
1836 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU)
1837 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
1838 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
1839 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
1840 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
1841 GEN_VEXT_VV(vdivu_vv_b, 1)
1842 GEN_VEXT_VV(vdivu_vv_h, 2)
1843 GEN_VEXT_VV(vdivu_vv_w, 4)
1844 GEN_VEXT_VV(vdivu_vv_d, 8)
1845 GEN_VEXT_VV(vdiv_vv_b, 1)
1846 GEN_VEXT_VV(vdiv_vv_h, 2)
1847 GEN_VEXT_VV(vdiv_vv_w, 4)
1848 GEN_VEXT_VV(vdiv_vv_d, 8)
1849 GEN_VEXT_VV(vremu_vv_b, 1)
1850 GEN_VEXT_VV(vremu_vv_h, 2)
1851 GEN_VEXT_VV(vremu_vv_w, 4)
1852 GEN_VEXT_VV(vremu_vv_d, 8)
1853 GEN_VEXT_VV(vrem_vv_b, 1)
1854 GEN_VEXT_VV(vrem_vv_h, 2)
1855 GEN_VEXT_VV(vrem_vv_w, 4)
1856 GEN_VEXT_VV(vrem_vv_d, 8)
1857
1858 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU)
1859 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU)
1860 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU)
1861 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU)
1862 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV)
1863 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV)
1864 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV)
1865 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV)
1866 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU)
1867 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU)
1868 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU)
1869 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU)
1870 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM)
1871 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM)
1872 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM)
1873 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM)
1874 GEN_VEXT_VX(vdivu_vx_b, 1)
1875 GEN_VEXT_VX(vdivu_vx_h, 2)
1876 GEN_VEXT_VX(vdivu_vx_w, 4)
1877 GEN_VEXT_VX(vdivu_vx_d, 8)
1878 GEN_VEXT_VX(vdiv_vx_b, 1)
1879 GEN_VEXT_VX(vdiv_vx_h, 2)
1880 GEN_VEXT_VX(vdiv_vx_w, 4)
1881 GEN_VEXT_VX(vdiv_vx_d, 8)
1882 GEN_VEXT_VX(vremu_vx_b, 1)
1883 GEN_VEXT_VX(vremu_vx_h, 2)
1884 GEN_VEXT_VX(vremu_vx_w, 4)
1885 GEN_VEXT_VX(vremu_vx_d, 8)
1886 GEN_VEXT_VX(vrem_vx_b, 1)
1887 GEN_VEXT_VX(vrem_vx_h, 2)
1888 GEN_VEXT_VX(vrem_vx_w, 4)
1889 GEN_VEXT_VX(vrem_vx_d, 8)
1890
1891 /* Vector Widening Integer Multiply Instructions */
1892 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL)
1893 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL)
1894 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL)
1895 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL)
1896 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL)
1897 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL)
1898 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL)
1899 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL)
1900 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL)
1901 GEN_VEXT_VV(vwmul_vv_b, 2)
1902 GEN_VEXT_VV(vwmul_vv_h, 4)
1903 GEN_VEXT_VV(vwmul_vv_w, 8)
1904 GEN_VEXT_VV(vwmulu_vv_b, 2)
1905 GEN_VEXT_VV(vwmulu_vv_h, 4)
1906 GEN_VEXT_VV(vwmulu_vv_w, 8)
1907 GEN_VEXT_VV(vwmulsu_vv_b, 2)
1908 GEN_VEXT_VV(vwmulsu_vv_h, 4)
1909 GEN_VEXT_VV(vwmulsu_vv_w, 8)
1910
1911 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL)
1912 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL)
1913 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL)
1914 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL)
1915 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL)
1916 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL)
1917 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL)
1918 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL)
1919 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL)
1920 GEN_VEXT_VX(vwmul_vx_b, 2)
1921 GEN_VEXT_VX(vwmul_vx_h, 4)
1922 GEN_VEXT_VX(vwmul_vx_w, 8)
1923 GEN_VEXT_VX(vwmulu_vx_b, 2)
1924 GEN_VEXT_VX(vwmulu_vx_h, 4)
1925 GEN_VEXT_VX(vwmulu_vx_w, 8)
1926 GEN_VEXT_VX(vwmulsu_vx_b, 2)
1927 GEN_VEXT_VX(vwmulsu_vx_h, 4)
1928 GEN_VEXT_VX(vwmulsu_vx_w, 8)
1929
1930 /* Vector Single-Width Integer Multiply-Add Instructions */
1931 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
1932 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \
1933 { \
1934 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
1935 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1936 TD d = *((TD *)vd + HD(i)); \
1937 *((TD *)vd + HD(i)) = OP(s2, s1, d); \
1938 }
1939
1940 #define DO_MACC(N, M, D) (M * N + D)
1941 #define DO_NMSAC(N, M, D) (-(M * N) + D)
1942 #define DO_MADD(N, M, D) (M * D + N)
1943 #define DO_NMSUB(N, M, D) (-(M * D) + N)
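/*
 * OPIVV3 invokes OP(s2, s1, d), where d is the old destination element, so:
 *   DO_MACC:  vd[i] = vs1[i] * vs2[i] + vd[i]
 *   DO_NMSAC: vd[i] = -(vs1[i] * vs2[i]) + vd[i]
 *   DO_MADD:  vd[i] = vs1[i] * vd[i] + vs2[i]
 *   DO_NMSUB: vd[i] = -(vs1[i] * vd[i]) + vs2[i]
 */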
1944 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC)
1945 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC)
1946 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC)
1947 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC)
1948 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC)
1949 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC)
1950 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC)
1951 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC)
1952 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD)
1953 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD)
1954 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD)
1955 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD)
1956 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB)
1957 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB)
1958 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB)
1959 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB)
1960 GEN_VEXT_VV(vmacc_vv_b, 1)
1961 GEN_VEXT_VV(vmacc_vv_h, 2)
1962 GEN_VEXT_VV(vmacc_vv_w, 4)
1963 GEN_VEXT_VV(vmacc_vv_d, 8)
1964 GEN_VEXT_VV(vnmsac_vv_b, 1)
1965 GEN_VEXT_VV(vnmsac_vv_h, 2)
1966 GEN_VEXT_VV(vnmsac_vv_w, 4)
1967 GEN_VEXT_VV(vnmsac_vv_d, 8)
1968 GEN_VEXT_VV(vmadd_vv_b, 1)
1969 GEN_VEXT_VV(vmadd_vv_h, 2)
1970 GEN_VEXT_VV(vmadd_vv_w, 4)
1971 GEN_VEXT_VV(vmadd_vv_d, 8)
1972 GEN_VEXT_VV(vnmsub_vv_b, 1)
1973 GEN_VEXT_VV(vnmsub_vv_h, 2)
1974 GEN_VEXT_VV(vnmsub_vv_w, 4)
1975 GEN_VEXT_VV(vnmsub_vv_d, 8)
1976
1977 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
1978 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \
1979 { \
1980 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1981 TD d = *((TD *)vd + HD(i)); \
1982 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \
1983 }
1984
1985 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC)
1986 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC)
1987 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC)
1988 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC)
1989 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC)
1990 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC)
1991 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC)
1992 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC)
1993 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD)
1994 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD)
1995 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD)
1996 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD)
1997 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB)
1998 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB)
1999 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB)
2000 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB)
2001 GEN_VEXT_VX(vmacc_vx_b, 1)
2002 GEN_VEXT_VX(vmacc_vx_h, 2)
2003 GEN_VEXT_VX(vmacc_vx_w, 4)
2004 GEN_VEXT_VX(vmacc_vx_d, 8)
2005 GEN_VEXT_VX(vnmsac_vx_b, 1)
2006 GEN_VEXT_VX(vnmsac_vx_h, 2)
2007 GEN_VEXT_VX(vnmsac_vx_w, 4)
2008 GEN_VEXT_VX(vnmsac_vx_d, 8)
2009 GEN_VEXT_VX(vmadd_vx_b, 1)
2010 GEN_VEXT_VX(vmadd_vx_h, 2)
2011 GEN_VEXT_VX(vmadd_vx_w, 4)
2012 GEN_VEXT_VX(vmadd_vx_d, 8)
2013 GEN_VEXT_VX(vnmsub_vx_b, 1)
2014 GEN_VEXT_VX(vnmsub_vx_h, 2)
2015 GEN_VEXT_VX(vnmsub_vx_w, 4)
2016 GEN_VEXT_VX(vnmsub_vx_d, 8)
2017
2018 /* Vector Widening Integer Multiply-Add Instructions */
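/*
 * vwmaccsu multiplies a signed vs1/rs1 operand by an unsigned vs2, while
 * vwmaccus (scalar-only in the ISA, hence no .vv variant below) multiplies
 * an unsigned rs1 by a signed vs2; both accumulate into a signed 2*SEW vd.
 */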
2019 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC)
2020 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC)
2021 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC)
2022 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC)
2023 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC)
2024 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC)
2025 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC)
2026 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC)
2027 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC)
2028 GEN_VEXT_VV(vwmaccu_vv_b, 2)
2029 GEN_VEXT_VV(vwmaccu_vv_h, 4)
2030 GEN_VEXT_VV(vwmaccu_vv_w, 8)
2031 GEN_VEXT_VV(vwmacc_vv_b, 2)
2032 GEN_VEXT_VV(vwmacc_vv_h, 4)
2033 GEN_VEXT_VV(vwmacc_vv_w, 8)
2034 GEN_VEXT_VV(vwmaccsu_vv_b, 2)
2035 GEN_VEXT_VV(vwmaccsu_vv_h, 4)
2036 GEN_VEXT_VV(vwmaccsu_vv_w, 8)
2037
2038 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC)
2039 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC)
2040 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC)
2041 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC)
2042 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC)
2043 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC)
2044 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC)
2045 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC)
2046 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC)
2047 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC)
2048 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC)
2049 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC)
2050 GEN_VEXT_VX(vwmaccu_vx_b, 2)
2051 GEN_VEXT_VX(vwmaccu_vx_h, 4)
2052 GEN_VEXT_VX(vwmaccu_vx_w, 8)
2053 GEN_VEXT_VX(vwmacc_vx_b, 2)
2054 GEN_VEXT_VX(vwmacc_vx_h, 4)
2055 GEN_VEXT_VX(vwmacc_vx_w, 8)
2056 GEN_VEXT_VX(vwmaccsu_vx_b, 2)
2057 GEN_VEXT_VX(vwmaccsu_vx_h, 4)
2058 GEN_VEXT_VX(vwmaccsu_vx_w, 8)
2059 GEN_VEXT_VX(vwmaccus_vx_b, 2)
2060 GEN_VEXT_VX(vwmaccus_vx_h, 4)
2061 GEN_VEXT_VX(vwmaccus_vx_w, 8)
2062
2063 /* Vector Integer Merge and Move Instructions */
2064 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \
2065 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \
2066 uint32_t desc) \
2067 { \
2068 uint32_t vl = env->vl; \
2069 uint32_t esz = sizeof(ETYPE); \
2070 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
2071 uint32_t vta = vext_vta(desc); \
2072 uint32_t i; \
2073 \
2074 VSTART_CHECK_EARLY_EXIT(env, vl); \
2075 \
2076 for (i = env->vstart; i < vl; i++) { \
2077 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
2078 *((ETYPE *)vd + H(i)) = s1; \
2079 } \
2080 env->vstart = 0; \
2081 /* set tail elements to 1s */ \
2082 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
2083 }
2084
2085 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1)
2086 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2)
2087 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4)
2088 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8)
2089
2090 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \
2091 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \
2092 uint32_t desc) \
2093 { \
2094 uint32_t vl = env->vl; \
2095 uint32_t esz = sizeof(ETYPE); \
2096 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
2097 uint32_t vta = vext_vta(desc); \
2098 uint32_t i; \
2099 \
2100 VSTART_CHECK_EARLY_EXIT(env, vl); \
2101 \
2102 for (i = env->vstart; i < vl; i++) { \
2103 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \
2104 } \
2105 env->vstart = 0; \
2106 /* set tail elements to 1s */ \
2107 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
2108 }
2109
2110 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1)
2111 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2)
2112 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4)
2113 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8)
2114
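/*
 * The merge helpers always read v0 as the element selector:
 * vd[i] = v0.mask[i] ? vs1[i] (or rs1) : vs2[i].  Masking in the usual
 * sense does not apply to vmerge, so there is no vm check in the loop.
 */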
2115 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \
2116 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
2117 CPURISCVState *env, uint32_t desc) \
2118 { \
2119 uint32_t vl = env->vl; \
2120 uint32_t esz = sizeof(ETYPE); \
2121 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
2122 uint32_t vta = vext_vta(desc); \
2123 uint32_t i; \
2124 \
2125 VSTART_CHECK_EARLY_EXIT(env, vl); \
2126 \
2127 for (i = env->vstart; i < vl; i++) { \
2128 ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1); \
2129 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \
2130 } \
2131 env->vstart = 0; \
2132 /* set tail elements to 1s */ \
2133 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
2134 }
2135
2136 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1)
2137 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2)
2138 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4)
2139 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8)
2140
2141 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \
2142 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
2143 void *vs2, CPURISCVState *env, uint32_t desc) \
2144 { \
2145 uint32_t vl = env->vl; \
2146 uint32_t esz = sizeof(ETYPE); \
2147 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
2148 uint32_t vta = vext_vta(desc); \
2149 uint32_t i; \
2150 \
2151 VSTART_CHECK_EARLY_EXIT(env, vl); \
2152 \
2153 for (i = env->vstart; i < vl; i++) { \
2154 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
2155 ETYPE d = (!vext_elem_mask(v0, i) ? s2 : \
2156 (ETYPE)(target_long)s1); \
2157 *((ETYPE *)vd + H(i)) = d; \
2158 } \
2159 env->vstart = 0; \
2160 /* set tail elements to 1s */ \
2161 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
2162 }
2163
2164 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1)
2165 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2)
2166 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4)
2167 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8)
2168
2169 /*
2170 * Vector Fixed-Point Arithmetic Instructions
2171 */
2172
2173 /* Vector Single-Width Saturating Add and Subtract */
2174
2175 /*
2176  * Fixed-point instructions generally involve a rounding mode and
2177  * saturation, so define the common macros for fixed point here.
2178 */
2179 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i,
2180 CPURISCVState *env, int vxrm);
2181
2182 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
2183 static inline void \
2184 do_##NAME(void *vd, void *vs1, void *vs2, int i, \
2185 CPURISCVState *env, int vxrm) \
2186 { \
2187 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
2188 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2189 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \
2190 }
2191
2192 static inline void
2193 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
2194 CPURISCVState *env,
2195 uint32_t vl, uint32_t vm, int vxrm,
2196 opivv2_rm_fn *fn, uint32_t vma, uint32_t esz)
2197 {
2198 for (uint32_t i = env->vstart; i < vl; i++) {
2199 if (!vm && !vext_elem_mask(v0, i)) {
2200 /* set masked-off elements to 1s */
2201 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
2202 continue;
2203 }
2204 fn(vd, vs1, vs2, i, env, vxrm);
2205 }
2206 env->vstart = 0;
2207 }
2208
2209 static inline void
2210 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
2211 CPURISCVState *env,
2212 uint32_t desc,
2213 opivv2_rm_fn *fn, uint32_t esz)
2214 {
2215 uint32_t vm = vext_vm(desc);
2216 uint32_t vl = env->vl;
2217 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
2218 uint32_t vta = vext_vta(desc);
2219 uint32_t vma = vext_vma(desc);
2220
2221 VSTART_CHECK_EARLY_EXIT(env, vl);
2222
2223 switch (env->vxrm) {
2224 case 0: /* rnu */
2225 vext_vv_rm_1(vd, v0, vs1, vs2,
2226 env, vl, vm, 0, fn, vma, esz);
2227 break;
2228 case 1: /* rne */
2229 vext_vv_rm_1(vd, v0, vs1, vs2,
2230 env, vl, vm, 1, fn, vma, esz);
2231 break;
2232 case 2: /* rdn */
2233 vext_vv_rm_1(vd, v0, vs1, vs2,
2234 env, vl, vm, 2, fn, vma, esz);
2235 break;
2236 default: /* rod */
2237 vext_vv_rm_1(vd, v0, vs1, vs2,
2238 env, vl, vm, 3, fn, vma, esz);
2239 break;
2240 }
2241 /* set tail elements to 1s */
2242 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
2243 }
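/*
 * The switch above dispatches on env->vxrm once, outside the element loop,
 * and hands the rounding mode down as a constant argument instead of
 * re-reading it for every element.
 */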
2244
2245 /* generate helpers for fixed point instructions with OPIVV format */
2246 #define GEN_VEXT_VV_RM(NAME, ESZ) \
2247 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
2248 CPURISCVState *env, uint32_t desc) \
2249 { \
2250 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \
2251 do_##NAME, ESZ); \
2252 }
2253
2254 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a,
2255 uint8_t b)
2256 {
2257 uint8_t res = a + b;
2258 if (res < a) {
2259 res = UINT8_MAX;
2260 env->vxsat = 0x1;
2261 }
2262 return res;
2263 }
2264
2265 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a,
2266 uint16_t b)
2267 {
2268 uint16_t res = a + b;
2269 if (res < a) {
2270 res = UINT16_MAX;
2271 env->vxsat = 0x1;
2272 }
2273 return res;
2274 }
2275
2276 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a,
2277 uint32_t b)
2278 {
2279 uint32_t res = a + b;
2280 if (res < a) {
2281 res = UINT32_MAX;
2282 env->vxsat = 0x1;
2283 }
2284 return res;
2285 }
2286
2287 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a,
2288 uint64_t b)
2289 {
2290 uint64_t res = a + b;
2291 if (res < a) {
2292 res = UINT64_MAX;
2293 env->vxsat = 0x1;
2294 }
2295 return res;
2296 }
2297
2298 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8)
2299 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16)
2300 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32)
2301 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64)
2302 GEN_VEXT_VV_RM(vsaddu_vv_b, 1)
2303 GEN_VEXT_VV_RM(vsaddu_vv_h, 2)
2304 GEN_VEXT_VV_RM(vsaddu_vv_w, 4)
2305 GEN_VEXT_VV_RM(vsaddu_vv_d, 8)
2306
2307 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i,
2308 CPURISCVState *env, int vxrm);
2309
2310 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
2311 static inline void \
2312 do_##NAME(void *vd, target_long s1, void *vs2, int i, \
2313 CPURISCVState *env, int vxrm) \
2314 { \
2315 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2316 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \
2317 }
2318
2319 static inline void
2320 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2,
2321 CPURISCVState *env,
2322 uint32_t vl, uint32_t vm, int vxrm,
2323 opivx2_rm_fn *fn, uint32_t vma, uint32_t esz)
2324 {
2325 for (uint32_t i = env->vstart; i < vl; i++) {
2326 if (!vm && !vext_elem_mask(v0, i)) {
2327 /* set masked-off elements to 1s */
2328 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
2329 continue;
2330 }
2331 fn(vd, s1, vs2, i, env, vxrm);
2332 }
2333 env->vstart = 0;
2334 }
2335
2336 static inline void
2337 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
2338 CPURISCVState *env,
2339 uint32_t desc,
2340 opivx2_rm_fn *fn, uint32_t esz)
2341 {
2342 uint32_t vm = vext_vm(desc);
2343 uint32_t vl = env->vl;
2344 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
2345 uint32_t vta = vext_vta(desc);
2346 uint32_t vma = vext_vma(desc);
2347
2348 VSTART_CHECK_EARLY_EXIT(env, vl);
2349
2350 switch (env->vxrm) {
2351 case 0: /* rnu */
2352 vext_vx_rm_1(vd, v0, s1, vs2,
2353 env, vl, vm, 0, fn, vma, esz);
2354 break;
2355 case 1: /* rne */
2356 vext_vx_rm_1(vd, v0, s1, vs2,
2357 env, vl, vm, 1, fn, vma, esz);
2358 break;
2359 case 2: /* rdn */
2360 vext_vx_rm_1(vd, v0, s1, vs2,
2361 env, vl, vm, 2, fn, vma, esz);
2362 break;
2363 default: /* rod */
2364 vext_vx_rm_1(vd, v0, s1, vs2,
2365 env, vl, vm, 3, fn, vma, esz);
2366 break;
2367 }
2368 /* set tail elements to 1s */
2369 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
2370 }
2371
2372 /* generate helpers for fixed point instructions with OPIVX format */
2373 #define GEN_VEXT_VX_RM(NAME, ESZ) \
2374 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
2375 void *vs2, CPURISCVState *env, \
2376 uint32_t desc) \
2377 { \
2378 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \
2379 do_##NAME, ESZ); \
2380 }
2381
2382 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8)
2383 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16)
2384 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32)
2385 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64)
2386 GEN_VEXT_VX_RM(vsaddu_vx_b, 1)
2387 GEN_VEXT_VX_RM(vsaddu_vx_h, 2)
2388 GEN_VEXT_VX_RM(vsaddu_vx_w, 4)
2389 GEN_VEXT_VX_RM(vsaddu_vx_d, 8)
2390
2391 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2392 {
2393 int8_t res = a + b;
2394 if ((res ^ a) & (res ^ b) & INT8_MIN) {
2395 res = a > 0 ? INT8_MAX : INT8_MIN;
2396 env->vxsat = 0x1;
2397 }
2398 return res;
2399 }
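/*
 * (res ^ a) & (res ^ b) keeps the sign bit only when the result's sign
 * differs from both operands' signs, which is exactly the signed-add
 * overflow condition; the saturated value then follows the common sign
 * of a and b.
 */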
2400
2401 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a,
2402 int16_t b)
2403 {
2404 int16_t res = a + b;
2405 if ((res ^ a) & (res ^ b) & INT16_MIN) {
2406 res = a > 0 ? INT16_MAX : INT16_MIN;
2407 env->vxsat = 0x1;
2408 }
2409 return res;
2410 }
2411
2412 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a,
2413 int32_t b)
2414 {
2415 int32_t res = a + b;
2416 if ((res ^ a) & (res ^ b) & INT32_MIN) {
2417 res = a > 0 ? INT32_MAX : INT32_MIN;
2418 env->vxsat = 0x1;
2419 }
2420 return res;
2421 }
2422
2423 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a,
2424 int64_t b)
2425 {
2426 int64_t res = a + b;
2427 if ((res ^ a) & (res ^ b) & INT64_MIN) {
2428 res = a > 0 ? INT64_MAX : INT64_MIN;
2429 env->vxsat = 0x1;
2430 }
2431 return res;
2432 }
2433
2434 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8)
2435 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16)
2436 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32)
2437 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64)
2438 GEN_VEXT_VV_RM(vsadd_vv_b, 1)
2439 GEN_VEXT_VV_RM(vsadd_vv_h, 2)
2440 GEN_VEXT_VV_RM(vsadd_vv_w, 4)
2441 GEN_VEXT_VV_RM(vsadd_vv_d, 8)
2442
2443 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8)
2444 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16)
2445 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32)
2446 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64)
2447 GEN_VEXT_VX_RM(vsadd_vx_b, 1)
2448 GEN_VEXT_VX_RM(vsadd_vx_h, 2)
2449 GEN_VEXT_VX_RM(vsadd_vx_w, 4)
2450 GEN_VEXT_VX_RM(vsadd_vx_d, 8)
2451
2452 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a,
2453 uint8_t b)
2454 {
2455 uint8_t res = a - b;
2456 if (res > a) {
2457 res = 0;
2458 env->vxsat = 0x1;
2459 }
2460 return res;
2461 }
2462
2463 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a,
2464 uint16_t b)
2465 {
2466 uint16_t res = a - b;
2467 if (res > a) {
2468 res = 0;
2469 env->vxsat = 0x1;
2470 }
2471 return res;
2472 }
2473
2474 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a,
2475 uint32_t b)
2476 {
2477 uint32_t res = a - b;
2478 if (res > a) {
2479 res = 0;
2480 env->vxsat = 0x1;
2481 }
2482 return res;
2483 }
2484
2485 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a,
2486 uint64_t b)
2487 {
2488 uint64_t res = a - b;
2489 if (res > a) {
2490 res = 0;
2491 env->vxsat = 0x1;
2492 }
2493 return res;
2494 }
2495
2496 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8)
2497 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16)
2498 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32)
2499 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64)
2500 GEN_VEXT_VV_RM(vssubu_vv_b, 1)
2501 GEN_VEXT_VV_RM(vssubu_vv_h, 2)
2502 GEN_VEXT_VV_RM(vssubu_vv_w, 4)
2503 GEN_VEXT_VV_RM(vssubu_vv_d, 8)
2504
2505 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8)
2506 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16)
2507 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32)
2508 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64)
2509 GEN_VEXT_VX_RM(vssubu_vx_b, 1)
2510 GEN_VEXT_VX_RM(vssubu_vx_h, 2)
2511 GEN_VEXT_VX_RM(vssubu_vx_w, 4)
2512 GEN_VEXT_VX_RM(vssubu_vx_d, 8)
2513
2514 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2515 {
2516 int8_t res = a - b;
2517 if ((res ^ a) & (a ^ b) & INT8_MIN) {
2518 res = a >= 0 ? INT8_MAX : INT8_MIN;
2519 env->vxsat = 0x1;
2520 }
2521 return res;
2522 }
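/*
 * For subtraction the overflow test is (res ^ a) & (a ^ b): the operands
 * must have different signs and the result's sign must differ from a's.
 */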
2523
2524 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a,
2525 int16_t b)
2526 {
2527 int16_t res = a - b;
2528 if ((res ^ a) & (a ^ b) & INT16_MIN) {
2529 res = a >= 0 ? INT16_MAX : INT16_MIN;
2530 env->vxsat = 0x1;
2531 }
2532 return res;
2533 }
2534
2535 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a,
2536 int32_t b)
2537 {
2538 int32_t res = a - b;
2539 if ((res ^ a) & (a ^ b) & INT32_MIN) {
2540 res = a >= 0 ? INT32_MAX : INT32_MIN;
2541 env->vxsat = 0x1;
2542 }
2543 return res;
2544 }
2545
2546 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a,
2547 int64_t b)
2548 {
2549 int64_t res = a - b;
2550 if ((res ^ a) & (a ^ b) & INT64_MIN) {
2551 res = a >= 0 ? INT64_MAX : INT64_MIN;
2552 env->vxsat = 0x1;
2553 }
2554 return res;
2555 }
2556
2557 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
2558 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
2559 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
2560 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
2561 GEN_VEXT_VV_RM(vssub_vv_b, 1)
2562 GEN_VEXT_VV_RM(vssub_vv_h, 2)
2563 GEN_VEXT_VV_RM(vssub_vv_w, 4)
2564 GEN_VEXT_VV_RM(vssub_vv_d, 8)
2565
2566 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
2567 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
2568 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
2569 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
2570 GEN_VEXT_VX_RM(vssub_vx_b, 1)
2571 GEN_VEXT_VX_RM(vssub_vx_h, 2)
2572 GEN_VEXT_VX_RM(vssub_vx_w, 4)
2573 GEN_VEXT_VX_RM(vssub_vx_d, 8)
2574
2575 /* Vector Single-Width Averaging Add and Subtract */
2576 static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
2577 {
2578 uint8_t d = extract64(v, shift, 1);
2579 uint8_t d1;
2580 uint64_t D1, D2;
2581
2582 if (shift == 0 || shift > 64) {
2583 return 0;
2584 }
2585
2586 d1 = extract64(v, shift - 1, 1);
2587 D1 = extract64(v, 0, shift);
2588 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
2589 return d1;
2590 } else if (vxrm == 1) { /* round-to-nearest-even */
2591 if (shift > 1) {
2592 D2 = extract64(v, 0, shift - 1);
2593 return d1 & ((D2 != 0) | d);
2594 } else {
2595 return d1 & d;
2596 }
2597 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
2598 return !d & (D1 != 0);
2599 }
2600 return 0; /* round-down (truncate) */
2601 }
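/*
 * Example: v = 0b1011 (11), shift = 2, so the truncated result is 0b10 (2)
 * and the exact value is 11 / 4 = 2.75.  Here d = 0, d1 = 1, D1 = 3, D2 = 1,
 * giving a rounding increment of 1 for rnu, rne and rod (result 3) and 0 for
 * rdn (result 2).  For a tie such as v = 0b10, shift = 2 (exact value 0.5),
 * rnu returns increment 1 (round up to 1) while rne returns 0 (round to the
 * even value 0).
 */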
2602
2603 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a,
2604 int32_t b)
2605 {
2606 int64_t res = (int64_t)a + b;
2607 uint8_t round = get_round(vxrm, res, 1);
2608
2609 return (res >> 1) + round;
2610 }
2611
2612 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a,
2613 int64_t b)
2614 {
2615 int64_t res = a + b;
2616 uint8_t round = get_round(vxrm, res, 1);
2617 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN;
2618
2619 /* With signed overflow, bit 64 is inverse of bit 63. */
2620 return ((res >> 1) ^ over) + round;
2621 }
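/*
 * Example: aadd64(INT64_MAX, INT64_MAX) wraps res to -2, but 'over' is set,
 * so flipping bit 63 of res >> 1 restores the correct average INT64_MAX.
 */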
2622
2623 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32)
2624 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32)
2625 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32)
2626 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64)
2627 GEN_VEXT_VV_RM(vaadd_vv_b, 1)
2628 GEN_VEXT_VV_RM(vaadd_vv_h, 2)
2629 GEN_VEXT_VV_RM(vaadd_vv_w, 4)
2630 GEN_VEXT_VV_RM(vaadd_vv_d, 8)
2631
2632 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32)
2633 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32)
2634 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32)
2635 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64)
2636 GEN_VEXT_VX_RM(vaadd_vx_b, 1)
2637 GEN_VEXT_VX_RM(vaadd_vx_h, 2)
2638 GEN_VEXT_VX_RM(vaadd_vx_w, 4)
2639 GEN_VEXT_VX_RM(vaadd_vx_d, 8)
2640
2641 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm,
2642 uint32_t a, uint32_t b)
2643 {
2644 uint64_t res = (uint64_t)a + b;
2645 uint8_t round = get_round(vxrm, res, 1);
2646
2647 return (res >> 1) + round;
2648 }
2649
2650 static inline uint64_t aaddu64(CPURISCVState *env, int vxrm,
2651 uint64_t a, uint64_t b)
2652 {
2653 uint64_t res = a + b;
2654 uint8_t round = get_round(vxrm, res, 1);
2655 uint64_t over = (uint64_t)(res < a) << 63;
2656
2657 return ((res >> 1) | over) + round;
2658 }
2659
2660 RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32)
2661 RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32)
2662 RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32)
2663 RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64)
2664 GEN_VEXT_VV_RM(vaaddu_vv_b, 1)
2665 GEN_VEXT_VV_RM(vaaddu_vv_h, 2)
2666 GEN_VEXT_VV_RM(vaaddu_vv_w, 4)
2667 GEN_VEXT_VV_RM(vaaddu_vv_d, 8)
2668
2669 RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32)
2670 RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32)
2671 RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32)
2672 RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64)
2673 GEN_VEXT_VX_RM(vaaddu_vx_b, 1)
2674 GEN_VEXT_VX_RM(vaaddu_vx_h, 2)
2675 GEN_VEXT_VX_RM(vaaddu_vx_w, 4)
2676 GEN_VEXT_VX_RM(vaaddu_vx_d, 8)
2677
2678 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a,
2679 int32_t b)
2680 {
2681 int64_t res = (int64_t)a - b;
2682 uint8_t round = get_round(vxrm, res, 1);
2683
2684 return (res >> 1) + round;
2685 }
2686
2687 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a,
2688 int64_t b)
2689 {
2690 int64_t res = (int64_t)a - b;
2691 uint8_t round = get_round(vxrm, res, 1);
2692 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN;
2693
2694 /* With signed overflow, bit 64 is inverse of bit 63. */
2695 return ((res >> 1) ^ over) + round;
2696 }
2697
2698 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32)
2699 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32)
2700 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32)
2701 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64)
2702 GEN_VEXT_VV_RM(vasub_vv_b, 1)
2703 GEN_VEXT_VV_RM(vasub_vv_h, 2)
2704 GEN_VEXT_VV_RM(vasub_vv_w, 4)
2705 GEN_VEXT_VV_RM(vasub_vv_d, 8)
2706
2707 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32)
2708 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32)
2709 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32)
2710 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64)
2711 GEN_VEXT_VX_RM(vasub_vx_b, 1)
2712 GEN_VEXT_VX_RM(vasub_vx_h, 2)
2713 GEN_VEXT_VX_RM(vasub_vx_w, 4)
2714 GEN_VEXT_VX_RM(vasub_vx_d, 8)
2715
2716 static inline uint32_t asubu32(CPURISCVState *env, int vxrm,
2717 uint32_t a, uint32_t b)
2718 {
2719 int64_t res = (int64_t)a - b;
2720 uint8_t round = get_round(vxrm, res, 1);
2721
2722 return (res >> 1) + round;
2723 }
2724
2725 static inline uint64_t asubu64(CPURISCVState *env, int vxrm,
2726 uint64_t a, uint64_t b)
2727 {
2728 uint64_t res = (uint64_t)a - b;
2729 uint8_t round = get_round(vxrm, res, 1);
2730 uint64_t over = (uint64_t)(res > a) << 63;
2731
2732 return ((res >> 1) | over) + round;
2733 }
2734
2735 RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32)
2736 RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32)
2737 RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32)
2738 RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64)
2739 GEN_VEXT_VV_RM(vasubu_vv_b, 1)
2740 GEN_VEXT_VV_RM(vasubu_vv_h, 2)
2741 GEN_VEXT_VV_RM(vasubu_vv_w, 4)
2742 GEN_VEXT_VV_RM(vasubu_vv_d, 8)
2743
2744 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32)
2745 RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32)
2746 RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32)
2747 RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64)
2748 GEN_VEXT_VX_RM(vasubu_vx_b, 1)
2749 GEN_VEXT_VX_RM(vasubu_vx_h, 2)
2750 GEN_VEXT_VX_RM(vasubu_vx_w, 4)
2751 GEN_VEXT_VX_RM(vasubu_vx_d, 8)
2752
2753 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */
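/*
 * vsmul treats the operands as fixed-point fractions: the 2*SEW-bit product
 * is shifted right by SEW - 1 with rounding, then saturated to SEW bits.
 * The only product that can overflow after the shift is (-1.0) * (-1.0),
 * i.e. INT_MIN * INT_MIN.
 */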
2754 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2755 {
2756 uint8_t round;
2757 int16_t res;
2758
2759 res = (int16_t)a * (int16_t)b;
2760 round = get_round(vxrm, res, 7);
2761 res = (res >> 7) + round;
2762
2763 if (res > INT8_MAX) {
2764 env->vxsat = 0x1;
2765 return INT8_MAX;
2766 } else if (res < INT8_MIN) {
2767 env->vxsat = 0x1;
2768 return INT8_MIN;
2769 } else {
2770 return res;
2771 }
2772 }
2773
2774 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2775 {
2776 uint8_t round;
2777 int32_t res;
2778
2779 res = (int32_t)a * (int32_t)b;
2780 round = get_round(vxrm, res, 15);
2781 res = (res >> 15) + round;
2782
2783 if (res > INT16_MAX) {
2784 env->vxsat = 0x1;
2785 return INT16_MAX;
2786 } else if (res < INT16_MIN) {
2787 env->vxsat = 0x1;
2788 return INT16_MIN;
2789 } else {
2790 return res;
2791 }
2792 }
2793
2794 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2795 {
2796 uint8_t round;
2797 int64_t res;
2798
2799 res = (int64_t)a * (int64_t)b;
2800 round = get_round(vxrm, res, 31);
2801 res = (res >> 31) + round;
2802
2803 if (res > INT32_MAX) {
2804 env->vxsat = 0x1;
2805 return INT32_MAX;
2806 } else if (res < INT32_MIN) {
2807 env->vxsat = 0x1;
2808 return INT32_MIN;
2809 } else {
2810 return res;
2811 }
2812 }
2813
2814 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2815 {
2816 uint8_t round;
2817 uint64_t hi_64, lo_64;
2818 int64_t res;
2819
2820 if (a == INT64_MIN && b == INT64_MIN) {
2821 env->vxsat = 1;
2822 return INT64_MAX;
2823 }
2824
2825 muls64(&lo_64, &hi_64, a, b);
2826 round = get_round(vxrm, lo_64, 63);
2827 /*
2828 * Cannot overflow, as there are always
2829 * 2 sign bits after multiply.
2830 */
2831 res = (hi_64 << 1) | (lo_64 >> 63);
2832 if (round) {
2833 if (res == INT64_MAX) {
2834 env->vxsat = 1;
2835 } else {
2836 res += 1;
2837 }
2838 }
2839 return res;
2840 }
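/*
 * Only the 64-bit variant needs the explicit INT64_MIN * INT64_MIN check
 * above: the narrower variants compute in a wider type and catch that case
 * through the ordinary saturation test, whereas here the true result 2^63
 * does not fit in int64_t.
 */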
2841
2842 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8)
2843 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16)
2844 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32)
2845 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64)
2846 GEN_VEXT_VV_RM(vsmul_vv_b, 1)
2847 GEN_VEXT_VV_RM(vsmul_vv_h, 2)
2848 GEN_VEXT_VV_RM(vsmul_vv_w, 4)
2849 GEN_VEXT_VV_RM(vsmul_vv_d, 8)
2850
2851 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8)
2852 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16)
2853 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32)
2854 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64)
2855 GEN_VEXT_VX_RM(vsmul_vx_b, 1)
2856 GEN_VEXT_VX_RM(vsmul_vx_h, 2)
2857 GEN_VEXT_VX_RM(vsmul_vx_w, 4)
2858 GEN_VEXT_VX_RM(vsmul_vx_d, 8)
2859
2860 /* Vector Single-Width Scaling Shift Instructions */
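/*
 * vssrl/vssra shift right by the low log2(SEW) bits of the shift operand
 * and add the rounding increment from get_round(); unlike the narrowing
 * clips further below, the result is not saturated.
 */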
2861 static inline uint8_t
2862 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2863 {
2864 uint8_t round, shift = b & 0x7;
2865 uint8_t res;
2866
2867 round = get_round(vxrm, a, shift);
2868 res = (a >> shift) + round;
2869 return res;
2870 }
2871 static inline uint16_t
2872 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b)
2873 {
2874 uint8_t round, shift = b & 0xf;
2875
2876 round = get_round(vxrm, a, shift);
2877 return (a >> shift) + round;
2878 }
2879 static inline uint32_t
2880 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b)
2881 {
2882 uint8_t round, shift = b & 0x1f;
2883
2884 round = get_round(vxrm, a, shift);
2885 return (a >> shift) + round;
2886 }
2887 static inline uint64_t
2888 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b)
2889 {
2890 uint8_t round, shift = b & 0x3f;
2891
2892 round = get_round(vxrm, a, shift);
2893 return (a >> shift) + round;
2894 }
2895 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8)
2896 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16)
2897 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32)
2898 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64)
2899 GEN_VEXT_VV_RM(vssrl_vv_b, 1)
2900 GEN_VEXT_VV_RM(vssrl_vv_h, 2)
2901 GEN_VEXT_VV_RM(vssrl_vv_w, 4)
2902 GEN_VEXT_VV_RM(vssrl_vv_d, 8)
2903
2904 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8)
2905 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16)
2906 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32)
2907 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64)
2908 GEN_VEXT_VX_RM(vssrl_vx_b, 1)
2909 GEN_VEXT_VX_RM(vssrl_vx_h, 2)
2910 GEN_VEXT_VX_RM(vssrl_vx_w, 4)
2911 GEN_VEXT_VX_RM(vssrl_vx_d, 8)
2912
2913 static inline int8_t
2914 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2915 {
2916 uint8_t round, shift = b & 0x7;
2917
2918 round = get_round(vxrm, a, shift);
2919 return (a >> shift) + round;
2920 }
2921 static inline int16_t
2922 vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2923 {
2924 uint8_t round, shift = b & 0xf;
2925
2926 round = get_round(vxrm, a, shift);
2927 return (a >> shift) + round;
2928 }
2929 static inline int32_t
2930 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2931 {
2932 uint8_t round, shift = b & 0x1f;
2933
2934 round = get_round(vxrm, a, shift);
2935 return (a >> shift) + round;
2936 }
2937 static inline int64_t
2938 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2939 {
2940 uint8_t round, shift = b & 0x3f;
2941
2942 round = get_round(vxrm, a, shift);
2943 return (a >> shift) + round;
2944 }
2945
2946 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8)
2947 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16)
2948 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32)
2949 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64)
2950 GEN_VEXT_VV_RM(vssra_vv_b, 1)
2951 GEN_VEXT_VV_RM(vssra_vv_h, 2)
2952 GEN_VEXT_VV_RM(vssra_vv_w, 4)
2953 GEN_VEXT_VV_RM(vssra_vv_d, 8)
2954
2955 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8)
2956 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16)
2957 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32)
2958 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64)
2959 GEN_VEXT_VX_RM(vssra_vx_b, 1)
2960 GEN_VEXT_VX_RM(vssra_vx_h, 2)
2961 GEN_VEXT_VX_RM(vssra_vx_w, 4)
2962 GEN_VEXT_VX_RM(vssra_vx_d, 8)
2963
2964 /* Vector Narrowing Fixed-Point Clip Instructions */
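/*
 * The narrowing clips take a 2*SEW-bit source element, so the shift amount
 * is masked to log2(2*SEW) bits (e.g. b & 0xf for an 8-bit destination);
 * the shifted, rounded value is then saturated to the SEW-bit destination.
 */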
2965 static inline int8_t
2966 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b)
2967 {
2968 uint8_t round, shift = b & 0xf;
2969 int16_t res;
2970
2971 round = get_round(vxrm, a, shift);
2972 res = (a >> shift) + round;
2973 if (res > INT8_MAX) {
2974 env->vxsat = 0x1;
2975 return INT8_MAX;
2976 } else if (res < INT8_MIN) {
2977 env->vxsat = 0x1;
2978 return INT8_MIN;
2979 } else {
2980 return res;
2981 }
2982 }
2983
2984 static inline int16_t
2985 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b)
2986 {
2987 uint8_t round, shift = b & 0x1f;
2988 int32_t res;
2989
2990 round = get_round(vxrm, a, shift);
2991 res = (a >> shift) + round;
2992 if (res > INT16_MAX) {
2993 env->vxsat = 0x1;
2994 return INT16_MAX;
2995 } else if (res < INT16_MIN) {
2996 env->vxsat = 0x1;
2997 return INT16_MIN;
2998 } else {
2999 return res;
3000 }
3001 }
3002
3003 static inline int32_t
3004 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b)
3005 {
3006 uint8_t round, shift = b & 0x3f;
3007 int64_t res;
3008
3009 round = get_round(vxrm, a, shift);
3010 res = (a >> shift) + round;
3011 if (res > INT32_MAX) {
3012 env->vxsat = 0x1;
3013 return INT32_MAX;
3014 } else if (res < INT32_MIN) {
3015 env->vxsat = 0x1;
3016 return INT32_MIN;
3017 } else {
3018 return res;
3019 }
3020 }
3021
3022 RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8)
3023 RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16)
3024 RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32)
3025 GEN_VEXT_VV_RM(vnclip_wv_b, 1)
3026 GEN_VEXT_VV_RM(vnclip_wv_h, 2)
3027 GEN_VEXT_VV_RM(vnclip_wv_w, 4)
3028
3029 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8)
3030 RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16)
3031 RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32)
3032 GEN_VEXT_VX_RM(vnclip_wx_b, 1)
3033 GEN_VEXT_VX_RM(vnclip_wx_h, 2)
3034 GEN_VEXT_VX_RM(vnclip_wx_w, 4)
3035
3036 static inline uint8_t
3037 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
3038 {
3039 uint8_t round, shift = b & 0xf;
3040 uint16_t res;
3041
3042 round = get_round(vxrm, a, shift);
3043 res = (a >> shift) + round;
3044 if (res > UINT8_MAX) {
3045 env->vxsat = 0x1;
3046 return UINT8_MAX;
3047 } else {
3048 return res;
3049 }
3050 }
3051
3052 static inline uint16_t
3053 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b)
3054 {
3055 uint8_t round, shift = b & 0x1f;
3056 uint32_t res;
3057
3058 round = get_round(vxrm, a, shift);
3059 res = (a >> shift) + round;
3060 if (res > UINT16_MAX) {
3061 env->vxsat = 0x1;
3062 return UINT16_MAX;
3063 } else {
3064 return res;
3065 }
3066 }
3067
3068 static inline uint32_t
3069 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
3070 {
3071 uint8_t round, shift = b & 0x3f;
3072 uint64_t res;
3073
3074 round = get_round(vxrm, a, shift);
3075 res = (a >> shift) + round;
3076 if (res > UINT32_MAX) {
3077 env->vxsat = 0x1;
3078 return UINT32_MAX;
3079 } else {
3080 return res;
3081 }
3082 }
3083
3084 RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
3085 RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
3086 RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
3087 GEN_VEXT_VV_RM(vnclipu_wv_b, 1)
3088 GEN_VEXT_VV_RM(vnclipu_wv_h, 2)
3089 GEN_VEXT_VV_RM(vnclipu_wv_w, 4)
3090
3091 RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8)
3092 RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16)
3093 RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32)
3094 GEN_VEXT_VX_RM(vnclipu_wx_b, 1)
3095 GEN_VEXT_VX_RM(vnclipu_wx_h, 2)
3096 GEN_VEXT_VX_RM(vnclipu_wx_w, 4)
3097
3098 /*
3099  * Vector Floating-Point Arithmetic Instructions
3100 */
3101 /* Vector Single-Width Floating-Point Add/Subtract Instructions */
3102 #define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
3103 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \
3104 CPURISCVState *env) \
3105 { \
3106 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
3107 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3108 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \
3109 }
3110
3111 #define GEN_VEXT_VV_ENV(NAME, ESZ) \
3112 void HELPER(NAME)(void *vd, void *v0, void *vs1, \
3113 void *vs2, CPURISCVState *env, \
3114 uint32_t desc) \
3115 { \
3116 uint32_t vm = vext_vm(desc); \
3117 uint32_t vl = env->vl; \
3118 uint32_t total_elems = \
3119 vext_get_total_elems(env, desc, ESZ); \
3120 uint32_t vta = vext_vta(desc); \
3121 uint32_t vma = vext_vma(desc); \
3122 uint32_t i; \
3123 \
3124 VSTART_CHECK_EARLY_EXIT(env, vl); \
3125 \
3126 for (i = env->vstart; i < vl; i++) { \
3127 if (!vm && !vext_elem_mask(v0, i)) { \
3128 /* set masked-off elements to 1s */ \
3129 vext_set_elems_1s(vd, vma, i * ESZ, \
3130 (i + 1) * ESZ); \
3131 continue; \
3132 } \
3133 do_##NAME(vd, vs1, vs2, i, env); \
3134 } \
3135 env->vstart = 0; \
3136 /* set tail elements to 1s */ \
3137 vext_set_elems_1s(vd, vta, vl * ESZ, \
3138 total_elems * ESZ); \
3139 }
3140
3141 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
3142 RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
3143 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
3144 GEN_VEXT_VV_ENV(vfadd_vv_h, 2)
3145 GEN_VEXT_VV_ENV(vfadd_vv_w, 4)
3146 GEN_VEXT_VV_ENV(vfadd_vv_d, 8)
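
/*
 * For reference, the RVVCALL/GEN_VEXT_VV_ENV pair above roughly expands to
 * the following (sketch only, with OP_UUU_H spelled out as uint16_t):
 *
 *   static void do_vfadd_vv_h(void *vd, void *vs1, void *vs2, int i,
 *                             CPURISCVState *env)
 *   {
 *       uint16_t s1 = *((uint16_t *)vs1 + H2(i));
 *       uint16_t s2 = *((uint16_t *)vs2 + H2(i));
 *       *((uint16_t *)vd + H2(i)) = float16_add(s2, s1, &env->fp_status);
 *   }
 *
 * HELPER(vfadd_vv_h) then loops from vstart to vl, skipping (and, with vma,
 * filling with 1s) masked-off elements, and finally sets the tail elements
 * to 1s when the tail-agnostic policy requests it.
 */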
3147
3148 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
3149 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
3150 CPURISCVState *env) \
3151 { \
3152 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3153 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\
3154 }
3155
3156 #define GEN_VEXT_VF(NAME, ESZ) \
3157 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \
3158 void *vs2, CPURISCVState *env, \
3159 uint32_t desc) \
3160 { \
3161 uint32_t vm = vext_vm(desc); \
3162 uint32_t vl = env->vl; \
3163 uint32_t total_elems = \
3164 vext_get_total_elems(env, desc, ESZ); \
3165 uint32_t vta = vext_vta(desc); \
3166 uint32_t vma = vext_vma(desc); \
3167 uint32_t i; \
3168 \
3169 VSTART_CHECK_EARLY_EXIT(env, vl); \
3170 \
3171 for (i = env->vstart; i < vl; i++) { \
3172 if (!vm && !vext_elem_mask(v0, i)) { \
3173 /* set masked-off elements to 1s */ \
3174 vext_set_elems_1s(vd, vma, i * ESZ, \
3175 (i + 1) * ESZ); \
3176 continue; \
3177 } \
3178 do_##NAME(vd, s1, vs2, i, env); \
3179 } \
3180 env->vstart = 0; \
3181 /* set tail elements to 1s */ \
3182 vext_set_elems_1s(vd, vta, vl * ESZ, \
3183 total_elems * ESZ); \
3184 }
3185
3186 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
3187 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add)
3188 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add)
3189 GEN_VEXT_VF(vfadd_vf_h, 2)
3190 GEN_VEXT_VF(vfadd_vf_w, 4)
3191 GEN_VEXT_VF(vfadd_vf_d, 8)
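
/*
 * Note on the _vf forms: the scalar rs1 arrives as a uint64_t and is
 * narrowed by the (TX1)(T1)s1 cast in OPFVF2. As an illustrative example,
 * for vfadd_vf_h a NaN-boxed half-precision 1.0 (0x....ffff3c00) is
 * truncated to its low 16 bits, 0x3c00, before being added to each element.
 */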
3192
3193 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub)
3194 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub)
3195 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub)
3196 GEN_VEXT_VV_ENV(vfsub_vv_h, 2)
3197 GEN_VEXT_VV_ENV(vfsub_vv_w, 4)
3198 GEN_VEXT_VV_ENV(vfsub_vv_d, 8)
3199 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub)
3200 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub)
3201 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub)
3202 GEN_VEXT_VF(vfsub_vf_h, 2)
3203 GEN_VEXT_VF(vfsub_vf_w, 4)
3204 GEN_VEXT_VF(vfsub_vf_d, 8)
3205
3206 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s)
3207 {
3208 return float16_sub(b, a, s);
3209 }
3210
3211 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s)
3212 {
3213 return float32_sub(b, a, s);
3214 }
3215
3216 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s)
3217 {
3218 return float64_sub(b, a, s);
3219 }
3220
3221 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub)
3222 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub)
3223 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub)
3224 GEN_VEXT_VF(vfrsub_vf_h, 2)
3225 GEN_VEXT_VF(vfrsub_vf_w, 4)
3226 GEN_VEXT_VF(vfrsub_vf_d, 8)
3227
3228 /* Vector Widening Floating-Point Add/Subtract Instructions */
3229 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s)
3230 {
3231 return float32_add(float16_to_float32(a, true, s),
3232 float16_to_float32(b, true, s), s);
3233 }
3234
3235 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s)
3236 {
3237 return float64_add(float32_to_float64(a, s),
3238 float32_to_float64(b, s), s);
3239
3240 }
3241
3242 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16)
3243 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32)
3244 GEN_VEXT_VV_ENV(vfwadd_vv_h, 4)
3245 GEN_VEXT_VV_ENV(vfwadd_vv_w, 8)
3246 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16)
3247 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32)
3248 GEN_VEXT_VF(vfwadd_vf_h, 4)
3249 GEN_VEXT_VF(vfwadd_vf_w, 8)
3250
3251 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s)
3252 {
3253 return float32_sub(float16_to_float32(a, true, s),
3254 float16_to_float32(b, true, s), s);
3255 }
3256
3257 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s)
3258 {
3259 return float64_sub(float32_to_float64(a, s),
3260 float32_to_float64(b, s), s);
3261
3262 }
3263
3264 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16)
3265 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32)
3266 GEN_VEXT_VV_ENV(vfwsub_vv_h, 4)
3267 GEN_VEXT_VV_ENV(vfwsub_vv_w, 8)
3268 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16)
3269 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32)
3270 GEN_VEXT_VF(vfwsub_vf_h, 4)
3271 GEN_VEXT_VF(vfwsub_vf_w, 8)
3272
3273 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s)
3274 {
3275 return float32_add(a, float16_to_float32(b, true, s), s);
3276 }
3277
3278 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s)
3279 {
3280 return float64_add(a, float32_to_float64(b, s), s);
3281 }
3282
3283 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16)
3284 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32)
3285 GEN_VEXT_VV_ENV(vfwadd_wv_h, 4)
3286 GEN_VEXT_VV_ENV(vfwadd_wv_w, 8)
3287 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16)
3288 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32)
3289 GEN_VEXT_VF(vfwadd_wf_h, 4)
3290 GEN_VEXT_VF(vfwadd_wf_w, 8)
3291
3292 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s)
3293 {
3294 return float32_sub(a, float16_to_float32(b, true, s), s);
3295 }
3296
3297 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s)
3298 {
3299 return float64_sub(a, float32_to_float64(b, s), s);
3300 }
3301
3302 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16)
3303 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32)
3304 GEN_VEXT_VV_ENV(vfwsub_wv_h, 4)
3305 GEN_VEXT_VV_ENV(vfwsub_wv_w, 8)
3306 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16)
3307 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32)
3308 GEN_VEXT_VF(vfwsub_wf_h, 4)
3309 GEN_VEXT_VF(vfwsub_wf_w, 8)
3310
3311 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */
3312 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul)
3313 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul)
3314 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul)
3315 GEN_VEXT_VV_ENV(vfmul_vv_h, 2)
3316 GEN_VEXT_VV_ENV(vfmul_vv_w, 4)
3317 GEN_VEXT_VV_ENV(vfmul_vv_d, 8)
3318 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul)
3319 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul)
3320 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul)
3321 GEN_VEXT_VF(vfmul_vf_h, 2)
3322 GEN_VEXT_VF(vfmul_vf_w, 4)
3323 GEN_VEXT_VF(vfmul_vf_d, 8)
3324
3325 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div)
3326 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div)
3327 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div)
3328 GEN_VEXT_VV_ENV(vfdiv_vv_h, 2)
3329 GEN_VEXT_VV_ENV(vfdiv_vv_w, 4)
3330 GEN_VEXT_VV_ENV(vfdiv_vv_d, 8)
3331 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div)
3332 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div)
3333 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div)
3334 GEN_VEXT_VF(vfdiv_vf_h, 2)
3335 GEN_VEXT_VF(vfdiv_vf_w, 4)
3336 GEN_VEXT_VF(vfdiv_vf_d, 8)
3337
3338 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s)
3339 {
3340 return float16_div(b, a, s);
3341 }
3342
3343 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s)
3344 {
3345 return float32_div(b, a, s);
3346 }
3347
3348 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s)
3349 {
3350 return float64_div(b, a, s);
3351 }
3352
3353 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv)
3354 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv)
3355 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv)
3356 GEN_VEXT_VF(vfrdiv_vf_h, 2)
3357 GEN_VEXT_VF(vfrdiv_vf_w, 4)
3358 GEN_VEXT_VF(vfrdiv_vf_d, 8)
3359
3360 /* Vector Widening Floating-Point Multiply */
3361 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s)
3362 {
3363 return float32_mul(float16_to_float32(a, true, s),
3364 float16_to_float32(b, true, s), s);
3365 }
3366
3367 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s)
3368 {
3369 return float64_mul(float32_to_float64(a, s),
3370 float32_to_float64(b, s), s);
3371
3372 }
3373 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16)
3374 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32)
3375 GEN_VEXT_VV_ENV(vfwmul_vv_h, 4)
3376 GEN_VEXT_VV_ENV(vfwmul_vv_w, 8)
3377 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16)
3378 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32)
3379 GEN_VEXT_VF(vfwmul_vf_h, 4)
3380 GEN_VEXT_VF(vfwmul_vf_w, 8)
3381
3382 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
3383 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
3384 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \
3385 CPURISCVState *env) \
3386 { \
3387 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
3388 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3389 TD d = *((TD *)vd + HD(i)); \
3390 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \
3391 }
3392
3393 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3394 {
3395 return float16_muladd(a, b, d, 0, s);
3396 }
3397
3398 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3399 {
3400 return float32_muladd(a, b, d, 0, s);
3401 }
3402
3403 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3404 {
3405 return float64_muladd(a, b, d, 0, s);
3406 }
3407
3408 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16)
3409 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32)
3410 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64)
3411 GEN_VEXT_VV_ENV(vfmacc_vv_h, 2)
3412 GEN_VEXT_VV_ENV(vfmacc_vv_w, 4)
3413 GEN_VEXT_VV_ENV(vfmacc_vv_d, 8)
3414
3415 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
3416 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
3417 CPURISCVState *env) \
3418 { \
3419 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3420 TD d = *((TD *)vd + HD(i)); \
3421 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\
3422 }
3423
3424 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16)
3425 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32)
3426 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64)
3427 GEN_VEXT_VF(vfmacc_vf_h, 2)
3428 GEN_VEXT_VF(vfmacc_vf_w, 4)
3429 GEN_VEXT_VF(vfmacc_vf_d, 8)
3430
3431 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3432 {
3433 return float16_muladd(a, b, d, float_muladd_negate_c |
3434 float_muladd_negate_product, s);
3435 }
3436
3437 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3438 {
3439 return float32_muladd(a, b, d, float_muladd_negate_c |
3440 float_muladd_negate_product, s);
3441 }
3442
3443 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3444 {
3445 return float64_muladd(a, b, d, float_muladd_negate_c |
3446 float_muladd_negate_product, s);
3447 }
3448
3449 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16)
3450 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32)
3451 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64)
3452 GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2)
3453 GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4)
3454 GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8)
3455 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16)
3456 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32)
3457 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64)
3458 GEN_VEXT_VF(vfnmacc_vf_h, 2)
3459 GEN_VEXT_VF(vfnmacc_vf_w, 4)
3460 GEN_VEXT_VF(vfnmacc_vf_d, 8)
3461
3462 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3463 {
3464 return float16_muladd(a, b, d, float_muladd_negate_c, s);
3465 }
3466
3467 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3468 {
3469 return float32_muladd(a, b, d, float_muladd_negate_c, s);
3470 }
3471
3472 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3473 {
3474 return float64_muladd(a, b, d, float_muladd_negate_c, s);
3475 }
3476
3477 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16)
3478 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32)
3479 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64)
3480 GEN_VEXT_VV_ENV(vfmsac_vv_h, 2)
3481 GEN_VEXT_VV_ENV(vfmsac_vv_w, 4)
3482 GEN_VEXT_VV_ENV(vfmsac_vv_d, 8)
3483 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16)
3484 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32)
3485 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64)
3486 GEN_VEXT_VF(vfmsac_vf_h, 2)
3487 GEN_VEXT_VF(vfmsac_vf_w, 4)
3488 GEN_VEXT_VF(vfmsac_vf_d, 8)
3489
3490 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3491 {
3492 return float16_muladd(a, b, d, float_muladd_negate_product, s);
3493 }
3494
3495 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3496 {
3497 return float32_muladd(a, b, d, float_muladd_negate_product, s);
3498 }
3499
3500 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3501 {
3502 return float64_muladd(a, b, d, float_muladd_negate_product, s);
3503 }
3504
3505 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16)
3506 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32)
3507 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64)
3508 GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2)
3509 GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4)
3510 GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8)
3511 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16)
3512 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32)
3513 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64)
3514 GEN_VEXT_VF(vfnmsac_vf_h, 2)
3515 GEN_VEXT_VF(vfnmsac_vf_w, 4)
3516 GEN_VEXT_VF(vfnmsac_vf_d, 8)
3517
3518 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3519 {
3520 return float16_muladd(d, b, a, 0, s);
3521 }
3522
3523 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3524 {
3525 return float32_muladd(d, b, a, 0, s);
3526 }
3527
3528 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3529 {
3530 return float64_muladd(d, b, a, 0, s);
3531 }
3532
3533 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16)
3534 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32)
3535 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64)
3536 GEN_VEXT_VV_ENV(vfmadd_vv_h, 2)
3537 GEN_VEXT_VV_ENV(vfmadd_vv_w, 4)
3538 GEN_VEXT_VV_ENV(vfmadd_vv_d, 8)
3539 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16)
3540 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32)
3541 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64)
3542 GEN_VEXT_VF(vfmadd_vf_h, 2)
3543 GEN_VEXT_VF(vfmadd_vf_w, 4)
3544 GEN_VEXT_VF(vfmadd_vf_d, 8)
3545
3546 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3547 {
3548 return float16_muladd(d, b, a, float_muladd_negate_c |
3549 float_muladd_negate_product, s);
3550 }
3551
3552 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3553 {
3554 return float32_muladd(d, b, a, float_muladd_negate_c |
3555 float_muladd_negate_product, s);
3556 }
3557
3558 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3559 {
3560 return float64_muladd(d, b, a, float_muladd_negate_c |
3561 float_muladd_negate_product, s);
3562 }
3563
3564 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16)
3565 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32)
3566 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64)
3567 GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2)
3568 GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4)
3569 GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8)
3570 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16)
3571 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32)
3572 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64)
3573 GEN_VEXT_VF(vfnmadd_vf_h, 2)
3574 GEN_VEXT_VF(vfnmadd_vf_w, 4)
3575 GEN_VEXT_VF(vfnmadd_vf_d, 8)
3576
3577 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3578 {
3579 return float16_muladd(d, b, a, float_muladd_negate_c, s);
3580 }
3581
3582 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3583 {
3584 return float32_muladd(d, b, a, float_muladd_negate_c, s);
3585 }
3586
3587 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3588 {
3589 return float64_muladd(d, b, a, float_muladd_negate_c, s);
3590 }
3591
3592 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16)
3593 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32)
3594 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64)
3595 GEN_VEXT_VV_ENV(vfmsub_vv_h, 2)
3596 GEN_VEXT_VV_ENV(vfmsub_vv_w, 4)
3597 GEN_VEXT_VV_ENV(vfmsub_vv_d, 8)
3598 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16)
3599 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32)
3600 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64)
3601 GEN_VEXT_VF(vfmsub_vf_h, 2)
3602 GEN_VEXT_VF(vfmsub_vf_w, 4)
3603 GEN_VEXT_VF(vfmsub_vf_d, 8)
3604
3605 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3606 {
3607 return float16_muladd(d, b, a, float_muladd_negate_product, s);
3608 }
3609
3610 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3611 {
3612 return float32_muladd(d, b, a, float_muladd_negate_product, s);
3613 }
3614
3615 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3616 {
3617 return float64_muladd(d, b, a, float_muladd_negate_product, s);
3618 }
3619
3620 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16)
3621 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32)
3622 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64)
3623 GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2)
3624 GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4)
3625 GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8)
3626 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16)
3627 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32)
3628 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
3629 GEN_VEXT_VF(vfnmsub_vf_h, 2)
3630 GEN_VEXT_VF(vfnmsub_vf_w, 4)
3631 GEN_VEXT_VF(vfnmsub_vf_d, 8)
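
/*
 * Summary of the operand orderings implemented by the helpers above,
 * following the OPFVV3/OPFVF3 convention OP(s2, s1, d), where d is the
 * old destination element:
 *
 *   vfmacc:  vd = +(vs1 * vs2) + vd       vfmadd:  vd = +(vs1 * vd) + vs2
 *   vfnmacc: vd = -(vs1 * vs2) - vd       vfnmadd: vd = -(vs1 * vd) - vs2
 *   vfmsac:  vd = +(vs1 * vs2) - vd       vfmsub:  vd = +(vs1 * vd) - vs2
 *   vfnmsac: vd = -(vs1 * vs2) + vd       vfnmsub: vd = -(vs1 * vd) + vs2
 *
 * i.e. the *macc/*msac forms accumulate into vd, while the *madd/*msub
 * forms multiply by vd and add or subtract vs2.
 */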
3632
3633 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */
3634 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3635 {
3636 return float32_muladd(float16_to_float32(a, true, s),
3637 float16_to_float32(b, true, s), d, 0, s);
3638 }
3639
3640 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3641 {
3642 return float64_muladd(float32_to_float64(a, s),
3643 float32_to_float64(b, s), d, 0, s);
3644 }
3645
3646 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
3647 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
3648 GEN_VEXT_VV_ENV(vfwmacc_vv_h, 4)
3649 GEN_VEXT_VV_ENV(vfwmacc_vv_w, 8)
3650 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
3651 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
3652 GEN_VEXT_VF(vfwmacc_vf_h, 4)
3653 GEN_VEXT_VF(vfwmacc_vf_w, 8)
3654
3655 static uint32_t fwmaccbf16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3656 {
3657 return float32_muladd(bfloat16_to_float32(a, s),
3658 bfloat16_to_float32(b, s), d, 0, s);
3659 }
3660
3661 RVVCALL(OPFVV3, vfwmaccbf16_vv, WOP_UUU_H, H4, H2, H2, fwmaccbf16)
3662 GEN_VEXT_VV_ENV(vfwmaccbf16_vv, 4)
3663 RVVCALL(OPFVF3, vfwmaccbf16_vf, WOP_UUU_H, H4, H2, fwmaccbf16)
3664 GEN_VEXT_VF(vfwmaccbf16_vf, 4)
3665
3666 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3667 {
3668 return float32_muladd(float16_to_float32(a, true, s),
3669 float16_to_float32(b, true, s), d,
3670 float_muladd_negate_c | float_muladd_negate_product,
3671 s);
3672 }
3673
3674 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3675 {
3676 return float64_muladd(float32_to_float64(a, s), float32_to_float64(b, s),
3677 d, float_muladd_negate_c |
3678 float_muladd_negate_product, s);
3679 }
3680
3681 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
3682 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
3683 GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 4)
3684 GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 8)
3685 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
3686 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
3687 GEN_VEXT_VF(vfwnmacc_vf_h, 4)
3688 GEN_VEXT_VF(vfwnmacc_vf_w, 8)
3689
3690 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3691 {
3692 return float32_muladd(float16_to_float32(a, true, s),
3693 float16_to_float32(b, true, s), d,
3694 float_muladd_negate_c, s);
3695 }
3696
3697 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3698 {
3699 return float64_muladd(float32_to_float64(a, s),
3700 float32_to_float64(b, s), d,
3701 float_muladd_negate_c, s);
3702 }
3703
3704 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
3705 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
3706 GEN_VEXT_VV_ENV(vfwmsac_vv_h, 4)
3707 GEN_VEXT_VV_ENV(vfwmsac_vv_w, 8)
3708 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
3709 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
3710 GEN_VEXT_VF(vfwmsac_vf_h, 4)
3711 GEN_VEXT_VF(vfwmsac_vf_w, 8)
3712
3713 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3714 {
3715 return float32_muladd(float16_to_float32(a, true, s),
3716 float16_to_float32(b, true, s), d,
3717 float_muladd_negate_product, s);
3718 }
3719
3720 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3721 {
3722 return float64_muladd(float32_to_float64(a, s),
3723 float32_to_float64(b, s), d,
3724 float_muladd_negate_product, s);
3725 }
3726
3727 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
3728 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
3729 GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 4)
3730 GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 8)
3731 RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
3732 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
3733 GEN_VEXT_VF(vfwnmsac_vf_h, 4)
3734 GEN_VEXT_VF(vfwnmsac_vf_w, 8)
3735
3736 /* Vector Floating-Point Square-Root Instruction */
3737 #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \
3738 static void do_##NAME(void *vd, void *vs2, int i, \
3739 CPURISCVState *env) \
3740 { \
3741 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3742 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \
3743 }
3744
3745 #define GEN_VEXT_V_ENV(NAME, ESZ) \
3746 void HELPER(NAME)(void *vd, void *v0, void *vs2, \
3747 CPURISCVState *env, uint32_t desc) \
3748 { \
3749 uint32_t vm = vext_vm(desc); \
3750 uint32_t vl = env->vl; \
3751 uint32_t total_elems = \
3752 vext_get_total_elems(env, desc, ESZ); \
3753 uint32_t vta = vext_vta(desc); \
3754 uint32_t vma = vext_vma(desc); \
3755 uint32_t i; \
3756 \
3757 VSTART_CHECK_EARLY_EXIT(env, vl); \
3758 \
3759 if (vl == 0) { \
3760 return; \
3761 } \
3762 for (i = env->vstart; i < vl; i++) { \
3763 if (!vm && !vext_elem_mask(v0, i)) { \
3764 /* set masked-off elements to 1s */ \
3765 vext_set_elems_1s(vd, vma, i * ESZ, \
3766 (i + 1) * ESZ); \
3767 continue; \
3768 } \
3769 do_##NAME(vd, vs2, i, env); \
3770 } \
3771 env->vstart = 0; \
3772 vext_set_elems_1s(vd, vta, vl * ESZ, \
3773 total_elems * ESZ); \
3774 }
3775
3776 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt)
3777 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt)
3778 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt)
3779 GEN_VEXT_V_ENV(vfsqrt_v_h, 2)
3780 GEN_VEXT_V_ENV(vfsqrt_v_w, 4)
3781 GEN_VEXT_V_ENV(vfsqrt_v_d, 8)
3782
3783 /*
3784 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction
3785 *
3786 * Adapted from riscv-v-spec recip.c:
3787 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3788 */
3789 static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size)
3790 {
3791 uint64_t sign = extract64(f, frac_size + exp_size, 1);
3792 uint64_t exp = extract64(f, frac_size, exp_size);
3793 uint64_t frac = extract64(f, 0, frac_size);
3794
3795 const uint8_t lookup_table[] = {
3796 52, 51, 50, 48, 47, 46, 44, 43,
3797 42, 41, 40, 39, 38, 36, 35, 34,
3798 33, 32, 31, 30, 30, 29, 28, 27,
3799 26, 25, 24, 23, 23, 22, 21, 20,
3800 19, 19, 18, 17, 16, 16, 15, 14,
3801 14, 13, 12, 12, 11, 10, 10, 9,
3802 9, 8, 7, 7, 6, 6, 5, 4,
3803 4, 3, 3, 2, 2, 1, 1, 0,
3804 127, 125, 123, 121, 119, 118, 116, 114,
3805 113, 111, 109, 108, 106, 105, 103, 102,
3806 100, 99, 97, 96, 95, 93, 92, 91,
3807 90, 88, 87, 86, 85, 84, 83, 82,
3808 80, 79, 78, 77, 76, 75, 74, 73,
3809 72, 71, 70, 70, 69, 68, 67, 66,
3810 65, 64, 63, 63, 62, 61, 60, 59,
3811 59, 58, 57, 56, 56, 55, 54, 53
3812 };
3813 const int precision = 7;
3814
3815 if (exp == 0 && frac != 0) { /* subnormal */
3816 /* Normalize the subnormal. */
3817 while (extract64(frac, frac_size - 1, 1) == 0) {
3818 exp--;
3819 frac <<= 1;
3820 }
3821
3822 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3823 }
3824
3825 int idx = ((exp & 1) << (precision - 1)) |
3826 (frac >> (frac_size - precision + 1));
3827 uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
3828 (frac_size - precision);
3829 uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2;
3830
3831 uint64_t val = 0;
3832 val = deposit64(val, 0, frac_size, out_frac);
3833 val = deposit64(val, frac_size, exp_size, out_exp);
3834 val = deposit64(val, frac_size + exp_size, 1, sign);
3835 return val;
3836 }
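
/*
 * Worked example (illustrative): frsqrt7_s(4.0f), i.e. f = 0x40800000,
 * exp = 129, frac = 0.
 *   idx      = ((exp & 1) << 6) | (frac >> 17) = 64
 *   out_frac = lookup_table[64] << 16 = 127 << 16
 *   out_exp  = (3 * 127 + ~129) / 2 = (3 * bias - exp - 1) / 2 = 125
 * giving 0x3eff0000 ~= 0.498, a 7-bit estimate of 1/sqrt(4.0) = 0.5.
 */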
3837
3838 static float16 frsqrt7_h(float16 f, float_status *s)
3839 {
3840 int exp_size = 5, frac_size = 10;
3841 bool sign = float16_is_neg(f);
3842
3843 /*
3844 * frsqrt7(sNaN) = canonical NaN
3845 * frsqrt7(-inf) = canonical NaN
3846 * frsqrt7(-normal) = canonical NaN
3847 * frsqrt7(-subnormal) = canonical NaN
3848 */
3849 if (float16_is_signaling_nan(f, s) ||
3850 (float16_is_infinity(f) && sign) ||
3851 (float16_is_normal(f) && sign) ||
3852 (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) {
3853 s->float_exception_flags |= float_flag_invalid;
3854 return float16_default_nan(s);
3855 }
3856
3857 /* frsqrt7(qNaN) = canonical NaN */
3858 if (float16_is_quiet_nan(f, s)) {
3859 return float16_default_nan(s);
3860 }
3861
3862 /* frsqrt7(+-0) = +-inf */
3863 if (float16_is_zero(f)) {
3864 s->float_exception_flags |= float_flag_divbyzero;
3865 return float16_set_sign(float16_infinity, sign);
3866 }
3867
3868 /* frsqrt7(+inf) = +0 */
3869 if (float16_is_infinity(f) && !sign) {
3870 return float16_set_sign(float16_zero, sign);
3871 }
3872
3873 /* +normal, +subnormal */
3874 uint64_t val = frsqrt7(f, exp_size, frac_size);
3875 return make_float16(val);
3876 }
3877
3878 static float32 frsqrt7_s(float32 f, float_status *s)
3879 {
3880 int exp_size = 8, frac_size = 23;
3881 bool sign = float32_is_neg(f);
3882
3883 /*
3884 * frsqrt7(sNaN) = canonical NaN
3885 * frsqrt7(-inf) = canonical NaN
3886 * frsqrt7(-normal) = canonical NaN
3887 * frsqrt7(-subnormal) = canonical NaN
3888 */
3889 if (float32_is_signaling_nan(f, s) ||
3890 (float32_is_infinity(f) && sign) ||
3891 (float32_is_normal(f) && sign) ||
3892 (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) {
3893 s->float_exception_flags |= float_flag_invalid;
3894 return float32_default_nan(s);
3895 }
3896
3897 /* frsqrt7(qNaN) = canonical NaN */
3898 if (float32_is_quiet_nan(f, s)) {
3899 return float32_default_nan(s);
3900 }
3901
3902 /* frsqrt7(+-0) = +-inf */
3903 if (float32_is_zero(f)) {
3904 s->float_exception_flags |= float_flag_divbyzero;
3905 return float32_set_sign(float32_infinity, sign);
3906 }
3907
3908 /* frsqrt7(+inf) = +0 */
3909 if (float32_is_infinity(f) && !sign) {
3910 return float32_set_sign(float32_zero, sign);
3911 }
3912
3913 /* +normal, +subnormal */
3914 uint64_t val = frsqrt7(f, exp_size, frac_size);
3915 return make_float32(val);
3916 }
3917
3918 static float64 frsqrt7_d(float64 f, float_status *s)
3919 {
3920 int exp_size = 11, frac_size = 52;
3921 bool sign = float64_is_neg(f);
3922
3923 /*
3924 * frsqrt7(sNaN) = canonical NaN
3925 * frsqrt7(-inf) = canonical NaN
3926 * frsqrt7(-normal) = canonical NaN
3927 * frsqrt7(-subnormal) = canonical NaN
3928 */
3929 if (float64_is_signaling_nan(f, s) ||
3930 (float64_is_infinity(f) && sign) ||
3931 (float64_is_normal(f) && sign) ||
3932 (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) {
3933 s->float_exception_flags |= float_flag_invalid;
3934 return float64_default_nan(s);
3935 }
3936
3937 /* frsqrt7(qNaN) = canonical NaN */
3938 if (float64_is_quiet_nan(f, s)) {
3939 return float64_default_nan(s);
3940 }
3941
3942 /* frsqrt7(+-0) = +-inf */
3943 if (float64_is_zero(f)) {
3944 s->float_exception_flags |= float_flag_divbyzero;
3945 return float64_set_sign(float64_infinity, sign);
3946 }
3947
3948 /* frsqrt7(+inf) = +0 */
3949 if (float64_is_infinity(f) && !sign) {
3950 return float64_set_sign(float64_zero, sign);
3951 }
3952
3953 /* +normal, +subnormal */
3954 uint64_t val = frsqrt7(f, exp_size, frac_size);
3955 return make_float64(val);
3956 }
3957
3958 RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h)
3959 RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s)
3960 RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d)
3961 GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2)
3962 GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4)
3963 GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8)
3964
3965 /*
3966 * Vector Floating-Point Reciprocal Estimate Instruction
3967 *
3968 * Adapted from riscv-v-spec recip.c:
3969 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3970 */
3971 static uint64_t frec7(uint64_t f, int exp_size, int frac_size,
3972 float_status *s)
3973 {
3974 uint64_t sign = extract64(f, frac_size + exp_size, 1);
3975 uint64_t exp = extract64(f, frac_size, exp_size);
3976 uint64_t frac = extract64(f, 0, frac_size);
3977
3978 const uint8_t lookup_table[] = {
3979 127, 125, 123, 121, 119, 117, 116, 114,
3980 112, 110, 109, 107, 105, 104, 102, 100,
3981 99, 97, 96, 94, 93, 91, 90, 88,
3982 87, 85, 84, 83, 81, 80, 79, 77,
3983 76, 75, 74, 72, 71, 70, 69, 68,
3984 66, 65, 64, 63, 62, 61, 60, 59,
3985 58, 57, 56, 55, 54, 53, 52, 51,
3986 50, 49, 48, 47, 46, 45, 44, 43,
3987 42, 41, 40, 40, 39, 38, 37, 36,
3988 35, 35, 34, 33, 32, 31, 31, 30,
3989 29, 28, 28, 27, 26, 25, 25, 24,
3990 23, 23, 22, 21, 21, 20, 19, 19,
3991 18, 17, 17, 16, 15, 15, 14, 14,
3992 13, 12, 12, 11, 11, 10, 9, 9,
3993 8, 8, 7, 7, 6, 5, 5, 4,
3994 4, 3, 3, 2, 2, 1, 1, 0
3995 };
3996 const int precision = 7;
3997
3998 if (exp == 0 && frac != 0) { /* subnormal */
3999 /* Normalize the subnormal. */
4000 while (extract64(frac, frac_size - 1, 1) == 0) {
4001 exp--;
4002 frac <<= 1;
4003 }
4004
4005 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
4006
4007 if (exp != 0 && exp != UINT64_MAX) {
4008 /*
4009 * Overflow to inf or max value of same sign,
4010 * depending on sign and rounding mode.
4011 */
4012 s->float_exception_flags |= (float_flag_inexact |
4013 float_flag_overflow);
4014
4015 if ((s->float_rounding_mode == float_round_to_zero) ||
4016 ((s->float_rounding_mode == float_round_down) && !sign) ||
4017 ((s->float_rounding_mode == float_round_up) && sign)) {
4018 /* Return greatest/negative finite value. */
4019 return (sign << (exp_size + frac_size)) |
4020 (MAKE_64BIT_MASK(frac_size, exp_size) - 1);
4021 } else {
4022 /* Return +-inf. */
4023 return (sign << (exp_size + frac_size)) |
4024 MAKE_64BIT_MASK(frac_size, exp_size);
4025 }
4026 }
4027 }
4028
4029 int idx = frac >> (frac_size - precision);
4030 uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
4031 (frac_size - precision);
4032 uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp;
4033
4034 if (out_exp == 0 || out_exp == UINT64_MAX) {
4035 /*
4036 * The result is subnormal, but don't raise the underflow exception,
4037 * because there's no additional loss of precision.
4038 */
4039 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1);
4040 if (out_exp == UINT64_MAX) {
4041 out_frac >>= 1;
4042 out_exp = 0;
4043 }
4044 }
4045
4046 uint64_t val = 0;
4047 val = deposit64(val, 0, frac_size, out_frac);
4048 val = deposit64(val, frac_size, exp_size, out_exp);
4049 val = deposit64(val, frac_size + exp_size, 1, sign);
4050 return val;
4051 }
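
/*
 * Worked example (illustrative): frec7_s(3.0f), i.e. f = 0x40400000,
 * exp = 128, frac = 0x400000.
 *   idx      = frac >> 16 = 64
 *   out_frac = lookup_table[64] << 16 = 42 << 16
 *   out_exp  = 2 * 127 + ~128 = 2 * bias - exp - 1 = 125
 * giving 0x3eaa0000 ~= 0.332, a 7-bit estimate of 1/3.
 */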
4052
4053 static float16 frec7_h(float16 f, float_status *s)
4054 {
4055 int exp_size = 5, frac_size = 10;
4056 bool sign = float16_is_neg(f);
4057
4058 /* frec7(+-inf) = +-0 */
4059 if (float16_is_infinity(f)) {
4060 return float16_set_sign(float16_zero, sign);
4061 }
4062
4063 /* frec7(+-0) = +-inf */
4064 if (float16_is_zero(f)) {
4065 s->float_exception_flags |= float_flag_divbyzero;
4066 return float16_set_sign(float16_infinity, sign);
4067 }
4068
4069 /* frec7(sNaN) = canonical NaN */
4070 if (float16_is_signaling_nan(f, s)) {
4071 s->float_exception_flags |= float_flag_invalid;
4072 return float16_default_nan(s);
4073 }
4074
4075 /* frec7(qNaN) = canonical NaN */
4076 if (float16_is_quiet_nan(f, s)) {
4077 return float16_default_nan(s);
4078 }
4079
4080 /* +-normal, +-subnormal */
4081 uint64_t val = frec7(f, exp_size, frac_size, s);
4082 return make_float16(val);
4083 }
4084
4085 static float32 frec7_s(float32 f, float_status *s)
4086 {
4087 int exp_size = 8, frac_size = 23;
4088 bool sign = float32_is_neg(f);
4089
4090 /* frec7(+-inf) = +-0 */
4091 if (float32_is_infinity(f)) {
4092 return float32_set_sign(float32_zero, sign);
4093 }
4094
4095 /* frec7(+-0) = +-inf */
4096 if (float32_is_zero(f)) {
4097 s->float_exception_flags |= float_flag_divbyzero;
4098 return float32_set_sign(float32_infinity, sign);
4099 }
4100
4101 /* frec7(sNaN) = canonical NaN */
4102 if (float32_is_signaling_nan(f, s)) {
4103 s->float_exception_flags |= float_flag_invalid;
4104 return float32_default_nan(s);
4105 }
4106
4107 /* frec7(qNaN) = canonical NaN */
4108 if (float32_is_quiet_nan(f, s)) {
4109 return float32_default_nan(s);
4110 }
4111
4112 /* +-normal, +-subnormal */
4113 uint64_t val = frec7(f, exp_size, frac_size, s);
4114 return make_float32(val);
4115 }
4116
4117 static float64 frec7_d(float64 f, float_status *s)
4118 {
4119 int exp_size = 11, frac_size = 52;
4120 bool sign = float64_is_neg(f);
4121
4122 /* frec7(+-inf) = +-0 */
4123 if (float64_is_infinity(f)) {
4124 return float64_set_sign(float64_zero, sign);
4125 }
4126
4127 /* frec7(+-0) = +-inf */
4128 if (float64_is_zero(f)) {
4129 s->float_exception_flags |= float_flag_divbyzero;
4130 return float64_set_sign(float64_infinity, sign);
4131 }
4132
4133 /* frec7(sNaN) = canonical NaN */
4134 if (float64_is_signaling_nan(f, s)) {
4135 s->float_exception_flags |= float_flag_invalid;
4136 return float64_default_nan(s);
4137 }
4138
4139 /* frec7(qNaN) = canonical NaN */
4140 if (float64_is_quiet_nan(f, s)) {
4141 return float64_default_nan(s);
4142 }
4143
4144 /* +-normal, +-subnormal */
4145 uint64_t val = frec7(f, exp_size, frac_size, s);
4146 return make_float64(val);
4147 }
4148
4149 RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h)
4150 RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s)
4151 RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d)
4152 GEN_VEXT_V_ENV(vfrec7_v_h, 2)
4153 GEN_VEXT_V_ENV(vfrec7_v_w, 4)
4154 GEN_VEXT_V_ENV(vfrec7_v_d, 8)
4155
4156 /* Vector Floating-Point MIN/MAX Instructions */
4157 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number)
4158 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number)
4159 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number)
4160 GEN_VEXT_VV_ENV(vfmin_vv_h, 2)
4161 GEN_VEXT_VV_ENV(vfmin_vv_w, 4)
4162 GEN_VEXT_VV_ENV(vfmin_vv_d, 8)
4163 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number)
4164 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number)
4165 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number)
4166 GEN_VEXT_VF(vfmin_vf_h, 2)
4167 GEN_VEXT_VF(vfmin_vf_w, 4)
4168 GEN_VEXT_VF(vfmin_vf_d, 8)
4169
4170 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number)
4171 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number)
4172 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number)
4173 GEN_VEXT_VV_ENV(vfmax_vv_h, 2)
4174 GEN_VEXT_VV_ENV(vfmax_vv_w, 4)
4175 GEN_VEXT_VV_ENV(vfmax_vv_d, 8)
4176 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number)
4177 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number)
4178 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number)
4179 GEN_VEXT_VF(vfmax_vf_h, 2)
4180 GEN_VEXT_VF(vfmax_vf_w, 4)
4181 GEN_VEXT_VF(vfmax_vf_d, 8)
4182
4183 /* Vector Floating-Point Sign-Injection Instructions */
4184 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s)
4185 {
4186 return deposit64(b, 0, 15, a);
4187 }
4188
4189 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s)
4190 {
4191 return deposit64(b, 0, 31, a);
4192 }
4193
4194 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s)
4195 {
4196 return deposit64(b, 0, 63, a);
4197 }
4198
4199 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16)
4200 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32)
4201 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64)
4202 GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2)
4203 GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4)
4204 GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8)
4205 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16)
4206 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32)
4207 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64)
4208 GEN_VEXT_VF(vfsgnj_vf_h, 2)
4209 GEN_VEXT_VF(vfsgnj_vf_w, 4)
4210 GEN_VEXT_VF(vfsgnj_vf_d, 8)
4211
4212 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s)
4213 {
4214 return deposit64(~b, 0, 15, a);
4215 }
4216
4217 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s)
4218 {
4219 return deposit64(~b, 0, 31, a);
4220 }
4221
4222 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s)
4223 {
4224 return deposit64(~b, 0, 63, a);
4225 }
4226
4227 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16)
4228 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32)
4229 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64)
4230 GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2)
4231 GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4)
4232 GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8)
4233 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16)
4234 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32)
4235 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64)
4236 GEN_VEXT_VF(vfsgnjn_vf_h, 2)
4237 GEN_VEXT_VF(vfsgnjn_vf_w, 4)
4238 GEN_VEXT_VF(vfsgnjn_vf_d, 8)
4239
4240 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s)
4241 {
4242 return deposit64(b ^ a, 0, 15, a);
4243 }
4244
4245 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s)
4246 {
4247 return deposit64(b ^ a, 0, 31, a);
4248 }
4249
4250 static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s)
4251 {
4252 return deposit64(b ^ a, 0, 63, a);
4253 }
4254
4255 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16)
4256 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32)
4257 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64)
4258 GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2)
4259 GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4)
4260 GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8)
4261 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16)
4262 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32)
4263 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64)
4264 GEN_VEXT_VF(vfsgnjx_vf_h, 2)
4265 GEN_VEXT_VF(vfsgnjx_vf_w, 4)
4266 GEN_VEXT_VF(vfsgnjx_vf_d, 8)
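
/*
 * Worked example (illustrative), remembering that in OPFVV2 a is the vs2
 * element (magnitude source) and b is the vs1 element (sign source):
 *   fsgnj32 (0x3f800000 [+1.0], 0xc0000000 [-2.0]) = 0xbf800000 (-1.0)
 *   fsgnjn32(0x3f800000,        0xc0000000)        = 0x3f800000 (+1.0)
 *   fsgnjx32(0x3f800000,        0xc0000000)        = 0xbf800000 (-1.0)
 */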
4267
4268 /* Vector Floating-Point Compare Instructions */
4269 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \
4270 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
4271 CPURISCVState *env, uint32_t desc) \
4272 { \
4273 uint32_t vm = vext_vm(desc); \
4274 uint32_t vl = env->vl; \
4275 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \
4276 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
4277 uint32_t vma = vext_vma(desc); \
4278 uint32_t i; \
4279 \
4280 VSTART_CHECK_EARLY_EXIT(env, vl); \
4281 \
4282 for (i = env->vstart; i < vl; i++) { \
4283 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
4284 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
4285 if (!vm && !vext_elem_mask(v0, i)) { \
4286 /* set masked-off elements to 1s */ \
4287 if (vma) { \
4288 vext_set_elem_mask(vd, i, 1); \
4289 } \
4290 continue; \
4291 } \
4292 vext_set_elem_mask(vd, i, \
4293 DO_OP(s2, s1, &env->fp_status)); \
4294 } \
4295 env->vstart = 0; \
4296 /*
4297  * mask destination register is always tail-agnostic
4298 * set tail elements to 1s
4299 */ \
4300 if (vta_all_1s) { \
4301 for (; i < total_elems; i++) { \
4302 vext_set_elem_mask(vd, i, 1); \
4303 } \
4304 } \
4305 }
4306
4307 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet)
4308 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet)
4309 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet)
4310
4311 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \
4312 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4313 CPURISCVState *env, uint32_t desc) \
4314 { \
4315 uint32_t vm = vext_vm(desc); \
4316 uint32_t vl = env->vl; \
4317 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \
4318 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
4319 uint32_t vma = vext_vma(desc); \
4320 uint32_t i; \
4321 \
4322 VSTART_CHECK_EARLY_EXIT(env, vl); \
4323 \
4324 for (i = env->vstart; i < vl; i++) { \
4325 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
4326 if (!vm && !vext_elem_mask(v0, i)) { \
4327 /* set masked-off elements to 1s */ \
4328 if (vma) { \
4329 vext_set_elem_mask(vd, i, 1); \
4330 } \
4331 continue; \
4332 } \
4333 vext_set_elem_mask(vd, i, \
4334 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \
4335 } \
4336 env->vstart = 0; \
4337 /*
4338  * mask destination register is always tail-agnostic
4339 * set tail elements to 1s
4340 */ \
4341 if (vta_all_1s) { \
4342 for (; i < total_elems; i++) { \
4343 vext_set_elem_mask(vd, i, 1); \
4344 } \
4345 } \
4346 }
4347
4348 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet)
4349 GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet)
4350 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet)
4351
4352 static bool vmfne16(uint16_t a, uint16_t b, float_status *s)
4353 {
4354 FloatRelation compare = float16_compare_quiet(a, b, s);
4355 return compare != float_relation_equal;
4356 }
4357
4358 static bool vmfne32(uint32_t a, uint32_t b, float_status *s)
4359 {
4360 FloatRelation compare = float32_compare_quiet(a, b, s);
4361 return compare != float_relation_equal;
4362 }
4363
4364 static bool vmfne64(uint64_t a, uint64_t b, float_status *s)
4365 {
4366 FloatRelation compare = float64_compare_quiet(a, b, s);
4367 return compare != float_relation_equal;
4368 }
4369
4370 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16)
4371 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32)
4372 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64)
4373 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16)
4374 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32)
4375 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64)
4376
4377 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt)
4378 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt)
4379 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt)
4380 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt)
4381 GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt)
4382 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt)
4383
4384 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le)
4385 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le)
4386 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le)
4387 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le)
4388 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le)
4389 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le)
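
/*
 * NaN behaviour of the comparisons above: vmfeq/vmfne use the quiet
 * predicates, so a quiet NaN operand simply compares "not equal"
 * (vmfeq -> 0, vmfne -> 1) without raising the invalid flag; only a
 * signaling NaN sets it. vmflt/vmfle use the signaling float*_lt/le
 * helpers, so any NaN operand raises the invalid flag and yields 0.
 */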
4390
4391 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s)
4392 {
4393 FloatRelation compare = float16_compare(a, b, s);
4394 return compare == float_relation_greater;
4395 }
4396
4397 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s)
4398 {
4399 FloatRelation compare = float32_compare(a, b, s);
4400 return compare == float_relation_greater;
4401 }
4402
4403 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s)
4404 {
4405 FloatRelation compare = float64_compare(a, b, s);
4406 return compare == float_relation_greater;
4407 }
4408
4409 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16)
4410 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32)
4411 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64)
4412
4413 static bool vmfge16(uint16_t a, uint16_t b, float_status *s)
4414 {
4415 FloatRelation compare = float16_compare(a, b, s);
4416 return compare == float_relation_greater ||
4417 compare == float_relation_equal;
4418 }
4419
4420 static bool vmfge32(uint32_t a, uint32_t b, float_status *s)
4421 {
4422 FloatRelation compare = float32_compare(a, b, s);
4423 return compare == float_relation_greater ||
4424 compare == float_relation_equal;
4425 }
4426
4427 static bool vmfge64(uint64_t a, uint64_t b, float_status *s)
4428 {
4429 FloatRelation compare = float64_compare(a, b, s);
4430 return compare == float_relation_greater ||
4431 compare == float_relation_equal;
4432 }
4433
4434 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16)
4435 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
4436 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)
4437
4438 /* Vector Floating-Point Classify Instruction */
4439 target_ulong fclass_h(uint64_t frs1)
4440 {
4441 float16 f = frs1;
4442 bool sign = float16_is_neg(f);
4443
4444 if (float16_is_infinity(f)) {
4445 return sign ? 1 << 0 : 1 << 7;
4446 } else if (float16_is_zero(f)) {
4447 return sign ? 1 << 3 : 1 << 4;
4448 } else if (float16_is_zero_or_denormal(f)) {
4449 return sign ? 1 << 2 : 1 << 5;
4450 } else if (float16_is_any_nan(f)) {
4451 float_status s = { }; /* for snan_bit_is_one */
4452 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4453 } else {
4454 return sign ? 1 << 1 : 1 << 6;
4455 }
4456 }
4457
4458 target_ulong fclass_s(uint64_t frs1)
4459 {
4460 float32 f = frs1;
4461 bool sign = float32_is_neg(f);
4462
4463 if (float32_is_infinity(f)) {
4464 return sign ? 1 << 0 : 1 << 7;
4465 } else if (float32_is_zero(f)) {
4466 return sign ? 1 << 3 : 1 << 4;
4467 } else if (float32_is_zero_or_denormal(f)) {
4468 return sign ? 1 << 2 : 1 << 5;
4469 } else if (float32_is_any_nan(f)) {
4470 float_status s = { }; /* for snan_bit_is_one */
4471 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4472 } else {
4473 return sign ? 1 << 1 : 1 << 6;
4474 }
4475 }
4476
4477 target_ulong fclass_d(uint64_t frs1)
4478 {
4479 float64 f = frs1;
4480 bool sign = float64_is_neg(f);
4481
4482 if (float64_is_infinity(f)) {
4483 return sign ? 1 << 0 : 1 << 7;
4484 } else if (float64_is_zero(f)) {
4485 return sign ? 1 << 3 : 1 << 4;
4486 } else if (float64_is_zero_or_denormal(f)) {
4487 return sign ? 1 << 2 : 1 << 5;
4488 } else if (float64_is_any_nan(f)) {
4489 float_status s = { }; /* for snan_bit_is_one */
4490 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4491 } else {
4492 return sign ? 1 << 1 : 1 << 6;
4493 }
4494 }
4495
4496 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h)
4497 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s)
4498 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d)
4499 GEN_VEXT_V(vfclass_v_h, 2)
4500 GEN_VEXT_V(vfclass_v_w, 4)
4501 GEN_VEXT_V(vfclass_v_d, 8)
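
/*
 * Worked example (illustrative) of the fclass bit positions used above:
 *   fclass_s(0xff800000)  (-inf)          -> 1 << 0
 *   fclass_s(0x3f800000)  (+1.0, normal)  -> 1 << 6
 *   fclass_s(0x7fc00000)  (quiet NaN)     -> 1 << 9
 * Each element written by vfclass.v therefore holds a one-hot class mask.
 */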
4502
4503 /* Vector Floating-Point Merge Instruction */
4504
4505 #define GEN_VFMERGE_VF(NAME, ETYPE, H) \
4506 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4507 CPURISCVState *env, uint32_t desc) \
4508 { \
4509 uint32_t vm = vext_vm(desc); \
4510 uint32_t vl = env->vl; \
4511 uint32_t esz = sizeof(ETYPE); \
4512 uint32_t total_elems = \
4513 vext_get_total_elems(env, desc, esz); \
4514 uint32_t vta = vext_vta(desc); \
4515 uint32_t i; \
4516 \
4517 VSTART_CHECK_EARLY_EXIT(env, vl); \
4518 \
4519 for (i = env->vstart; i < vl; i++) { \
4520 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
4521 *((ETYPE *)vd + H(i)) = \
4522 (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \
4523 } \
4524 env->vstart = 0; \
4525 /* set tail elements to 1s */ \
4526 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
4527 }
4528
4529 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2)
4530 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4)
4531 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8)
4532
4533 /* Single-Width Floating-Point/Integer Type-Convert Instructions */
4534 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
4535 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16)
4536 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32)
4537 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64)
4538 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2)
4539 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4)
4540 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8)
4541
4542 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */
4543 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16)
4544 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32)
4545 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64)
4546 GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2)
4547 GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4)
4548 GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8)
4549
4550 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
4551 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16)
4552 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32)
4553 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64)
4554 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2)
4555 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4)
4556 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8)
4557
4558 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */
4559 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16)
4560 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32)
4561 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64)
4562 GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2)
4563 GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4)
4564 GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8)
4565
4566 /* Widening Floating-Point/Integer Type-Convert Instructions */
4567 /* (TD, T2, TX2) */
4568 #define WOP_UU_B uint16_t, uint8_t, uint8_t
4569 #define WOP_UU_H uint32_t, uint16_t, uint16_t
4570 #define WOP_UU_W uint64_t, uint32_t, uint32_t
4571 /*
4572 * vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.
4573 */
4574 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32)
4575 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64)
4576 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 4)
4577 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 8)
4578
4579 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
4580 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32)
4581 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64)
4582 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 4)
4583 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 8)
4584
4585 /*
4586 * vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float.
4587 */
4588 RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16)
4589 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32)
4590 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64)
4591 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 2)
4592 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 4)
4593 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 8)
4594
4595 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */
4596 RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16)
4597 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32)
4598 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64)
4599 GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 2)
4600 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 4)
4601 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 8)
4602
4603 /*
4604 * vfwcvt.f.f.v vd, vs2, vm # Convert single-width float to double-width float.
4605 */
4606 static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
4607 {
4608 return float16_to_float32(a, true, s);
4609 }
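/*
 * The wrapper above exists only because float16_to_float32() takes an
 * extra "ieee" flag that the OPFVV1/RVVCALL plumbing cannot supply;
 * passing true selects IEEE half-precision rather than the alternative
 * half-precision format.
 */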
4610
4611 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
4612 RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
4613 GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 4)
4614 GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 8)
4615
4616 RVVCALL(OPFVV1, vfwcvtbf16_f_f_v, WOP_UU_H, H4, H2, bfloat16_to_float32)
4617 GEN_VEXT_V_ENV(vfwcvtbf16_f_f_v, 4)
4618
4619 /* Narrowing Floating-Point/Integer Type-Convert Instructions */
4620 /* (TD, T2, TX2) */
4621 #define NOP_UU_B uint8_t, uint16_t, uint32_t
4622 #define NOP_UU_H uint16_t, uint32_t, uint32_t
4623 #define NOP_UU_W uint32_t, uint64_t, uint64_t
4624 /* vfncvt.xu.f.v vd, vs2, vm # Convert double-width float to unsigned integer. */
4625 RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8)
4626 RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16)
4627 RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32)
4628 GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1)
4629 GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2)
4630 GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4)
4631
4632 /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */
4633 RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8)
4634 RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16)
4635 RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32)
4636 GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1)
4637 GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2)
4638 GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4)
4639
4640 /*
4641 * vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float.
4642 */
4643 RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16)
4644 RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32)
4645 GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2)
4646 GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4)
4647
4648 /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */
4649 RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16)
4650 RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32)
4651 GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2)
4652 GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4)
4653
4654 /* vfncvt.f.f.v vd, vs2, vm # Convert double-width float to single-width float. */
4655 static uint16_t vfncvtffv16(uint32_t a, float_status *s)
4656 {
4657 return float32_to_float16(a, true, s);
4658 }
4659
4660 RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16)
4661 RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32)
4662 GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2)
4663 GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4)
4664
4665 RVVCALL(OPFVV1, vfncvtbf16_f_f_w, NOP_UU_H, H2, H4, float32_to_bfloat16)
4666 GEN_VEXT_V_ENV(vfncvtbf16_f_f_w, 2)
4667
4668 /*
4669 * Vector Reduction Operations
4670 */
4671 /* Vector Single-Width Integer Reduction Instructions */
4672 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \
4673 void HELPER(NAME)(void *vd, void *v0, void *vs1, \
4674 void *vs2, CPURISCVState *env, \
4675 uint32_t desc) \
4676 { \
4677 uint32_t vm = vext_vm(desc); \
4678 uint32_t vl = env->vl; \
4679 uint32_t esz = sizeof(TD); \
4680 uint32_t vlenb = simd_maxsz(desc); \
4681 uint32_t vta = vext_vta(desc); \
4682 uint32_t i; \
4683 TD s1 = *((TD *)vs1 + HD(0)); \
4684 \
4685 VSTART_CHECK_EARLY_EXIT(env, vl); \
4686 \
4687 for (i = env->vstart; i < vl; i++) { \
4688 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
4689 if (!vm && !vext_elem_mask(v0, i)) { \
4690 continue; \
4691 } \
4692 s1 = OP(s1, (TD)s2); \
4693 } \
4694 if (vl > 0) { \
4695 *((TD *)vd + HD(0)) = s1; \
4696 } \
4697 env->vstart = 0; \
4698 /* set tail elements to 1s */ \
4699 vext_set_elems_1s(vd, vta, esz, vlenb); \
4700 }
4701
4702 /* vd[0] = sum(vs1[0], vs2[*]) */
4703 GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD)
4704 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD)
4705 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD)
4706 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD)
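/*
 * For example, vredsum.vs with vl = 4, vs1[0] = 10 and vs2 = {1, 2, 3, 4}
 * leaves vd[0] = 10 + 1 + 2 + 3 + 4 = 20; all destination elements past
 * index 0 are tail elements and follow vta.
 */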
4707
4708 /* vd[0] = maxu(vs1[0], vs2[*]) */
4709 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX)
4710 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX)
4711 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX)
4712 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX)
4713
4714 /* vd[0] = max(vs1[0], vs2[*]) */
4715 GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX)
4716 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX)
4717 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX)
4718 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX)
4719
4720 /* vd[0] = minu(vs1[0], vs2[*]) */
4721 GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN)
4722 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN)
4723 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN)
4724 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN)
4725
4726 /* vd[0] = min(vs1[0], vs2[*]) */
4727 GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN)
4728 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN)
4729 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN)
4730 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN)
4731
4732 /* vd[0] = and(vs1[0], vs2[*]) */
4733 GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND)
4734 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND)
4735 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND)
4736 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND)
4737
4738 /* vd[0] = or(vs1[0], vs2[*]) */
4739 GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR)
4740 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR)
4741 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR)
4742 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR)
4743
4744 /* vd[0] = xor(vs1[0], vs2[*]) */
4745 GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR)
4746 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR)
4747 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR)
4748 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR)
4749
4750 /* Vector Widening Integer Reduction Instructions */
4751 /* signed sum reduction into double-width accumulator */
4752 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD)
4753 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD)
4754 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD)
4755
4756 /* Unsigned sum reduction into double-width accumulator */
4757 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD)
4758 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD)
4759 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD)
4760
4761 /* Vector Single-Width Floating-Point Reduction Instructions */
4762 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \
4763 void HELPER(NAME)(void *vd, void *v0, void *vs1, \
4764 void *vs2, CPURISCVState *env, \
4765 uint32_t desc) \
4766 { \
4767 uint32_t vm = vext_vm(desc); \
4768 uint32_t vl = env->vl; \
4769 uint32_t esz = sizeof(TD); \
4770 uint32_t vlenb = simd_maxsz(desc); \
4771 uint32_t vta = vext_vta(desc); \
4772 uint32_t i; \
4773 TD s1 = *((TD *)vs1 + HD(0)); \
4774 \
4775 VSTART_CHECK_EARLY_EXIT(env, vl); \
4776 \
4777 for (i = env->vstart; i < vl; i++) { \
4778 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
4779 if (!vm && !vext_elem_mask(v0, i)) { \
4780 continue; \
4781 } \
4782 s1 = OP(s1, (TD)s2, &env->fp_status); \
4783 } \
4784 if (vl > 0) { \
4785 *((TD *)vd + HD(0)) = s1; \
4786 } \
4787 env->vstart = 0; \
4788 /* set tail elements to 1s */ \
4789 vext_set_elems_1s(vd, vta, esz, vlenb); \
4790 }
4791
4792 /* Unordered sum */
4793 GEN_VEXT_FRED(vfredusum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
4794 GEN_VEXT_FRED(vfredusum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
4795 GEN_VEXT_FRED(vfredusum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)
4796
4797 /* Ordered sum */
4798 GEN_VEXT_FRED(vfredosum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
4799 GEN_VEXT_FRED(vfredosum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
4800 GEN_VEXT_FRED(vfredosum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)
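/*
 * The ordered (vfredosum) and unordered (vfredusum) sums share the same
 * strictly sequential accumulation here; that is a valid implementation
 * because the unordered form permits any association order.
 */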
4801
4802 /* Maximum value */
4803 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2,
4804 float16_maximum_number)
4805 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4,
4806 float32_maximum_number)
4807 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8,
4808 float64_maximum_number)
4809
4810 /* Minimum value */
4811 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2,
4812 float16_minimum_number)
4813 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4,
4814 float32_minimum_number)
4815 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8,
4816 float64_minimum_number)
4817
4818 /* Vector Widening Floating-Point Add Instructions */
4819 static uint32_t fwadd16(uint32_t a, uint16_t b, float_status *s)
4820 {
4821 return float32_add(a, float16_to_float32(b, true, s), s);
4822 }
4823
4824 static uint64_t fwadd32(uint64_t a, uint32_t b, float_status *s)
4825 {
4826 return float64_add(a, float32_to_float64(b, s), s);
4827 }
4828
4829 /* Vector Widening Floating-Point Reduction Instructions */
4830 /* Ordered/unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */
4831 GEN_VEXT_FRED(vfwredusum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16)
4832 GEN_VEXT_FRED(vfwredusum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32)
4833 GEN_VEXT_FRED(vfwredosum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16)
4834 GEN_VEXT_FRED(vfwredosum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32)
4835
4836 /*
4837 * Vector Mask Operations
4838 */
4839 /* Vector Mask-Register Logical Instructions */
4840 #define GEN_VEXT_MASK_VV(NAME, OP) \
4841 void HELPER(NAME)(void *vd, void *v0, void *vs1, \
4842 void *vs2, CPURISCVState *env, \
4843 uint32_t desc) \
4844 { \
4845 uint32_t vl = env->vl; \
4846 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;\
4847 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
4848 uint32_t i; \
4849 int a, b; \
4850 \
4851 VSTART_CHECK_EARLY_EXIT(env, vl); \
4852 \
4853 for (i = env->vstart; i < vl; i++) { \
4854 a = vext_elem_mask(vs1, i); \
4855 b = vext_elem_mask(vs2, i); \
4856 vext_set_elem_mask(vd, i, OP(b, a)); \
4857 } \
4858 env->vstart = 0; \
4859 /*
4860 * mask destination registers are always tail-agnostic
4861 * set tail elements to 1s
4862 */ \
4863 if (vta_all_1s) { \
4864 for (; i < total_elems; i++) { \
4865 vext_set_elem_mask(vd, i, 1); \
4866 } \
4867 } \
4868 }
4869
4870 #define DO_NAND(N, M) (!(N & M))
4871 #define DO_ANDNOT(N, M) (N & !M)
4872 #define DO_NOR(N, M) (!(N | M))
4873 #define DO_ORNOT(N, M) (N | !M)
4874 #define DO_XNOR(N, M) (!(N ^ M))
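/*
 * The loop in GEN_VEXT_MASK_VV evaluates OP(b, a) with b taken from vs2 and
 * a from vs1, so e.g. vmandn.mm computes vs2 & ~vs1 and vmorn.mm computes
 * vs2 | ~vs1; since the operands are single mask bits, !M behaves as ~M & 1.
 */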
4875
4876 GEN_VEXT_MASK_VV(vmand_mm, DO_AND)
4877 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND)
4878 GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT)
4879 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR)
4880 GEN_VEXT_MASK_VV(vmor_mm, DO_OR)
4881 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR)
4882 GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT)
4883 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR)
4884
4885 /* Vector count population in mask vcpop */
4886 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env,
4887 uint32_t desc)
4888 {
4889 target_ulong cnt = 0;
4890 uint32_t vm = vext_vm(desc);
4891 uint32_t vl = env->vl;
4892 int i;
4893
4894 for (i = env->vstart; i < vl; i++) {
4895 if (vm || vext_elem_mask(v0, i)) {
4896 if (vext_elem_mask(vs2, i)) {
4897 cnt++;
4898 }
4899 }
4900 }
4901 env->vstart = 0;
4902 return cnt;
4903 }
4904
4905 /* vfirst find-first-set mask bit */
4906 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env,
4907 uint32_t desc)
4908 {
4909 uint32_t vm = vext_vm(desc);
4910 uint32_t vl = env->vl;
4911 int i;
4912
4913 for (i = env->vstart; i < vl; i++) {
4914 if (vm || vext_elem_mask(v0, i)) {
4915 if (vext_elem_mask(vs2, i)) {
4916 return i;
4917 }
4918 }
4919 }
4920 env->vstart = 0;
4921 return -1LL;
4922 }
4923
4924 enum set_mask_type {
4925 ONLY_FIRST = 1,
4926 INCLUDE_FIRST,
4927 BEFORE_FIRST,
4928 };
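/*
 * For example, with all elements active and vs2 mask bits {0, 0, 1, 0, 1},
 * the first set bit is at index 2, so:
 *   vmsbf.m (BEFORE_FIRST)  -> {1, 1, 0, 0, 0}
 *   vmsif.m (INCLUDE_FIRST) -> {1, 1, 1, 0, 0}
 *   vmsof.m (ONLY_FIRST)    -> {0, 0, 1, 0, 0}
 */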
4929
4930 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
4931 uint32_t desc, enum set_mask_type type)
4932 {
4933 uint32_t vm = vext_vm(desc);
4934 uint32_t vl = env->vl;
4935 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;
4936 uint32_t vta_all_1s = vext_vta_all_1s(desc);
4937 uint32_t vma = vext_vma(desc);
4938 int i;
4939 bool first_mask_bit = false;
4940
4941 VSTART_CHECK_EARLY_EXIT(env, vl);
4942
4943 for (i = env->vstart; i < vl; i++) {
4944 if (!vm && !vext_elem_mask(v0, i)) {
4945 /* set masked-off elements to 1s */
4946 if (vma) {
4947 vext_set_elem_mask(vd, i, 1);
4948 }
4949 continue;
4950 }
4951 /* write a zero to all following active elements */
4952 if (first_mask_bit) {
4953 vext_set_elem_mask(vd, i, 0);
4954 continue;
4955 }
4956 if (vext_elem_mask(vs2, i)) {
4957 first_mask_bit = true;
4958 if (type == BEFORE_FIRST) {
4959 vext_set_elem_mask(vd, i, 0);
4960 } else {
4961 vext_set_elem_mask(vd, i, 1);
4962 }
4963 } else {
4964 if (type == ONLY_FIRST) {
4965 vext_set_elem_mask(vd, i, 0);
4966 } else {
4967 vext_set_elem_mask(vd, i, 1);
4968 }
4969 }
4970 }
4971 env->vstart = 0;
4972 /*
4973 * mask destination registers are always tail-agnostic
4974 * set tail elements to 1s
4975 */
4976 if (vta_all_1s) {
4977 for (; i < total_elems; i++) {
4978 vext_set_elem_mask(vd, i, 1);
4979 }
4980 }
4981 }
4982
4983 void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4984 uint32_t desc)
4985 {
4986 vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
4987 }
4988
4989 void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4990 uint32_t desc)
4991 {
4992 vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
4993 }
4994
4995 void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4996 uint32_t desc)
4997 {
4998 vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
4999 }
5000
5001 /* Vector Iota Instruction */
5002 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \
5003 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \
5004 uint32_t desc) \
5005 { \
5006 uint32_t vm = vext_vm(desc); \
5007 uint32_t vl = env->vl; \
5008 uint32_t esz = sizeof(ETYPE); \
5009 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5010 uint32_t vta = vext_vta(desc); \
5011 uint32_t vma = vext_vma(desc); \
5012 uint32_t sum = 0; \
5013 int i; \
5014 \
5015 VSTART_CHECK_EARLY_EXIT(env, vl); \
5016 \
5017 for (i = env->vstart; i < vl; i++) { \
5018 if (!vm && !vext_elem_mask(v0, i)) { \
5019 /* set masked-off elements to 1s */ \
5020 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5021 continue; \
5022 } \
5023 *((ETYPE *)vd + H(i)) = sum; \
5024 if (vext_elem_mask(vs2, i)) { \
5025 sum++; \
5026 } \
5027 } \
5028 env->vstart = 0; \
5029 /* set tail elements to 1s */ \
5030 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5031 }
5032
5033 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1)
5034 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
5035 GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
5036 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)
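/*
 * For example, with all elements active and vs2 mask bits {1, 0, 0, 1, 1},
 * viota.m computes the exclusive prefix population count, so
 * vd = {0, 1, 1, 1, 2}.
 */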
5037
5038 /* Vector Element Index Instruction */
5039 #define GEN_VEXT_VID_V(NAME, ETYPE, H) \
5040 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \
5041 { \
5042 uint32_t vm = vext_vm(desc); \
5043 uint32_t vl = env->vl; \
5044 uint32_t esz = sizeof(ETYPE); \
5045 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5046 uint32_t vta = vext_vta(desc); \
5047 uint32_t vma = vext_vma(desc); \
5048 int i; \
5049 \
5050 VSTART_CHECK_EARLY_EXIT(env, vl); \
5051 \
5052 for (i = env->vstart; i < vl; i++) { \
5053 if (!vm && !vext_elem_mask(v0, i)) { \
5054 /* set masked-off elements to 1s */ \
5055 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5056 continue; \
5057 } \
5058 *((ETYPE *)vd + H(i)) = i; \
5059 } \
5060 env->vstart = 0; \
5061 /* set tail elements to 1s */ \
5062 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5063 }
5064
5065 GEN_VEXT_VID_V(vid_v_b, uint8_t, H1)
5066 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
5067 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
5068 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)
5069
5070 /*
5071 * Vector Permutation Instructions
5072 */
5073
5074 /* Vector Slide Instructions */
5075 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H) \
5076 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
5077 CPURISCVState *env, uint32_t desc) \
5078 { \
5079 uint32_t vm = vext_vm(desc); \
5080 uint32_t vl = env->vl; \
5081 uint32_t esz = sizeof(ETYPE); \
5082 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5083 uint32_t vta = vext_vta(desc); \
5084 uint32_t vma = vext_vma(desc); \
5085 target_ulong offset = s1, i_min, i; \
5086 \
5087 VSTART_CHECK_EARLY_EXIT(env, vl); \
5088 \
5089 i_min = MAX(env->vstart, offset); \
5090 for (i = i_min; i < vl; i++) { \
5091 if (!vm && !vext_elem_mask(v0, i)) { \
5092 /* set masked-off elements to 1s */ \
5093 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5094 continue; \
5095 } \
5096 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \
5097 } \
5098 env->vstart = 0; \
5099 /* set tail elements to 1s */ \
5100 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5101 }
5102
5103 /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
5104 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1)
5105 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
5106 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
5107 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)
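/*
 * For example, vslideup.vx with rs1 = 2, vl = 5 and vs2 = {a, b, c, d, e}
 * writes vd[2..4] = {a, b, c}; vd[0..1] lie below the offset and are left
 * unchanged by the loop above.
 */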
5108
5109 #define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H) \
5110 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
5111 CPURISCVState *env, uint32_t desc) \
5112 { \
5113 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \
5114 uint32_t vm = vext_vm(desc); \
5115 uint32_t vl = env->vl; \
5116 uint32_t esz = sizeof(ETYPE); \
5117 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5118 uint32_t vta = vext_vta(desc); \
5119 uint32_t vma = vext_vma(desc); \
5120 target_ulong i_max, i_min, i; \
5121 \
5122 VSTART_CHECK_EARLY_EXIT(env, vl); \
5123 \
5124 i_min = MIN(s1 < vlmax ? vlmax - s1 : 0, vl); \
5125 i_max = MAX(i_min, env->vstart); \
5126 for (i = env->vstart; i < i_max; ++i) { \
5127 if (!vm && !vext_elem_mask(v0, i)) { \
5128 /* set masked-off elements to 1s */ \
5129 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5130 continue; \
5131 } \
5132 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1)); \
5133 } \
5134 \
5135 for (i = i_max; i < vl; ++i) { \
5136 if (!vm && !vext_elem_mask(v0, i)) { \
5137 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5138 continue; \
5139 } \
5140 *((ETYPE *)vd + H(i)) = 0; \
5141 } \
5142 \
5143 env->vstart = 0; \
5144 /* set tail elements to 1s */ \
5145 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5146 }
5147
5148 /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
5149 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1)
5150 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
5151 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
5152 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)
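/*
 * For example, vslidedown.vx with rs1 = 2, vl = 5, VLMAX = 8 and
 * vs2 = {a, b, c, d, e, f, g, h} writes vd = {c, d, e, f, g}; source
 * indices at or beyond VLMAX are written as 0 by the second loop above.
 */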
5153
5154 #define GEN_VEXT_VSLIE1UP(BITWIDTH, H) \
5155 static void vslide1up_##BITWIDTH(void *vd, void *v0, uint64_t s1, \
5156 void *vs2, CPURISCVState *env, \
5157 uint32_t desc) \
5158 { \
5159 typedef uint##BITWIDTH##_t ETYPE; \
5160 uint32_t vm = vext_vm(desc); \
5161 uint32_t vl = env->vl; \
5162 uint32_t esz = sizeof(ETYPE); \
5163 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5164 uint32_t vta = vext_vta(desc); \
5165 uint32_t vma = vext_vma(desc); \
5166 uint32_t i; \
5167 \
5168 VSTART_CHECK_EARLY_EXIT(env, vl); \
5169 \
5170 for (i = env->vstart; i < vl; i++) { \
5171 if (!vm && !vext_elem_mask(v0, i)) { \
5172 /* set masked-off elements to 1s */ \
5173 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5174 continue; \
5175 } \
5176 if (i == 0) { \
5177 *((ETYPE *)vd + H(i)) = s1; \
5178 } else { \
5179 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \
5180 } \
5181 } \
5182 env->vstart = 0; \
5183 /* set tail elements to 1s */ \
5184 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5185 }
5186
5187 GEN_VEXT_VSLIE1UP(8, H1)
5188 GEN_VEXT_VSLIE1UP(16, H2)
5189 GEN_VEXT_VSLIE1UP(32, H4)
5190 GEN_VEXT_VSLIE1UP(64, H8)
5191
5192 #define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH) \
5193 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
5194 CPURISCVState *env, uint32_t desc) \
5195 { \
5196 vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
5197 }
5198
5199 /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
5200 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
5201 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
5202 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
5203 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)
5204
5205 #define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H) \
5206 static void vslide1down_##BITWIDTH(void *vd, void *v0, uint64_t s1, \
5207 void *vs2, CPURISCVState *env, \
5208 uint32_t desc) \
5209 { \
5210 typedef uint##BITWIDTH##_t ETYPE; \
5211 uint32_t vm = vext_vm(desc); \
5212 uint32_t vl = env->vl; \
5213 uint32_t esz = sizeof(ETYPE); \
5214 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5215 uint32_t vta = vext_vta(desc); \
5216 uint32_t vma = vext_vma(desc); \
5217 uint32_t i; \
5218 \
5219 VSTART_CHECK_EARLY_EXIT(env, vl); \
5220 \
5221 for (i = env->vstart; i < vl; i++) { \
5222 if (!vm && !vext_elem_mask(v0, i)) { \
5223 /* set masked-off elements to 1s */ \
5224 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5225 continue; \
5226 } \
5227 if (i == vl - 1) { \
5228 *((ETYPE *)vd + H(i)) = s1; \
5229 } else { \
5230 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \
5231 } \
5232 } \
5233 env->vstart = 0; \
5234 /* set tail elements to 1s */ \
5235 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5236 }
5237
5238 GEN_VEXT_VSLIDE1DOWN(8, H1)
5239 GEN_VEXT_VSLIDE1DOWN(16, H2)
5240 GEN_VEXT_VSLIDE1DOWN(32, H4)
5241 GEN_VEXT_VSLIDE1DOWN(64, H8)
5242
5243 #define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH) \
5244 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
5245 CPURISCVState *env, uint32_t desc) \
5246 { \
5247 vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
5248 }
5249
5250 /* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
5251 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
5252 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
5253 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
5254 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)
5255
5256 /* Vector Floating-Point Slide Instructions */
5257 #define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH) \
5258 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
5259 CPURISCVState *env, uint32_t desc) \
5260 { \
5261 vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
5262 }
5263
5264 /* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
5265 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
5266 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
5267 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)
5268
5269 #define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH) \
5270 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
5271 CPURISCVState *env, uint32_t desc) \
5272 { \
5273 vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
5274 }
5275
5276 /* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
5277 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
5278 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
5279 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)
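/*
 * The floating-point slide1 variants reuse the integer vslide1up/down
 * helpers unchanged: only raw element bits are moved, and the scalar is
 * expected to arrive as the (NaN-boxed) bit pattern of f[rs1] prepared by
 * the translator.
 */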
5280
5281 /* Vector Register Gather Instruction */
5282 #define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2) \
5283 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
5284 CPURISCVState *env, uint32_t desc) \
5285 { \
5286 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2))); \
5287 uint32_t vm = vext_vm(desc); \
5288 uint32_t vl = env->vl; \
5289 uint32_t esz = sizeof(TS2); \
5290 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5291 uint32_t vta = vext_vta(desc); \
5292 uint32_t vma = vext_vma(desc); \
5293 uint64_t index; \
5294 uint32_t i; \
5295 \
5296 VSTART_CHECK_EARLY_EXIT(env, vl); \
5297 \
5298 for (i = env->vstart; i < vl; i++) { \
5299 if (!vm && !vext_elem_mask(v0, i)) { \
5300 /* set masked-off elements to 1s */ \
5301 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5302 continue; \
5303 } \
5304 index = *((TS1 *)vs1 + HS1(i)); \
5305 if (index >= vlmax) { \
5306 *((TS2 *)vd + HS2(i)) = 0; \
5307 } else { \
5308 *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index)); \
5309 } \
5310 } \
5311 env->vstart = 0; \
5312 /* set tail elements to 1s */ \
5313 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5314 }
5315
5316 /* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
5317 GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1)
5318 GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
5319 GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
5320 GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)
5321
5322 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1)
5323 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
5324 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
5325 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)
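/*
 * For example, vrgather.vv with vl = 4, VLMAX = 4, vs1 = {3, 0, 5, 1} and
 * vs2 = {a, b, c, d} yields vd = {d, a, 0, b}; index 5 is >= VLMAX, so that
 * element is written as 0.
 */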
5326
5327 #define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H) \
5328 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
5329 CPURISCVState *env, uint32_t desc) \
5330 { \
5331 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \
5332 uint32_t vm = vext_vm(desc); \
5333 uint32_t vl = env->vl; \
5334 uint32_t esz = sizeof(ETYPE); \
5335 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5336 uint32_t vta = vext_vta(desc); \
5337 uint32_t vma = vext_vma(desc); \
5338 uint64_t index = s1; \
5339 uint32_t i; \
5340 \
5341 VSTART_CHECK_EARLY_EXIT(env, vl); \
5342 \
5343 for (i = env->vstart; i < vl; i++) { \
5344 if (!vm && !vext_elem_mask(v0, i)) { \
5345 /* set masked-off elements to 1s */ \
5346 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5347 continue; \
5348 } \
5349 if (index >= vlmax) { \
5350 *((ETYPE *)vd + H(i)) = 0; \
5351 } else { \
5352 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \
5353 } \
5354 } \
5355 env->vstart = 0; \
5356 /* set tail elements to 1s */ \
5357 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5358 }
5359
5360 /* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[x[rs1]] */
5361 GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1)
5362 GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
5363 GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
5364 GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)
5365
5366 /* Vector Compress Instruction */
5367 #define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H) \
5368 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
5369 CPURISCVState *env, uint32_t desc) \
5370 { \
5371 uint32_t vl = env->vl; \
5372 uint32_t esz = sizeof(ETYPE); \
5373 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5374 uint32_t vta = vext_vta(desc); \
5375 uint32_t num = 0, i; \
5376 \
5377 VSTART_CHECK_EARLY_EXIT(env, vl); \
5378 \
5379 for (i = env->vstart; i < vl; i++) { \
5380 if (!vext_elem_mask(vs1, i)) { \
5381 continue; \
5382 } \
5383 *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i)); \
5384 num++; \
5385 } \
5386 env->vstart = 0; \
5387 /* set tail elements to 1s */ \
5388 vext_set_elems_1s(vd, vta, num * esz, total_elems * esz); \
5389 }
5390
5391 /* Compress into vd elements of vs2 where vs1 is enabled */
5392 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1)
5393 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
5394 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
5395 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)
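/*
 * For example, vcompress.vm with vl = 4, vs1 mask bits {1, 0, 1, 1} and
 * vs2 = {a, b, c, d} packs the selected elements into vd[0..2] = {a, c, d};
 * the remaining destination elements are treated as tail elements and are
 * set to 1s above when vta is set.
 */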
5396
5397 /* Vector Whole Register Move */
5398 void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
5399 {
5400 /* EEW = SEW */
5401 uint32_t maxsz = simd_maxsz(desc);
5402 uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
5403 uint32_t startb = env->vstart * sewb;
5404 uint32_t i = startb;
5405
5406 if (startb >= maxsz) {
5407 env->vstart = 0;
5408 return;
5409 }
5410
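/*
 * On big-endian hosts the vector register bytes are stored swapped within
 * each 64-bit group (see the H1() macro), so a copy that does not start on
 * an 8-byte boundary must first finish the partially covered group before
 * the bulk memcpy below can proceed.
 */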
5411 if (HOST_BIG_ENDIAN && i % 8 != 0) {
5412 uint32_t j = ROUND_UP(i, 8);
5413 memcpy((uint8_t *)vd + H1(j - 1),
5414 (uint8_t *)vs2 + H1(j - 1),
5415 j - i);
5416 i = j;
5417 }
5418
5419 memcpy((uint8_t *)vd + H1(i),
5420 (uint8_t *)vs2 + H1(i),
5421 maxsz - i);
5422
5423 env->vstart = 0;
5424 }
5425
5426 /* Vector Integer Extension */
5427 #define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1) \
5428 void HELPER(NAME)(void *vd, void *v0, void *vs2, \
5429 CPURISCVState *env, uint32_t desc) \
5430 { \
5431 uint32_t vl = env->vl; \
5432 uint32_t vm = vext_vm(desc); \
5433 uint32_t esz = sizeof(ETYPE); \
5434 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5435 uint32_t vta = vext_vta(desc); \
5436 uint32_t vma = vext_vma(desc); \
5437 uint32_t i; \
5438 \
5439 VSTART_CHECK_EARLY_EXIT(env, vl); \
5440 \
5441 for (i = env->vstart; i < vl; i++) { \
5442 if (!vm && !vext_elem_mask(v0, i)) { \
5443 /* set masked-off elements to 1s */ \
5444 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5445 continue; \
5446 } \
5447 *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i)); \
5448 } \
5449 env->vstart = 0; \
5450 /* set tail elements to 1s */ \
5451 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5452 }
5453
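/*
 * The vf2/vf4/vf8 suffix gives the widening factor: each SEW-wide
 * destination element is produced from a source element 1/2, 1/4 or 1/8 as
 * wide, zero-extended for vzext and sign-extended for vsext.
 */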
5454 GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1)
5455 GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
5456 GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
5457 GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1)
5458 GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
5459 GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1)
5460
5461 GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1)
5462 GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
5463 GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
5464 GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1)
5465 GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
5466 GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1)
5467