1 /*
2 * RISC-V Vector Extension Helpers for QEMU.
3 *
4 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2 or later, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19 #include "qemu/osdep.h"
20 #include "qemu/host-utils.h"
21 #include "qemu/bitops.h"
22 #include "cpu.h"
23 #include "exec/memop.h"
24 #include "accel/tcg/cpu-ldst.h"
25 #include "accel/tcg/probe.h"
26 #include "exec/page-protection.h"
27 #include "exec/helper-proto.h"
28 #include "exec/tlb-flags.h"
29 #include "exec/target_page.h"
30 #include "exec/tswap.h"
31 #include "fpu/softfloat.h"
32 #include "tcg/tcg-gvec-desc.h"
33 #include "internals.h"
34 #include "vector_internals.h"
35 #include <math.h>
36
/*
 * Implementation of the vsetvl family: validate the requested vtype (s2)
 * and AVL (s1), then update vl, vtype, vstart and vill accordingly.
 *
 * s1: requested application vector length (AVL)
 * s2: requested vtype encoding
 * x0: non-zero for the "rd = x0, rs1 = x0" form, which must keep the
 *     existing vl unchanged
 *
 * Returns the new vl, or 0 when the request is illegal (vill set).
 */
target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
                            target_ulong s2, target_ulong x0)
{
    int vlmax, vl;
    RISCVCPU *cpu = env_archcpu(env);
    uint64_t vlmul = FIELD_EX64(s2, VTYPE, VLMUL);
    uint8_t vsew = FIELD_EX64(s2, VTYPE, VSEW);
    uint16_t sew = 8 << vsew;
    uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
    int xlen = riscv_cpu_xlen(env);
    /* vill is the most-significant bit of vtype for the current XLEN */
    bool vill = (s2 >> (xlen - 1)) & 0x1;
    /* any set bit between the defined fields and vill is reserved */
    target_ulong reserved = s2 &
                            MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
                                            xlen - 1 - R_VTYPE_RESERVED_SHIFT);
    uint16_t vlen = cpu->cfg.vlenb << 3;
    int8_t lmul;

    if (vlmul & 4) {
        /*
         * Fractional LMUL, check:
         *
         * VLEN * LMUL >= SEW
         * VLEN >> (8 - lmul) >= sew
         * (vlenb << 3) >> (8 - lmul) >= sew
         */
        if (vlmul == 4 || (vlen >> (8 - vlmul)) < sew) {
            /* vlmul == 4 is a reserved LMUL encoding */
            vill = true;
        }
    }

    if ((sew > cpu->cfg.elen) || vill || (ediv != 0) || (reserved != 0)) {
        /* only set vill bit. */
        env->vill = 1;
        env->vtype = 0;
        env->vl = 0;
        env->vstart = 0;
        return 0;
    }

    /* lmul encoded as in DisasContext::lmul */
    lmul = sextract32(FIELD_EX64(s2, VTYPE, VLMUL), 0, 3);
    vlmax = vext_get_vlmax(cpu->cfg.vlenb, vsew, lmul);
    if (s1 <= vlmax) {
        vl = s1;
    } else if (s1 < 2 * vlmax && cpu->cfg.rvv_vl_half_avl) {
        /* optionally set vl = ceil(AVL / 2) when vlmax < AVL < 2 * vlmax */
        vl = (s1 + 1) >> 1;
    } else {
        vl = vlmax;
    }

    if (cpu->cfg.rvv_vsetvl_x0_vill && x0 && (env->vl != vl)) {
        /*
         * The rd = x0, rs1 = x0 form must preserve the current vl; if the
         * new vtype would change it, treat the request as illegal.
         */
        /* only set vill bit. */
        env->vill = 1;
        env->vtype = 0;
        env->vl = 0;
        env->vstart = 0;
        return 0;
    }

    env->vl = vl;
    env->vtype = s2;
    env->vstart = 0;
    env->vill = 0;
    return vl;
}
102
103 /*
104 * Get the maximum number of elements can be operated.
105 *
106 * log2_esz: log2 of element size in bytes.
107 */
vext_max_elems(uint32_t desc,uint32_t log2_esz)108 static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz)
109 {
110 /*
111 * As simd_desc support at most 2048 bytes, the max vlen is 1024 bits.
112 * so vlen in bytes (vlenb) is encoded as maxsz.
113 */
114 uint32_t vlenb = simd_maxsz(desc);
115
116 /* Return VLMAX */
117 int scale = vext_lmul(desc) - log2_esz;
118 return scale < 0 ? vlenb >> -scale : vlenb << scale;
119 }
120
121 /*
122 * This function checks watchpoint before real load operation.
123 *
124 * In system mode, the TLB API probe_access is enough for watchpoint check.
125 * In user mode, there is no watchpoint support now.
126 *
127 * It will trigger an exception if there is no mapping in TLB
128 * and page table walk can't fill the TLB entry. Then the guest
129 * software can return here after process the exception or never return.
130 *
131 * This function can also be used when direct access to probe_access_flags is
132 * needed in order to access the flags. If a pointer to a flags operand is
133 * provided the function will call probe_access_flags instead, use nonfault
134 * and update host and flags.
135 */
probe_pages(CPURISCVState * env,target_ulong addr,target_ulong len,uintptr_t ra,MMUAccessType access_type,int mmu_index,void ** host,int * flags,bool nonfault)136 static void probe_pages(CPURISCVState *env, target_ulong addr, target_ulong len,
137 uintptr_t ra, MMUAccessType access_type, int mmu_index,
138 void **host, int *flags, bool nonfault)
139 {
140 target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
141 target_ulong curlen = MIN(pagelen, len);
142
143 if (flags != NULL) {
144 *flags = probe_access_flags(env, adjust_addr(env, addr), curlen,
145 access_type, mmu_index, nonfault, host, ra);
146 } else {
147 probe_access(env, adjust_addr(env, addr), curlen, access_type,
148 mmu_index, ra);
149 }
150
151 if (len > curlen) {
152 addr += curlen;
153 curlen = len - curlen;
154 if (flags != NULL) {
155 *flags = probe_access_flags(env, adjust_addr(env, addr), curlen,
156 access_type, mmu_index, nonfault,
157 host, ra);
158 } else {
159 probe_access(env, adjust_addr(env, addr), curlen, access_type,
160 mmu_index, ra);
161 }
162 }
163 }
164
165
/* Set mask-register bit 'index' in v0 to the low bit of 'value'. */
static inline void vext_set_elem_mask(void *v0, int index,
                                      uint8_t value)
{
    uint64_t *bits = v0;
    uint64_t bit = UINT64_C(1) << (index % 64);

    if (value & 1) {
        bits[index / 64] |= bit;
    } else {
        bits[index / 64] &= ~bit;
    }
}
174
/* elements operations for load and store */
typedef void vext_ldst_elem_fn_tlb(CPURISCVState *env, abi_ptr addr,
                                   uint32_t idx, void *vd, uintptr_t retaddr);
typedef void vext_ldst_elem_fn_host(void *vd, uint32_t idx, void *host);

/*
 * Generate a pair of single-element load accessors:
 *   NAME_tlb  - load through the softmmu/TLB path; may fault and unwind
 *               via retaddr
 *   NAME_host - load through a direct host pointer; cannot fault
 * H() applies the host-endian lane adjustment for the element index.
 */
#define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF)            \
static inline QEMU_ALWAYS_INLINE                           \
void NAME##_tlb(CPURISCVState *env, abi_ptr addr,          \
                uint32_t idx, void *vd, uintptr_t retaddr) \
{                                                          \
    ETYPE *cur = ((ETYPE *)vd + H(idx));                   \
    *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr);      \
}                                                          \
                                                           \
static inline QEMU_ALWAYS_INLINE                           \
void NAME##_host(void *vd, uint32_t idx, void *host)       \
{                                                          \
    ETYPE *cur = ((ETYPE *)vd + H(idx));                   \
    *cur = (ETYPE)LDSUF##_p(host);                         \
}

GEN_VEXT_LD_ELEM(lde_b, uint8_t, H1, ldub)
GEN_VEXT_LD_ELEM(lde_h, uint16_t, H2, lduw)
GEN_VEXT_LD_ELEM(lde_w, uint32_t, H4, ldl)
GEN_VEXT_LD_ELEM(lde_d, uint64_t, H8, ldq)
200
/*
 * Generate the matching pair of single-element store accessors
 * (TLB path and direct host-pointer path); see GEN_VEXT_LD_ELEM.
 */
#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF)            \
static inline QEMU_ALWAYS_INLINE                           \
void NAME##_tlb(CPURISCVState *env, abi_ptr addr,          \
                uint32_t idx, void *vd, uintptr_t retaddr) \
{                                                          \
    ETYPE data = *((ETYPE *)vd + H(idx));                  \
    cpu_##STSUF##_data_ra(env, addr, data, retaddr);       \
}                                                          \
                                                           \
static inline QEMU_ALWAYS_INLINE                           \
void NAME##_host(void *vd, uint32_t idx, void *host)       \
{                                                          \
    ETYPE data = *((ETYPE *)vd + H(idx));                  \
    STSUF##_p(host, data);                                 \
}

GEN_VEXT_ST_ELEM(ste_b, uint8_t, H1, stb)
GEN_VEXT_ST_ELEM(ste_h, uint16_t, H2, stw)
GEN_VEXT_ST_ELEM(ste_w, uint32_t, H4, stl)
GEN_VEXT_ST_ELEM(ste_d, uint64_t, H8, stq)
221
222 static inline QEMU_ALWAYS_INLINE void
223 vext_continuous_ldst_tlb(CPURISCVState *env, vext_ldst_elem_fn_tlb *ldst_tlb,
224 void *vd, uint32_t evl, target_ulong addr,
225 uint32_t reg_start, uintptr_t ra, uint32_t esz,
226 bool is_load)
227 {
228 uint32_t i;
229 for (i = env->vstart; i < evl; env->vstart = ++i, addr += esz) {
230 ldst_tlb(env, adjust_addr(env, addr), i, vd, ra);
231 }
232 }
233
/*
 * Contiguous transfer of elements [reg_start, evl) through a direct host
 * pointer, once the whole range is known to be accessible.
 */
static inline QEMU_ALWAYS_INLINE void
vext_continuous_ldst_host(CPURISCVState *env, vext_ldst_elem_fn_host *ldst_host,
                          void *vd, uint32_t evl, uint32_t reg_start, void *host,
                          uint32_t esz, bool is_load)
{
#if HOST_BIG_ENDIAN
    /* Big-endian hosts go element by element through the H()-adjusted
     * accessors rather than bulk-copying. */
    for (; reg_start < evl; reg_start++, host += esz) {
        ldst_host(vd, reg_start, host);
    }
#else
    if (esz == 1) {
        uint32_t byte_offset = reg_start * esz;
        uint32_t size = (evl - reg_start) * esz;

        /* Byte elements need no per-element handling: one memcpy. */
        if (is_load) {
            memcpy(vd + byte_offset, host, size);
        } else {
            memcpy(host, vd + byte_offset, size);
        }
    } else {
        for (; reg_start < evl; reg_start++, host += esz) {
            ldst_host(vd, reg_start, host);
        }
    }
#endif
}
260
/*
 * Apply tail-agnostic policy: when vta is set, fill the tail elements
 * (indices [vl, max_elems)) of every one of the nf register fields with
 * all-ones.  With vta == 0 the tail is left undisturbed.
 */
static void vext_set_tail_elems_1s(target_ulong vl, void *vd,
                                   uint32_t desc, uint32_t nf,
                                   uint32_t esz, uint32_t max_elems)
{
    uint32_t vta = vext_vta(desc);
    uint32_t k;

    if (vta == 0) {
        return;
    }

    for (k = 0; k < nf; k++) {
        uint32_t seg = k * max_elems;

        vext_set_elems_1s(vd, vta, (seg + vl) * esz, (seg + max_elems) * esz);
    }
}
277
278 /*
279 * stride: access vector element from strided memory
280 */
281 static void
vext_ldst_stride(void * vd,void * v0,target_ulong base,target_ulong stride,CPURISCVState * env,uint32_t desc,uint32_t vm,vext_ldst_elem_fn_tlb * ldst_elem,uint32_t log2_esz,uintptr_t ra)282 vext_ldst_stride(void *vd, void *v0, target_ulong base, target_ulong stride,
283 CPURISCVState *env, uint32_t desc, uint32_t vm,
284 vext_ldst_elem_fn_tlb *ldst_elem, uint32_t log2_esz,
285 uintptr_t ra)
286 {
287 uint32_t i, k;
288 uint32_t nf = vext_nf(desc);
289 uint32_t max_elems = vext_max_elems(desc, log2_esz);
290 uint32_t esz = 1 << log2_esz;
291 uint32_t vma = vext_vma(desc);
292
293 VSTART_CHECK_EARLY_EXIT(env, env->vl);
294
295 for (i = env->vstart; i < env->vl; env->vstart = ++i) {
296 k = 0;
297 while (k < nf) {
298 if (!vm && !vext_elem_mask(v0, i)) {
299 /* set masked-off elements to 1s */
300 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
301 (i + k * max_elems + 1) * esz);
302 k++;
303 continue;
304 }
305 target_ulong addr = base + stride * i + (k << log2_esz);
306 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
307 k++;
308 }
309 }
310 env->vstart = 0;
311
312 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
313 }
314
/* Strided load helper entry points, one per element width. */
#define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN)                        \
void HELPER(NAME)(void *vd, void * v0, target_ulong base,               \
                  target_ulong stride, CPURISCVState *env,              \
                  uint32_t desc)                                        \
{                                                                       \
    uint32_t vm = vext_vm(desc);                                        \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN,      \
                     ctzl(sizeof(ETYPE)), GETPC());                     \
}

GEN_VEXT_LD_STRIDE(vlse8_v, int8_t, lde_b_tlb)
GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h_tlb)
GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w_tlb)
GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d_tlb)
329
/* Strided store helper entry points, one per element width. */
#define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN)                       \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  target_ulong stride, CPURISCVState *env,              \
                  uint32_t desc)                                        \
{                                                                       \
    uint32_t vm = vext_vm(desc);                                        \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN,     \
                     ctzl(sizeof(ETYPE)), GETPC());                     \
}

GEN_VEXT_ST_STRIDE(vsse8_v, int8_t, ste_b_tlb)
GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h_tlb)
GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w_tlb)
GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d_tlb)
344
345 /*
346 * unit-stride: access elements stored contiguously in memory
347 */
348
349 /* unmasked unit-stride load and store operation */
350 static inline QEMU_ALWAYS_INLINE void
351 vext_page_ldst_us(CPURISCVState *env, void *vd, target_ulong addr,
352 uint32_t elems, uint32_t nf, uint32_t max_elems,
353 uint32_t log2_esz, bool is_load, int mmu_index,
354 vext_ldst_elem_fn_tlb *ldst_tlb,
355 vext_ldst_elem_fn_host *ldst_host, uintptr_t ra)
356 {
357 void *host;
358 int i, k, flags;
359 uint32_t esz = 1 << log2_esz;
360 uint32_t size = (elems * nf) << log2_esz;
361 uint32_t evl = env->vstart + elems;
362 MMUAccessType access_type = is_load ? MMU_DATA_LOAD : MMU_DATA_STORE;
363
364 /* Check page permission/pmp/watchpoint/etc. */
365 probe_pages(env, addr, size, ra, access_type, mmu_index, &host, &flags,
366 true);
367
368 if (flags == 0) {
369 if (nf == 1) {
370 vext_continuous_ldst_host(env, ldst_host, vd, evl, env->vstart,
371 host, esz, is_load);
372 } else {
373 for (i = env->vstart; i < evl; ++i) {
374 k = 0;
375 while (k < nf) {
376 ldst_host(vd, i + k * max_elems, host);
377 host += esz;
378 k++;
379 }
380 }
381 }
382 env->vstart += elems;
383 } else {
384 if (nf == 1) {
385 vext_continuous_ldst_tlb(env, ldst_tlb, vd, evl, addr, env->vstart,
386 ra, esz, is_load);
387 } else {
388 /* load bytes from guest memory */
389 for (i = env->vstart; i < evl; env->vstart = ++i) {
390 k = 0;
391 while (k < nf) {
392 ldst_tlb(env, adjust_addr(env, addr), i + k * max_elems,
393 vd, ra);
394 addr += esz;
395 k++;
396 }
397 }
398 }
399 }
400 }
401
/*
 * Unit-stride load/store of 'evl' elements starting at env->vstart.
 * The transfer is split at the first page boundary so that each half can
 * use the single-page fast path of vext_page_ldst_us(); a segment that
 * straddles the boundary is handled field by field through the TLB.
 */
static inline QEMU_ALWAYS_INLINE void
vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
             vext_ldst_elem_fn_tlb *ldst_tlb,
             vext_ldst_elem_fn_host *ldst_host, uint32_t log2_esz,
             uint32_t evl, uintptr_t ra, bool is_load)
{
    uint32_t k;
    target_ulong page_split, elems, addr;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t msize = nf * esz;
    int mmu_index = riscv_env_mmu_index(env, false);

    VSTART_CHECK_EARLY_EXIT(env, evl);

#if defined(CONFIG_USER_ONLY)
    /*
     * For data sizes <= 6 bytes we get better performance by simply calling
     * vext_continuous_ldst_tlb
     */
    if (nf == 1 && (evl << log2_esz) <= 6) {
        addr = base + (env->vstart << log2_esz);
        vext_continuous_ldst_tlb(env, ldst_tlb, vd, evl, addr, env->vstart, ra,
                                 esz, is_load);

        env->vstart = 0;
        vext_set_tail_elems_1s(evl, vd, desc, nf, esz, max_elems);
        return;
    }
#endif

    /* Calculate the page range of first page */
    addr = base + ((env->vstart * nf) << log2_esz);
    page_split = -(addr | TARGET_PAGE_MASK);
    /* Get number of elements */
    elems = page_split / msize;
    if (unlikely(env->vstart + elems >= evl)) {
        elems = evl - env->vstart;
    }

    /* Load/store elements in the first page */
    if (likely(elems)) {
        vext_page_ldst_us(env, vd, addr, elems, nf, max_elems, log2_esz,
                          is_load, mmu_index, ldst_tlb, ldst_host, ra);
    }

    /* Load/store elements in the second page */
    if (unlikely(env->vstart < evl)) {
        /* Cross page element */
        if (unlikely(page_split % msize)) {
            /* access the straddling segment's fields one at a time */
            for (k = 0; k < nf; k++) {
                addr = base + ((env->vstart * nf + k) << log2_esz);
                ldst_tlb(env, adjust_addr(env, addr),
                         env->vstart + k * max_elems, vd, ra);
            }
            env->vstart++;
        }

        addr = base + ((env->vstart * nf) << log2_esz);
        /* Get number of elements of second page */
        elems = evl - env->vstart;

        /* Load/store elements in the second page */
        vext_page_ldst_us(env, vd, addr, elems, nf, max_elems, log2_esz,
                          is_load, mmu_index, ldst_tlb, ldst_host, ra);
    }

    env->vstart = 0;
    vext_set_tail_elems_1s(evl, vd, desc, nf, esz, max_elems);
}
473
474 /*
475 * masked unit-stride load and store operation will be a special case of
476 * stride, stride = NF * sizeof (ETYPE)
477 */
478
479 #define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN_TLB, LOAD_FN_HOST) \
480 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
481 CPURISCVState *env, uint32_t desc) \
482 { \
483 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \
484 vext_ldst_stride(vd, v0, base, stride, env, desc, false, \
485 LOAD_FN_TLB, ctzl(sizeof(ETYPE)), GETPC()); \
486 } \
487 \
488 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
489 CPURISCVState *env, uint32_t desc) \
490 { \
491 vext_ldst_us(vd, base, env, desc, LOAD_FN_TLB, LOAD_FN_HOST, \
492 ctzl(sizeof(ETYPE)), env->vl, GETPC(), true); \
493 }
494
GEN_VEXT_LD_US(vle8_v,int8_t,lde_b_tlb,lde_b_host)495 GEN_VEXT_LD_US(vle8_v, int8_t, lde_b_tlb, lde_b_host)
496 GEN_VEXT_LD_US(vle16_v, int16_t, lde_h_tlb, lde_h_host)
497 GEN_VEXT_LD_US(vle32_v, int32_t, lde_w_tlb, lde_w_host)
498 GEN_VEXT_LD_US(vle64_v, int64_t, lde_d_tlb, lde_d_host)
499
/* Unit-stride store entry points; mirrors GEN_VEXT_LD_US. */
#define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN_TLB, STORE_FN_HOST)        \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
                         CPURISCVState *env, uint32_t desc)             \
{                                                                       \
    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));             \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false,            \
                     STORE_FN_TLB, ctzl(sizeof(ETYPE)), GETPC());       \
}                                                                       \
                                                                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  CPURISCVState *env, uint32_t desc)                    \
{                                                                       \
    vext_ldst_us(vd, base, env, desc, STORE_FN_TLB, STORE_FN_HOST,      \
                 ctzl(sizeof(ETYPE)), env->vl, GETPC(), false);         \
}

GEN_VEXT_ST_US(vse8_v, int8_t, ste_b_tlb, ste_b_host)
GEN_VEXT_ST_US(vse16_v, int16_t, ste_h_tlb, ste_h_host)
GEN_VEXT_ST_US(vse32_v, int32_t, ste_w_tlb, ste_w_host)
GEN_VEXT_ST_US(vse64_v, int64_t, ste_d_tlb, ste_d_host)
520
521 /*
522 * unit stride mask load and store, EEW = 1
523 */
524 void HELPER(vlm_v)(void *vd, void *v0, target_ulong base,
525 CPURISCVState *env, uint32_t desc)
526 {
527 /* evl = ceil(vl/8) */
528 uint8_t evl = (env->vl + 7) >> 3;
529 vext_ldst_us(vd, base, env, desc, lde_b_tlb, lde_b_host,
530 0, evl, GETPC(), true);
531 }
532
/* vsm.v: store a mask register as ceil(vl/8) bytes, always unmasked. */
void HELPER(vsm_v)(void *vd, void *v0, target_ulong base,
                   CPURISCVState *env, uint32_t desc)
{
    /* evl = ceil(vl/8) */
    uint8_t evl = (env->vl + 7) >> 3;
    vext_ldst_us(vd, base, env, desc, ste_b_tlb, ste_b_host,
                 0, evl, GETPC(), false);
}
541
542 /*
543 * index: access vector element from indexed memory
544 */
545 typedef target_ulong vext_get_index_addr(target_ulong base,
546 uint32_t idx, void *vs2);
547
548 #define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \
549 static target_ulong NAME(target_ulong base, \
550 uint32_t idx, void *vs2) \
551 { \
552 return (base + *((ETYPE *)vs2 + H(idx))); \
553 }
554
GEN_VEXT_GET_INDEX_ADDR(idx_b,uint8_t,H1)555 GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t, H1)
556 GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
557 GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
558 GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)
559
/*
 * Indexed (gather/scatter) load/store: element i of each field is
 * transferred at get_index_addr(base, i, vs2) + field offset, through
 * the TLB accessor.  env->vstart is advanced per element so a trapped
 * access resumes at the faulting element.
 */
static inline void
vext_ldst_index(void *vd, void *v0, target_ulong base,
                void *vs2, CPURISCVState *env, uint32_t desc,
                vext_get_index_addr get_index_addr,
                vext_ldst_elem_fn_tlb *ldst_elem,
                uint32_t log2_esz, uintptr_t ra)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t vma = vext_vma(desc);

    VSTART_CHECK_EARLY_EXIT(env, env->vl);

    /* load bytes from guest memory */
    for (i = env->vstart; i < env->vl; env->vstart = ++i) {
        k = 0;
        while (k < nf) {
            if (!vm && !vext_elem_mask(v0, i)) {
                /* set masked-off elements to 1s */
                vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
                                  (i + k * max_elems + 1) * esz);
                k++;
                continue;
            }
            abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;

    vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
}
596
/* Indexed load entry points: the name encodes index EEW then data EEW. */
#define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN)                  \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                   \
                  void *vs2, CPURISCVState *env, uint32_t desc)            \
{                                                                          \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,                \
                    LOAD_FN, ctzl(sizeof(ETYPE)), GETPC());                \
}

GEN_VEXT_LD_INDEX(vlxei8_8_v, int8_t, idx_b, lde_b_tlb)
GEN_VEXT_LD_INDEX(vlxei8_16_v, int16_t, idx_b, lde_h_tlb)
GEN_VEXT_LD_INDEX(vlxei8_32_v, int32_t, idx_b, lde_w_tlb)
GEN_VEXT_LD_INDEX(vlxei8_64_v, int64_t, idx_b, lde_d_tlb)
GEN_VEXT_LD_INDEX(vlxei16_8_v, int8_t, idx_h, lde_b_tlb)
GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h_tlb)
GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w_tlb)
GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d_tlb)
GEN_VEXT_LD_INDEX(vlxei32_8_v, int8_t, idx_w, lde_b_tlb)
GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h_tlb)
GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w_tlb)
GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d_tlb)
GEN_VEXT_LD_INDEX(vlxei64_8_v, int8_t, idx_d, lde_b_tlb)
GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h_tlb)
GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w_tlb)
GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d_tlb)
621
/* Indexed store entry points; mirrors GEN_VEXT_LD_INDEX. */
#define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN)       \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,         \
                  void *vs2, CPURISCVState *env, uint32_t desc)  \
{                                                                \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,      \
                    STORE_FN, ctzl(sizeof(ETYPE)),               \
                    GETPC());                                    \
}

GEN_VEXT_ST_INDEX(vsxei8_8_v, int8_t, idx_b, ste_b_tlb)
GEN_VEXT_ST_INDEX(vsxei8_16_v, int16_t, idx_b, ste_h_tlb)
GEN_VEXT_ST_INDEX(vsxei8_32_v, int32_t, idx_b, ste_w_tlb)
GEN_VEXT_ST_INDEX(vsxei8_64_v, int64_t, idx_b, ste_d_tlb)
GEN_VEXT_ST_INDEX(vsxei16_8_v, int8_t, idx_h, ste_b_tlb)
GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h_tlb)
GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w_tlb)
GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d_tlb)
GEN_VEXT_ST_INDEX(vsxei32_8_v, int8_t, idx_w, ste_b_tlb)
GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h_tlb)
GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w_tlb)
GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d_tlb)
GEN_VEXT_ST_INDEX(vsxei64_8_v, int8_t, idx_d, ste_b_tlb)
GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h_tlb)
GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w_tlb)
GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d_tlb)
647
648 /*
649 * unit-stride fault-only-fisrt load instructions
650 */
651 static inline void
652 vext_ldff(void *vd, void *v0, target_ulong base, CPURISCVState *env,
653 uint32_t desc, vext_ldst_elem_fn_tlb *ldst_tlb,
654 vext_ldst_elem_fn_host *ldst_host, uint32_t log2_esz, uintptr_t ra)
655 {
656 uint32_t i, k, vl = 0;
657 uint32_t nf = vext_nf(desc);
658 uint32_t vm = vext_vm(desc);
659 uint32_t max_elems = vext_max_elems(desc, log2_esz);
660 uint32_t esz = 1 << log2_esz;
661 uint32_t msize = nf * esz;
662 uint32_t vma = vext_vma(desc);
663 target_ulong addr, addr_probe, addr_i, offset, remain, page_split, elems;
664 int mmu_index = riscv_env_mmu_index(env, false);
665 int flags, probe_flags;
666 void *host;
667
668 VSTART_CHECK_EARLY_EXIT(env, env->vl);
669
670 addr = base + ((env->vstart * nf) << log2_esz);
671 page_split = -(addr | TARGET_PAGE_MASK);
672 /* Get number of elements */
673 elems = page_split / msize;
674 if (unlikely(env->vstart + elems >= env->vl)) {
675 elems = env->vl - env->vstart;
676 }
677
678 /* Check page permission/pmp/watchpoint/etc. */
679 probe_pages(env, addr, elems * msize, ra, MMU_DATA_LOAD, mmu_index, &host,
680 &flags, true);
681
682 /* If we are crossing a page check also the second page. */
683 if (env->vl > elems) {
684 addr_probe = addr + (elems << log2_esz);
685 probe_pages(env, addr_probe, elems * msize, ra, MMU_DATA_LOAD,
686 mmu_index, &host, &probe_flags, true);
687 flags |= probe_flags;
688 }
689
690 if (flags & ~TLB_WATCHPOINT) {
691 /* probe every access */
692 for (i = env->vstart; i < env->vl; i++) {
693 if (!vm && !vext_elem_mask(v0, i)) {
694 continue;
695 }
696 addr_i = adjust_addr(env, base + i * (nf << log2_esz));
697 if (i == 0) {
698 /* Allow fault on first element. */
699 probe_pages(env, addr_i, nf << log2_esz, ra, MMU_DATA_LOAD,
700 mmu_index, &host, NULL, false);
701 } else {
702 remain = nf << log2_esz;
703 while (remain > 0) {
704 offset = -(addr_i | TARGET_PAGE_MASK);
705
706 /* Probe nonfault on subsequent elements. */
707 probe_pages(env, addr_i, offset, 0, MMU_DATA_LOAD,
708 mmu_index, &host, &flags, true);
709
710 /*
711 * Stop if invalid (unmapped) or mmio (transaction may
712 * fail). Do not stop if watchpoint, as the spec says that
713 * first-fault should continue to access the same
714 * elements regardless of any watchpoint.
715 */
716 if (flags & ~TLB_WATCHPOINT) {
717 vl = i;
718 goto ProbeSuccess;
719 }
720 if (remain <= offset) {
721 break;
722 }
723 remain -= offset;
724 addr_i = adjust_addr(env, addr_i + offset);
725 }
726 }
727 }
728 }
729 ProbeSuccess:
730 /* load bytes from guest memory */
731 if (vl != 0) {
732 env->vl = vl;
733 }
734
735 if (env->vstart < env->vl) {
736 if (vm) {
737 /* Load/store elements in the first page */
738 if (likely(elems)) {
739 vext_page_ldst_us(env, vd, addr, elems, nf, max_elems,
740 log2_esz, true, mmu_index, ldst_tlb,
741 ldst_host, ra);
742 }
743
744 /* Load/store elements in the second page */
745 if (unlikely(env->vstart < env->vl)) {
746 /* Cross page element */
747 if (unlikely(page_split % msize)) {
748 for (k = 0; k < nf; k++) {
749 addr = base + ((env->vstart * nf + k) << log2_esz);
750 ldst_tlb(env, adjust_addr(env, addr),
751 env->vstart + k * max_elems, vd, ra);
752 }
753 env->vstart++;
754 }
755
756 addr = base + ((env->vstart * nf) << log2_esz);
757 /* Get number of elements of second page */
758 elems = env->vl - env->vstart;
759
760 /* Load/store elements in the second page */
761 vext_page_ldst_us(env, vd, addr, elems, nf, max_elems,
762 log2_esz, true, mmu_index, ldst_tlb,
763 ldst_host, ra);
764 }
765 } else {
766 for (i = env->vstart; i < env->vl; i++) {
767 k = 0;
768 while (k < nf) {
769 if (!vext_elem_mask(v0, i)) {
770 /* set masked-off elements to 1s */
771 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
772 (i + k * max_elems + 1) * esz);
773 k++;
774 continue;
775 }
776 addr = base + ((i * nf + k) << log2_esz);
777 ldst_tlb(env, adjust_addr(env, addr), i + k * max_elems,
778 vd, ra);
779 k++;
780 }
781 }
782 }
783 }
784 env->vstart = 0;
785
786 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
787 }
788
/* Fault-only-first load entry points, one per element width. */
#define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN_TLB, LOAD_FN_HOST)   \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,        \
                  CPURISCVState *env, uint32_t desc)            \
{                                                               \
    vext_ldff(vd, v0, base, env, desc, LOAD_FN_TLB,             \
              LOAD_FN_HOST, ctzl(sizeof(ETYPE)), GETPC());      \
}

GEN_VEXT_LDFF(vle8ff_v, int8_t, lde_b_tlb, lde_b_host)
GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h_tlb, lde_h_host)
GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w_tlb, lde_w_host)
GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d_tlb, lde_d_host)
801
/* Scalar binary operations plugged into the RVV helper-generation macros. */
#define DO_SWAP(N, M) (M)
#define DO_AND(N, M) (N & M)
#define DO_XOR(N, M) (N ^ M)
#define DO_OR(N, M) (N | M)
#define DO_ADD(N, M) (N + M)

/* Signed min/max */
#define DO_MAX(N, M) ((N) >= (M) ? (N) : (M))
#define DO_MIN(N, M) ((N) >= (M) ? (M) : (N))
811
812 /*
813 * load and store whole register instructions
814 */
815 static inline QEMU_ALWAYS_INLINE void
816 vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
817 vext_ldst_elem_fn_tlb *ldst_tlb,
818 vext_ldst_elem_fn_host *ldst_host, uint32_t log2_esz,
819 uintptr_t ra, bool is_load)
820 {
821 target_ulong page_split, elems, addr;
822 uint32_t nf = vext_nf(desc);
823 uint32_t vlenb = riscv_cpu_cfg(env)->vlenb;
824 uint32_t max_elems = vlenb >> log2_esz;
825 uint32_t evl = nf * max_elems;
826 uint32_t esz = 1 << log2_esz;
827 int mmu_index = riscv_env_mmu_index(env, false);
828
829 /* Calculate the page range of first page */
830 addr = base + (env->vstart << log2_esz);
831 page_split = -(addr | TARGET_PAGE_MASK);
832 /* Get number of elements */
833 elems = page_split / esz;
834 if (unlikely(env->vstart + elems >= evl)) {
835 elems = evl - env->vstart;
836 }
837
838 /* Load/store elements in the first page */
839 if (likely(elems)) {
840 vext_page_ldst_us(env, vd, addr, elems, 1, max_elems, log2_esz,
841 is_load, mmu_index, ldst_tlb, ldst_host, ra);
842 }
843
844 /* Load/store elements in the second page */
845 if (unlikely(env->vstart < evl)) {
846 /* Cross page element */
847 if (unlikely(page_split % esz)) {
848 addr = base + (env->vstart << log2_esz);
849 ldst_tlb(env, adjust_addr(env, addr), env->vstart, vd, ra);
850 env->vstart++;
851 }
852
853 addr = base + (env->vstart << log2_esz);
854 /* Get number of elements of second page */
855 elems = evl - env->vstart;
856
857 /* Load/store elements in the second page */
858 vext_page_ldst_us(env, vd, addr, elems, 1, max_elems, log2_esz,
859 is_load, mmu_index, ldst_tlb, ldst_host, ra);
860 }
861
862 env->vstart = 0;
863 }
864
/* Whole-register load entry points: vl<nf>re<eew>.v variants. */
#define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN_TLB, LOAD_FN_HOST)   \
void HELPER(NAME)(void *vd, target_ulong base, CPURISCVState *env,  \
                  uint32_t desc)                                    \
{                                                                   \
    vext_ldst_whole(vd, base, env, desc, LOAD_FN_TLB, LOAD_FN_HOST, \
                    ctzl(sizeof(ETYPE)), GETPC(), true);            \
}

GEN_VEXT_LD_WHOLE(vl1re8_v, int8_t, lde_b_tlb, lde_b_host)
GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h_tlb, lde_h_host)
GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w_tlb, lde_w_host)
GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d_tlb, lde_d_host)
GEN_VEXT_LD_WHOLE(vl2re8_v, int8_t, lde_b_tlb, lde_b_host)
GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h_tlb, lde_h_host)
GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w_tlb, lde_w_host)
GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d_tlb, lde_d_host)
GEN_VEXT_LD_WHOLE(vl4re8_v, int8_t, lde_b_tlb, lde_b_host)
GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h_tlb, lde_h_host)
GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w_tlb, lde_w_host)
GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d_tlb, lde_d_host)
GEN_VEXT_LD_WHOLE(vl8re8_v, int8_t, lde_b_tlb, lde_b_host)
GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h_tlb, lde_h_host)
GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w_tlb, lde_w_host)
GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d_tlb, lde_d_host)
889
/* Whole-register store entry points; all use byte-wide accessors. */
#define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN_TLB, STORE_FN_HOST)     \
void HELPER(NAME)(void *vd, target_ulong base, CPURISCVState *env,      \
                  uint32_t desc)                                        \
{                                                                       \
    vext_ldst_whole(vd, base, env, desc, STORE_FN_TLB, STORE_FN_HOST,   \
                    ctzl(sizeof(ETYPE)), GETPC(), false);               \
}

GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b_tlb, ste_b_host)
GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b_tlb, ste_b_host)
GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b_tlb, ste_b_host)
GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b_tlb, ste_b_host)
902
903 /*
904 * Vector Integer Arithmetic Instructions
905 */
906
907 /* (TD, T1, T2, TX1, TX2) */
908 #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
909 #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
910 #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
911 #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
912 #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
913 #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
914 #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
915 #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
916 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
917 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
918 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
919 #define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
920 #define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
921 #define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
922 #define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
923 #define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
924 #define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
925 #define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
926 #define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
927 #define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
928 #define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
929 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
930 #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t
931
932 #define DO_SUB(N, M) (N - M)
933 #define DO_RSUB(N, M) (M - N)
934
/*
 * vadd.vv / vsub.vv: RVVCALL defines the per-element operation,
 * GEN_VEXT_VV wraps it in the masked/tail-handling helper body.
 */
RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)

GEN_VEXT_VV(vadd_vv_b, 1)
GEN_VEXT_VV(vadd_vv_h, 2)
GEN_VEXT_VV(vadd_vv_w, 4)
GEN_VEXT_VV(vadd_vv_d, 8)
GEN_VEXT_VV(vsub_vv_b, 1)
GEN_VEXT_VV(vsub_vv_h, 2)
GEN_VEXT_VV(vsub_vv_w, 4)
GEN_VEXT_VV(vsub_vv_d, 8)


/* vadd.vx / vsub.vx / vrsub.vx: vector-scalar forms of the above. */
RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)

GEN_VEXT_VX(vadd_vx_b, 1)
GEN_VEXT_VX(vadd_vx_h, 2)
GEN_VEXT_VX(vadd_vx_w, 4)
GEN_VEXT_VX(vadd_vx_d, 8)
GEN_VEXT_VX(vsub_vx_b, 1)
GEN_VEXT_VX(vsub_vx_h, 2)
GEN_VEXT_VX(vsub_vx_w, 4)
GEN_VEXT_VX(vsub_vx_d, 8)
GEN_VEXT_VX(vrsub_vx_b, 1)
GEN_VEXT_VX(vrsub_vx_h, 2)
GEN_VEXT_VX(vrsub_vx_w, 4)
GEN_VEXT_VX(vrsub_vx_d, 8)
979
980 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
981 {
982 intptr_t oprsz = simd_oprsz(desc);
983 intptr_t i;
984
985 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
986 *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
987 }
988 }
989
HELPER(vec_rsubs16)990 void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
991 {
992 intptr_t oprsz = simd_oprsz(desc);
993 intptr_t i;
994
995 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
996 *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
997 }
998 }
999
HELPER(vec_rsubs32)1000 void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
1001 {
1002 intptr_t oprsz = simd_oprsz(desc);
1003 intptr_t i;
1004
1005 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1006 *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
1007 }
1008 }
1009
HELPER(vec_rsubs64)1010 void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
1011 {
1012 intptr_t oprsz = simd_oprsz(desc);
1013 intptr_t i;
1014
1015 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1016 *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
1017 }
1018 }
1019
/* Vector Widening Integer Add/Subtract */
#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
/* NOTE(review): identical redefinition of the WOP_SSS_* tuples defined
 * above — benign per C (redefinition with identical token sequence). */
#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
/* W* source tuples: operand vs2 is already double-width (wv/wx forms). */
#define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
#define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
#define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
#define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t
#define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t
#define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t
RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
GEN_VEXT_VV(vwaddu_vv_b, 2)
GEN_VEXT_VV(vwaddu_vv_h, 4)
GEN_VEXT_VV(vwaddu_vv_w, 8)
GEN_VEXT_VV(vwsubu_vv_b, 2)
GEN_VEXT_VV(vwsubu_vv_h, 4)
GEN_VEXT_VV(vwsubu_vv_w, 8)
GEN_VEXT_VV(vwadd_vv_b, 2)
GEN_VEXT_VV(vwadd_vv_h, 4)
GEN_VEXT_VV(vwadd_vv_w, 8)
GEN_VEXT_VV(vwsub_vv_b, 2)
GEN_VEXT_VV(vwsub_vv_h, 4)
GEN_VEXT_VV(vwsub_vv_w, 8)
GEN_VEXT_VV(vwaddu_wv_b, 2)
GEN_VEXT_VV(vwaddu_wv_h, 4)
GEN_VEXT_VV(vwaddu_wv_w, 8)
GEN_VEXT_VV(vwsubu_wv_b, 2)
GEN_VEXT_VV(vwsubu_wv_h, 4)
GEN_VEXT_VV(vwsubu_wv_w, 8)
GEN_VEXT_VV(vwadd_wv_b, 2)
GEN_VEXT_VV(vwadd_wv_h, 4)
GEN_VEXT_VV(vwadd_wv_w, 8)
GEN_VEXT_VV(vwsub_wv_b, 2)
GEN_VEXT_VV(vwsub_wv_h, 4)
GEN_VEXT_VV(vwsub_wv_w, 8)

/* Vector-scalar (vx/wx) forms of the widening add/subtract. */
RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
GEN_VEXT_VX(vwaddu_vx_b, 2)
GEN_VEXT_VX(vwaddu_vx_h, 4)
GEN_VEXT_VX(vwaddu_vx_w, 8)
GEN_VEXT_VX(vwsubu_vx_b, 2)
GEN_VEXT_VX(vwsubu_vx_h, 4)
GEN_VEXT_VX(vwsubu_vx_w, 8)
GEN_VEXT_VX(vwadd_vx_b, 2)
GEN_VEXT_VX(vwadd_vx_h, 4)
GEN_VEXT_VX(vwadd_vx_w, 8)
GEN_VEXT_VX(vwsub_vx_b, 2)
GEN_VEXT_VX(vwsub_vx_h, 4)
GEN_VEXT_VX(vwsub_vx_w, 8)
GEN_VEXT_VX(vwaddu_wx_b, 2)
GEN_VEXT_VX(vwaddu_wx_h, 4)
GEN_VEXT_VX(vwaddu_wx_w, 8)
GEN_VEXT_VX(vwsubu_wx_b, 2)
GEN_VEXT_VX(vwsubu_wx_h, 4)
GEN_VEXT_VX(vwsubu_wx_w, 8)
GEN_VEXT_VX(vwadd_wx_b, 2)
GEN_VEXT_VX(vwadd_wx_h, 4)
GEN_VEXT_VX(vwadd_wx_w, 8)
GEN_VEXT_VX(vwsub_wx_b, 2)
GEN_VEXT_VX(vwsub_wx_h, 4)
GEN_VEXT_VX(vwsub_wx_w, 8)
1130
/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
#define DO_VADC(N, M, C) (N + M + C)
#define DO_VSBC(N, M, C) (N - M - C)

/*
 * vadc.vvm / vsbc.vvm: vd[i] = vs2[i] +/- vs1[i] +/- v0.mask[i].
 * These insns are always unmasked — v0 supplies the carry/borrow bit,
 * not an execution mask — so every active element is written.
 * Tail elements are set to all-1s per the tail-agnostic policy (vta).
 */
#define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP)              \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vl = env->vl;                                    \
    uint32_t esz = sizeof(ETYPE);                             \
    uint32_t total_elems =                                    \
        vext_get_total_elems(env, desc, esz);                 \
    uint32_t vta = vext_vta(desc);                            \
    uint32_t i;                                               \
                                                              \
    VSTART_CHECK_EARLY_EXIT(env, vl);                         \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        ETYPE carry = vext_elem_mask(v0, i);                  \
                                                              \
        *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry);         \
    }                                                         \
    env->vstart = 0;                                          \
    /* set tail elements to 1s */                             \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);  \
}

GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)

GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)
1169
/*
 * vadc.vxm / vsbc.vxm: vector-scalar form; the scalar s1 is
 * sign-extended through target_long then truncated to the element type.
 */
#define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP)                     \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,    \
                  CPURISCVState *env, uint32_t desc)                 \
{                                                                    \
    uint32_t vl = env->vl;                                           \
    uint32_t esz = sizeof(ETYPE);                                    \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);     \
    uint32_t vta = vext_vta(desc);                                   \
    uint32_t i;                                                      \
                                                                     \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                \
                                                                     \
    for (i = env->vstart; i < vl; i++) {                             \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                           \
        ETYPE carry = vext_elem_mask(v0, i);                         \
                                                                     \
        *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
    }                                                                \
    env->vstart = 0;                                                 \
    /* set tail elements to 1s */                                    \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);         \
}

GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)

GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)
1202
/*
 * Carry-out predicates: DO_MADC detects unsigned overflow of N + M (+ C)
 * via wraparound (sum <= N, or < N when no carry-in); DO_MSBC detects
 * borrow-out of N - M (- C).
 */
#define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N :         \
                          (__typeof(N))(N + M) < N)
#define DO_MSBC(N, M, C) (C ? N <= M : N < M)

/*
 * vmadc/vmsbc: write the per-element carry/borrow-out into mask register
 * vd.  With vm clear, v0 supplies a carry-in per element.
 */
#define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP)             \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vl = env->vl;                                    \
    uint32_t vm = vext_vm(desc);                              \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;    \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);              \
    uint32_t i;                                               \
                                                              \
    VSTART_CHECK_EARLY_EXIT(env, vl);                         \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        ETYPE carry = !vm && vext_elem_mask(v0, i);           \
        vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry));      \
    }                                                         \
    env->vstart = 0;                                          \
    /*
     * mask destination register is always tail-agnostic
     * set tail elements to 1s
     */                                                       \
    if (vta_all_1s) {                                         \
        for (; i < total_elems; i++) {                        \
            vext_set_elem_mask(vd, i, 1);                     \
        }                                                     \
    }                                                         \
}

GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)

GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)
1246
/* Vector-scalar form of vmadc/vmsbc; scalar is sign-extended to ETYPE. */
#define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP)             \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,        \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{                                                             \
    uint32_t vl = env->vl;                                    \
    uint32_t vm = vext_vm(desc);                              \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;    \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);              \
    uint32_t i;                                               \
                                                              \
    VSTART_CHECK_EARLY_EXIT(env, vl);                         \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        ETYPE carry = !vm && vext_elem_mask(v0, i);           \
        vext_set_elem_mask(vd, i,                             \
                DO_OP(s2, (ETYPE)(target_long)s1, carry));    \
    }                                                         \
    env->vstart = 0;                                          \
    /*
     * mask destination register is always tail-agnostic
     * set tail elements to 1s
     */                                                       \
    if (vta_all_1s) {                                         \
        for (; i < total_elems; i++) {                        \
            vext_set_elem_mask(vd, i, 1);                     \
        }                                                     \
    }                                                         \
}

GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)

GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)
1286
/* Vector Bitwise Logical Instructions */
RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
GEN_VEXT_VV(vand_vv_b, 1)
GEN_VEXT_VV(vand_vv_h, 2)
GEN_VEXT_VV(vand_vv_w, 4)
GEN_VEXT_VV(vand_vv_d, 8)
GEN_VEXT_VV(vor_vv_b, 1)
GEN_VEXT_VV(vor_vv_h, 2)
GEN_VEXT_VV(vor_vv_w, 4)
GEN_VEXT_VV(vor_vv_d, 8)
GEN_VEXT_VV(vxor_vv_b, 1)
GEN_VEXT_VV(vxor_vv_h, 2)
GEN_VEXT_VV(vxor_vv_w, 4)
GEN_VEXT_VV(vxor_vv_d, 8)

/* Vector-scalar forms. */
RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
GEN_VEXT_VX(vand_vx_b, 1)
GEN_VEXT_VX(vand_vx_h, 2)
GEN_VEXT_VX(vand_vx_w, 4)
GEN_VEXT_VX(vand_vx_d, 8)
GEN_VEXT_VX(vor_vx_b, 1)
GEN_VEXT_VX(vor_vx_h, 2)
GEN_VEXT_VX(vor_vx_w, 4)
GEN_VEXT_VX(vor_vx_d, 8)
GEN_VEXT_VX(vxor_vx_b, 1)
GEN_VEXT_VX(vxor_vx_h, 2)
GEN_VEXT_VX(vxor_vx_w, 4)
GEN_VEXT_VX(vxor_vx_d, 8)
1337
/* Vector Single-Width Bit Shift Instructions */
#define DO_SLL(N, M) (N << (M))
#define DO_SRL(N, M) (N >> (M))

/*
 * generate the helpers for shift instructions with two vector operators
 *
 * TS1 is the shift-amount/destination element type, TS2 the shifted
 * value's type (signed for arithmetic right shift); MASK limits the
 * shift amount to SEW-1 bits, so DO_SRL on a signed TS2 yields sra.
 */
#define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK)       \
void HELPER(NAME)(void *vd, void *v0, void *vs1,                    \
                  void *vs2, CPURISCVState *env, uint32_t desc)     \
{                                                                   \
    uint32_t vm = vext_vm(desc);                                    \
    uint32_t vl = env->vl;                                          \
    uint32_t esz = sizeof(TS1);                                     \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);    \
    uint32_t vta = vext_vta(desc);                                  \
    uint32_t vma = vext_vma(desc);                                  \
    uint32_t i;                                                     \
                                                                    \
    VSTART_CHECK_EARLY_EXIT(env, vl);                               \
                                                                    \
    for (i = env->vstart; i < vl; i++) {                            \
        if (!vm && !vext_elem_mask(v0, i)) {                        \
            /* set masked-off elements to 1s */                     \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);     \
            continue;                                               \
        }                                                           \
        TS1 s1 = *((TS1 *)vs1 + HS1(i));                            \
        TS2 s2 = *((TS2 *)vs2 + HS2(i));                            \
        *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK);                  \
    }                                                               \
    env->vstart = 0;                                                \
    /* set tail elements to 1s */                                   \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);        \
}

GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7)
GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)

GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)

/* vsra: signed TS2 makes DO_SRL an arithmetic shift. */
GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)
1386
/*
 * generate the helpers for shift instructions with one vector and one scalar
 *
 * Also reused below for the narrowing shifts, where TS2 is double the
 * destination width and MASK covers 2*SEW-1 bits.
 */
#define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,      \
                  void *vs2, CPURISCVState *env,            \
                  uint32_t desc)                            \
{                                                           \
    uint32_t vm = vext_vm(desc);                            \
    uint32_t vl = env->vl;                                  \
    uint32_t esz = sizeof(TD);                              \
    uint32_t total_elems =                                  \
        vext_get_total_elems(env, desc, esz);               \
    uint32_t vta = vext_vta(desc);                          \
    uint32_t vma = vext_vma(desc);                          \
    uint32_t i;                                             \
                                                            \
    VSTART_CHECK_EARLY_EXIT(env, vl);                       \
                                                            \
    for (i = env->vstart; i < vl; i++) {                    \
        if (!vm && !vext_elem_mask(v0, i)) {                \
            /* set masked-off elements to 1s */             \
            vext_set_elems_1s(vd, vma, i * esz,             \
                              (i + 1) * esz);               \
            continue;                                       \
        }                                                   \
        TS2 s2 = *((TS2 *)vs2 + HS2(i));                    \
        *((TD *)vd + HD(i)) = OP(s2, s1 & MASK);            \
    }                                                       \
    env->vstart = 0;                                        \
    /* set tail elements to 1s */                           \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);\
}

GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)

GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)

GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)

/* Vector Narrowing Integer Right Shift Instructions */
GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1449
/* Vector Integer Comparison Instructions */
#define DO_MSEQ(N, M) (N == M)
#define DO_MSNE(N, M) (N != M)
#define DO_MSLT(N, M) (N < M)
#define DO_MSLE(N, M) (N <= M)
#define DO_MSGT(N, M) (N > M)

/*
 * vms*.vv: compare vs2 OP vs1 element-wise and write the result bit
 * into mask register vd.  Masked-off bits follow the mask-agnostic
 * policy (set to 1 when vma).
 */
#define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP)                \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vm = vext_vm(desc);                              \
    uint32_t vl = env->vl;                                    \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;    \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);              \
    uint32_t vma = vext_vma(desc);                            \
    uint32_t i;                                               \
                                                              \
    VSTART_CHECK_EARLY_EXIT(env, vl);                         \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        if (!vm && !vext_elem_mask(v0, i)) {                  \
            /* set masked-off elements to 1s */               \
            if (vma) {                                        \
                vext_set_elem_mask(vd, i, 1);                 \
            }                                                 \
            continue;                                         \
        }                                                     \
        vext_set_elem_mask(vd, i, DO_OP(s2, s1));             \
    }                                                         \
    env->vstart = 0;                                          \
    /*
     * mask destination register is always tail-agnostic
     * set tail elements to 1s
     */                                                       \
    if (vta_all_1s) {                                         \
        for (; i < total_elems; i++) {                        \
            vext_set_elem_mask(vd, i, 1);                     \
        }                                                     \
    }                                                         \
}

GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)

GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)

GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)
1523
/*
 * vms*.vx/.vi: compare each vs2 element against the sign-extended
 * scalar; mask-register result and agnostic policies as in the vv form.
 */
#define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP)                      \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)                \
{                                                                   \
    uint32_t vm = vext_vm(desc);                                    \
    uint32_t vl = env->vl;                                          \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;          \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);                    \
    uint32_t vma = vext_vma(desc);                                  \
    uint32_t i;                                                     \
                                                                    \
    VSTART_CHECK_EARLY_EXIT(env, vl);                               \
                                                                    \
    for (i = env->vstart; i < vl; i++) {                            \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                          \
        if (!vm && !vext_elem_mask(v0, i)) {                        \
            /* set masked-off elements to 1s */                     \
            if (vma) {                                              \
                vext_set_elem_mask(vd, i, 1);                       \
            }                                                       \
            continue;                                               \
        }                                                           \
        vext_set_elem_mask(vd, i,                                   \
                DO_OP(s2, (ETYPE)(target_long)s1));                 \
    }                                                               \
    env->vstart = 0;                                                \
    /*
     * mask destination register is always tail-agnostic
     * set tail elements to 1s
     */                                                             \
    if (vta_all_1s) {                                               \
        for (; i < total_elems; i++) {                              \
            vext_set_elem_mask(vd, i, 1);                           \
        }                                                           \
    }                                                               \
}

GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)

GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)

GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)

GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)
1600
/* Vector Integer Min/Max Instructions */
/* Signedness comes from the operand tuple: UUU = unsigned, SSS = signed. */
RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
GEN_VEXT_VV(vminu_vv_b, 1)
GEN_VEXT_VV(vminu_vv_h, 2)
GEN_VEXT_VV(vminu_vv_w, 4)
GEN_VEXT_VV(vminu_vv_d, 8)
GEN_VEXT_VV(vmin_vv_b, 1)
GEN_VEXT_VV(vmin_vv_h, 2)
GEN_VEXT_VV(vmin_vv_w, 4)
GEN_VEXT_VV(vmin_vv_d, 8)
GEN_VEXT_VV(vmaxu_vv_b, 1)
GEN_VEXT_VV(vmaxu_vv_h, 2)
GEN_VEXT_VV(vmaxu_vv_w, 4)
GEN_VEXT_VV(vmaxu_vv_d, 8)
GEN_VEXT_VV(vmax_vv_b, 1)
GEN_VEXT_VV(vmax_vv_h, 2)
GEN_VEXT_VV(vmax_vv_w, 4)
GEN_VEXT_VV(vmax_vv_d, 8)

/* Vector-scalar forms. */
RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
GEN_VEXT_VX(vminu_vx_b, 1)
GEN_VEXT_VX(vminu_vx_h, 2)
GEN_VEXT_VX(vminu_vx_w, 4)
GEN_VEXT_VX(vminu_vx_d, 8)
GEN_VEXT_VX(vmin_vx_b, 1)
GEN_VEXT_VX(vmin_vx_h, 2)
GEN_VEXT_VX(vmin_vx_w, 4)
GEN_VEXT_VX(vmin_vx_d, 8)
GEN_VEXT_VX(vmaxu_vx_b, 1)
GEN_VEXT_VX(vmaxu_vx_h, 2)
GEN_VEXT_VX(vmaxu_vx_w, 4)
GEN_VEXT_VX(vmaxu_vx_d, 8)
GEN_VEXT_VX(vmax_vx_b, 1)
GEN_VEXT_VX(vmax_vx_h, 2)
GEN_VEXT_VX(vmax_vx_w, 4)
GEN_VEXT_VX(vmax_vx_d, 8)
1667
/* Vector Single-Width Integer Multiply Instructions */
/* Low half of the product; signedness is irrelevant modulo 2^SEW. */
#define DO_MUL(N, M) (N * M)
RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
GEN_VEXT_VV(vmul_vv_b, 1)
GEN_VEXT_VV(vmul_vv_h, 2)
GEN_VEXT_VV(vmul_vv_w, 4)
GEN_VEXT_VV(vmul_vv_d, 8)
1678
/* High byte of the signed 8x8->16 product (vmulh.vv/vx, SEW=8). */
static int8_t do_mulh_b(int8_t s2, int8_t s1)
{
    int16_t prod = (int16_t)s2 * (int16_t)s1;

    return (int8_t)(prod >> 8);
}
1683
/* High halfword of the signed 16x16->32 product (vmulh, SEW=16). */
static int16_t do_mulh_h(int16_t s2, int16_t s1)
{
    int32_t prod = (int32_t)s2 * (int32_t)s1;

    return (int16_t)(prod >> 16);
}
1688
/* High word of the signed 32x32->64 product (vmulh, SEW=32). */
static int32_t do_mulh_w(int32_t s2, int32_t s1)
{
    int64_t prod = (int64_t)s2 * (int64_t)s1;

    return (int32_t)(prod >> 32);
}
1693
/* High doubleword of the signed 64x64->128 product, via host-utils. */
static int64_t do_mulh_d(int64_t s2, int64_t s1)
{
    uint64_t hi_64, lo_64;

    muls64(&lo_64, &hi_64, s1, s2);
    return hi_64;
}
1701
/* High byte of the unsigned 8x8->16 product (vmulhu, SEW=8). */
static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
{
    unsigned prod = (unsigned)s2 * (unsigned)s1;

    return (uint8_t)(prod >> 8);
}
1706
/* Unsigned 16x16 high multiply: widen, multiply, keep the upper 16 bits. */
static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
{
    uint32_t prod = (uint32_t)s2 * (uint32_t)s1;

    return prod >> 16;
}
1711
/* Unsigned 32x32 high multiply: widen, multiply, keep the upper 32 bits. */
static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
{
    uint64_t prod = (uint64_t)s2 * (uint64_t)s1;

    return prod >> 32;
}
1716
/* Unsigned 64x64 -> 128-bit multiply via mulu64(); return the high half. */
static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1)
{
    uint64_t lo, hi;

    mulu64(&lo, &hi, s2, s1);
    return hi;
}
1724
/*
 * Signed * unsigned 8x8 high multiply.  Both operands promote to int,
 * so the product is exact and the shift is arithmetic.
 */
static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
{
    int prod = (int16_t)s2 * (uint16_t)s1;

    return prod >> 8;
}
1729
/*
 * Signed * unsigned 16x16 high multiply.  The product is evaluated as
 * uint32_t (mod 2^32): since the true signed product fits in 32 bits,
 * its two's-complement bit pattern is identical, so the logical shift
 * still yields the correct high half.
 */
static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
{
    uint32_t prod = (int32_t)s2 * (uint32_t)s1;

    return prod >> 16;
}
1734
/*
 * Signed * unsigned 32x32 high multiply.  Evaluated as uint64_t
 * (mod 2^64); the true signed product fits in 64 bits, so the bit
 * pattern — and therefore the extracted high half — is exact.
 */
static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
{
    uint64_t prod = (int64_t)s2 * (uint64_t)s1;

    return prod >> 32;
}
1739
1740 /*
1741 * Let A = signed operand,
1742 * B = unsigned operand
1743 * P = mulu64(A, B), unsigned product
1744 *
1745 * LET X = 2 ** 64 - A, 2's complement of A
1746 * SP = signed product
1747 * THEN
1748 * IF A < 0
1749 * SP = -X * B
1750 * = -(2 ** 64 - A) * B
1751 * = A * B - 2 ** 64 * B
1752 * = P - 2 ** 64 * B
1753 * ELSE
1754 * SP = P
1755 * THEN
1756 * HI_P -= (A < 0 ? B : 0)
1757 */
1758
/*
 * Signed * unsigned 64x64 high multiply: do an unsigned high multiply,
 * then correct the high half when the signed operand is negative
 * (see the derivation in the comment above).
 */
static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
{
    uint64_t lo, hi;

    mulu64(&lo, &hi, s2, s1);
    if (s2 < 0) {
        hi -= s1;
    }
    return hi;
}
1768
/* per-element workers and public helpers for the high-multiply ops (vv) */
RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b)
RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h)
RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w)
RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d)
RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b)
RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h)
RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w)
RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d)
RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
GEN_VEXT_VV(vmulh_vv_b, 1)
GEN_VEXT_VV(vmulh_vv_h, 2)
GEN_VEXT_VV(vmulh_vv_w, 4)
GEN_VEXT_VV(vmulh_vv_d, 8)
GEN_VEXT_VV(vmulhu_vv_b, 1)
GEN_VEXT_VV(vmulhu_vv_h, 2)
GEN_VEXT_VV(vmulhu_vv_w, 4)
GEN_VEXT_VV(vmulhu_vv_d, 8)
GEN_VEXT_VV(vmulhsu_vv_b, 1)
GEN_VEXT_VV(vmulhsu_vv_h, 2)
GEN_VEXT_VV(vmulhsu_vv_w, 4)
GEN_VEXT_VV(vmulhsu_vv_d, 8)

/* the same multiply family with a scalar rs1 operand (vx forms) */
RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL)
RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL)
RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b)
RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h)
RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w)
RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d)
RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b)
RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h)
RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w)
RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d)
RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
GEN_VEXT_VX(vmul_vx_b, 1)
GEN_VEXT_VX(vmul_vx_h, 2)
GEN_VEXT_VX(vmul_vx_w, 4)
GEN_VEXT_VX(vmul_vx_d, 8)
GEN_VEXT_VX(vmulh_vx_b, 1)
GEN_VEXT_VX(vmulh_vx_h, 2)
GEN_VEXT_VX(vmulh_vx_w, 4)
GEN_VEXT_VX(vmulh_vx_d, 8)
GEN_VEXT_VX(vmulhu_vx_b, 1)
GEN_VEXT_VX(vmulhu_vx_h, 2)
GEN_VEXT_VX(vmulhu_vx_w, 4)
GEN_VEXT_VX(vmulhu_vx_d, 8)
GEN_VEXT_VX(vmulhsu_vx_b, 1)
GEN_VEXT_VX(vmulhsu_vx_h, 2)
GEN_VEXT_VX(vmulhsu_vx_w, 4)
GEN_VEXT_VX(vmulhsu_vx_d, 8)
1826
/* Vector Integer Divide Instructions */
/*
 * RVV divide semantics: division by zero returns all-ones for div and
 * the dividend for rem; signed overflow (most-negative / -1) returns the
 * dividend for div and 0 for rem.  "N == -N" is true only for 0 and the
 * most-negative value, so together with "M == -1" it detects the overflow
 * case (N == 0 with M == -1 falls through harmlessly to the same result).
 * NOTE(review): -N on the most-negative value relies on wrapping signed
 * negation — QEMU is built with -fwrapv; confirm for other build setups.
 */
#define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M)
#define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M)
#define DO_DIV(N, M)  (unlikely(M == 0) ? (__typeof(N))(-1) : \
        unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
#define DO_REM(N, M)  (unlikely(M == 0) ? N : \
        unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)

RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU)
RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU)
RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU)
RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU)
RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV)
RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV)
RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV)
RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV)
RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU)
RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU)
RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU)
RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU)
RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
GEN_VEXT_VV(vdivu_vv_b, 1)
GEN_VEXT_VV(vdivu_vv_h, 2)
GEN_VEXT_VV(vdivu_vv_w, 4)
GEN_VEXT_VV(vdivu_vv_d, 8)
GEN_VEXT_VV(vdiv_vv_b, 1)
GEN_VEXT_VV(vdiv_vv_h, 2)
GEN_VEXT_VV(vdiv_vv_w, 4)
GEN_VEXT_VV(vdiv_vv_d, 8)
GEN_VEXT_VV(vremu_vv_b, 1)
GEN_VEXT_VV(vremu_vv_h, 2)
GEN_VEXT_VV(vremu_vv_w, 4)
GEN_VEXT_VV(vremu_vv_d, 8)
GEN_VEXT_VV(vrem_vv_b, 1)
GEN_VEXT_VV(vrem_vv_h, 2)
GEN_VEXT_VV(vrem_vv_w, 4)
GEN_VEXT_VV(vrem_vv_d, 8)

RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU)
RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU)
RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU)
RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU)
RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV)
RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV)
RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV)
RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV)
RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU)
RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU)
RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU)
RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU)
RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM)
RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM)
RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM)
RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM)
GEN_VEXT_VX(vdivu_vx_b, 1)
GEN_VEXT_VX(vdivu_vx_h, 2)
GEN_VEXT_VX(vdivu_vx_w, 4)
GEN_VEXT_VX(vdivu_vx_d, 8)
GEN_VEXT_VX(vdiv_vx_b, 1)
GEN_VEXT_VX(vdiv_vx_h, 2)
GEN_VEXT_VX(vdiv_vx_w, 4)
GEN_VEXT_VX(vdiv_vx_d, 8)
GEN_VEXT_VX(vremu_vx_b, 1)
GEN_VEXT_VX(vremu_vx_h, 2)
GEN_VEXT_VX(vremu_vx_w, 4)
GEN_VEXT_VX(vremu_vx_d, 8)
GEN_VEXT_VX(vrem_vx_b, 1)
GEN_VEXT_VX(vrem_vx_h, 2)
GEN_VEXT_VX(vrem_vx_w, 4)
GEN_VEXT_VX(vrem_vx_d, 8)

/* Vector Widening Integer Multiply Instructions */
/* the WOP_* type macros widen the destination element to 2*SEW */
RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL)
RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL)
RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL)
RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL)
RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL)
RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL)
RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL)
RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL)
RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL)
GEN_VEXT_VV(vwmul_vv_b, 2)
GEN_VEXT_VV(vwmul_vv_h, 4)
GEN_VEXT_VV(vwmul_vv_w, 8)
GEN_VEXT_VV(vwmulu_vv_b, 2)
GEN_VEXT_VV(vwmulu_vv_h, 4)
GEN_VEXT_VV(vwmulu_vv_w, 8)
GEN_VEXT_VV(vwmulsu_vv_b, 2)
GEN_VEXT_VV(vwmulsu_vv_h, 4)
GEN_VEXT_VV(vwmulsu_vv_w, 8)

RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL)
RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL)
RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL)
RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL)
RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL)
RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL)
RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL)
RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL)
RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL)
GEN_VEXT_VX(vwmul_vx_b, 2)
GEN_VEXT_VX(vwmul_vx_h, 4)
GEN_VEXT_VX(vwmul_vx_w, 8)
GEN_VEXT_VX(vwmulu_vx_b, 2)
GEN_VEXT_VX(vwmulu_vx_h, 4)
GEN_VEXT_VX(vwmulu_vx_w, 8)
GEN_VEXT_VX(vwmulsu_vx_b, 2)
GEN_VEXT_VX(vwmulsu_vx_h, 4)
GEN_VEXT_VX(vwmulsu_vx_w, 8)
1939
/* Vector Single-Width Integer Multiply-Add Instructions */
/*
 * OPIVV3: per-element worker for three-operand (destructive) ops.  The
 * current destination element is read and passed to OP as the third
 * argument: vd[i] = OP(vs2[i], vs1[i], vd[i]).
 */
#define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)       \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i)       \
{                                                                  \
    TX1 s1 = *((T1 *)vs1 + HS1(i));                                \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                                \
    TD d = *((TD *)vd + HD(i));                                    \
    *((TD *)vd + HD(i)) = OP(s2, s1, d);                           \
}

#define DO_MACC(N, M, D) (M * N + D)      /* vd = vs1 * vs2 + vd */
#define DO_NMSAC(N, M, D) (-(M * N) + D)  /* vd = -(vs1 * vs2) + vd */
#define DO_MADD(N, M, D) (M * D + N)      /* vd = vs1 * vd + vs2 */
#define DO_NMSUB(N, M, D) (-(M * D) + N)  /* vd = -(vs1 * vd) + vs2 */
RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC)
RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC)
RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC)
RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC)
RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC)
RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC)
RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC)
RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC)
RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD)
RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD)
RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD)
RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD)
RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB)
RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB)
RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB)
RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB)
GEN_VEXT_VV(vmacc_vv_b, 1)
GEN_VEXT_VV(vmacc_vv_h, 2)
GEN_VEXT_VV(vmacc_vv_w, 4)
GEN_VEXT_VV(vmacc_vv_d, 8)
GEN_VEXT_VV(vnmsac_vv_b, 1)
GEN_VEXT_VV(vnmsac_vv_h, 2)
GEN_VEXT_VV(vnmsac_vv_w, 4)
GEN_VEXT_VV(vnmsac_vv_d, 8)
GEN_VEXT_VV(vmadd_vv_b, 1)
GEN_VEXT_VV(vmadd_vv_h, 2)
GEN_VEXT_VV(vmadd_vv_w, 4)
GEN_VEXT_VV(vmadd_vv_d, 8)
GEN_VEXT_VV(vnmsub_vv_b, 1)
GEN_VEXT_VV(vnmsub_vv_h, 2)
GEN_VEXT_VV(vnmsub_vv_w, 4)
GEN_VEXT_VV(vnmsub_vv_d, 8)
1986
/*
 * OPIVX3: scalar-operand variant of OPIVV3.  The scalar s1 is narrowed
 * to T1 and then widened to TX1 so it matches the element type:
 * vd[i] = OP(vs2[i], (TX1)(T1)s1, vd[i]).
 */
#define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)                 \
static void do_##NAME(void *vd, target_long s1, void *vs2, int i)       \
{                                                                       \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                                     \
    TD d = *((TD *)vd + HD(i));                                         \
    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d);                       \
}

RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC)
RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC)
RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC)
RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC)
RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC)
RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC)
RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC)
RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC)
RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD)
RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD)
RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD)
RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD)
RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB)
RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB)
RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB)
RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB)
GEN_VEXT_VX(vmacc_vx_b, 1)
GEN_VEXT_VX(vmacc_vx_h, 2)
GEN_VEXT_VX(vmacc_vx_w, 4)
GEN_VEXT_VX(vmacc_vx_d, 8)
GEN_VEXT_VX(vnmsac_vx_b, 1)
GEN_VEXT_VX(vnmsac_vx_h, 2)
GEN_VEXT_VX(vnmsac_vx_w, 4)
GEN_VEXT_VX(vnmsac_vx_d, 8)
GEN_VEXT_VX(vmadd_vx_b, 1)
GEN_VEXT_VX(vmadd_vx_h, 2)
GEN_VEXT_VX(vmadd_vx_w, 4)
GEN_VEXT_VX(vmadd_vx_d, 8)
GEN_VEXT_VX(vnmsub_vx_b, 1)
GEN_VEXT_VX(vnmsub_vx_h, 2)
GEN_VEXT_VX(vnmsub_vx_w, 4)
GEN_VEXT_VX(vnmsub_vx_d, 8)

/* Vector Widening Integer Multiply-Add Instructions */
RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC)
RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC)
RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC)
RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC)
RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC)
RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC)
RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC)
RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC)
RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC)
GEN_VEXT_VV(vwmaccu_vv_b, 2)
GEN_VEXT_VV(vwmaccu_vv_h, 4)
GEN_VEXT_VV(vwmaccu_vv_w, 8)
GEN_VEXT_VV(vwmacc_vv_b, 2)
GEN_VEXT_VV(vwmacc_vv_h, 4)
GEN_VEXT_VV(vwmacc_vv_w, 8)
GEN_VEXT_VV(vwmaccsu_vv_b, 2)
GEN_VEXT_VV(vwmaccsu_vv_h, 4)
GEN_VEXT_VV(vwmaccsu_vv_w, 8)

/* vwmaccus exists only in vx form (scalar unsigned * vector signed) */
RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC)
RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC)
RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC)
RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC)
RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC)
RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC)
RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC)
RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC)
RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC)
RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC)
RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC)
RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC)
GEN_VEXT_VX(vwmaccu_vx_b, 2)
GEN_VEXT_VX(vwmaccu_vx_h, 4)
GEN_VEXT_VX(vwmaccu_vx_w, 8)
GEN_VEXT_VX(vwmacc_vx_b, 2)
GEN_VEXT_VX(vwmacc_vx_h, 4)
GEN_VEXT_VX(vwmacc_vx_w, 8)
GEN_VEXT_VX(vwmaccsu_vx_b, 2)
GEN_VEXT_VX(vwmaccsu_vx_h, 4)
GEN_VEXT_VX(vwmaccsu_vx_w, 8)
GEN_VEXT_VX(vwmaccus_vx_b, 2)
GEN_VEXT_VX(vwmaccus_vx_h, 4)
GEN_VEXT_VX(vwmaccus_vx_w, 8)
2072
/* Vector Integer Merge and Move Instructions */
/*
 * GEN_VEXT_VMV_VV: vmv.v.v helper — copy elements vstart..vl-1 of vs1
 * into vd, reset vstart, and apply the tail-agnostic policy (write 1s)
 * to the tail elements.  vmv is unmasked, so no v0 argument.
 */
#define GEN_VEXT_VMV_VV(NAME, ETYPE, H)                              \
void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env,           \
                  uint32_t desc)                                     \
{                                                                    \
    uint32_t vl = env->vl;                                           \
    uint32_t esz = sizeof(ETYPE);                                    \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);     \
    uint32_t vta = vext_vta(desc);                                   \
    uint32_t i;                                                      \
                                                                     \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                \
                                                                     \
    for (i = env->vstart; i < vl; i++) {                             \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                           \
        *((ETYPE *)vd + H(i)) = s1;                                  \
    }                                                                \
    env->vstart = 0;                                                 \
    /* set tail elements to 1s */                                    \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);         \
}

GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t,  H1)
GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2)
GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4)
GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8)
2099
/*
 * GEN_VEXT_VMV_VX: vmv.v.x / vmv.v.i helper — splat the scalar s1
 * (truncated to the element type) into elements vstart..vl-1 of vd,
 * then apply the tail-agnostic policy.
 */
#define GEN_VEXT_VMV_VX(NAME, ETYPE, H)                              \
void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env,         \
                  uint32_t desc)                                     \
{                                                                    \
    uint32_t vl = env->vl;                                           \
    uint32_t esz = sizeof(ETYPE);                                    \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);     \
    uint32_t vta = vext_vta(desc);                                   \
    uint32_t i;                                                      \
                                                                     \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                \
                                                                     \
    for (i = env->vstart; i < vl; i++) {                             \
        *((ETYPE *)vd + H(i)) = (ETYPE)s1;                           \
    }                                                                \
    env->vstart = 0;                                                 \
    /* set tail elements to 1s */                                    \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);         \
}

GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t,  H1)
GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2)
GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4)
GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8)
2124
/*
 * GEN_VEXT_VMERGE_VV: vmerge.vvm helper — per element, pick vs1[i] when
 * the mask bit v0[i] is set, otherwise vs2[i], then apply the
 * tail-agnostic policy.
 */
#define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H)                           \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,          \
                  CPURISCVState *env, uint32_t desc)                 \
{                                                                    \
    uint32_t vl = env->vl;                                           \
    uint32_t esz = sizeof(ETYPE);                                    \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);     \
    uint32_t vta = vext_vta(desc);                                   \
    uint32_t i;                                                      \
                                                                     \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                \
                                                                     \
    for (i = env->vstart; i < vl; i++) {                             \
        ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1);            \
        *((ETYPE *)vd + H(i)) = *(vt + H(i));                        \
    }                                                                \
    env->vstart = 0;                                                 \
    /* set tail elements to 1s */                                    \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);         \
}

GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t,  H1)
GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2)
GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4)
GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8)
2150
/*
 * GEN_VEXT_VMERGE_VX: vmerge.vxm / vmerge.vim helper — per element, pick
 * the sign-extended scalar s1 when the mask bit is set, otherwise
 * vs2[i], then apply the tail-agnostic policy.
 */
#define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H)                           \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,               \
                  void *vs2, CPURISCVState *env, uint32_t desc)      \
{                                                                    \
    uint32_t vl = env->vl;                                           \
    uint32_t esz = sizeof(ETYPE);                                    \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);     \
    uint32_t vta = vext_vta(desc);                                   \
    uint32_t i;                                                      \
                                                                     \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                \
                                                                     \
    for (i = env->vstart; i < vl; i++) {                             \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                           \
        ETYPE d = (!vext_elem_mask(v0, i) ? s2 :                     \
                   (ETYPE)(target_long)s1);                          \
        *((ETYPE *)vd + H(i)) = d;                                   \
    }                                                                \
    env->vstart = 0;                                                 \
    /* set tail elements to 1s */                                    \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);         \
}

GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t,  H1)
GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2)
GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4)
GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8)
2178
/*
 * Vector Fixed-Point Arithmetic Instructions
 */

/* Vector Single-Width Saturating Add and Subtract */

/*
 * As fixed point instructions probably have round mode and saturation,
 * define common macros for fixed point here.
 */
/* per-element worker signature for fixed-point vector-vector ops */
typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i,
                          CPURISCVState *env, int vxrm);

/*
 * OPIVV2_RM: emit the per-element worker do_<NAME> that loads one
 * element from each source and applies OP under the given rounding mode:
 * vd[i] = OP(env, vxrm, vs2[i], vs1[i]).
 */
#define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)     \
static inline void                                                  \
do_##NAME(void *vd, void *vs1, void *vs2, int i,                    \
          CPURISCVState *env, int vxrm)                             \
{                                                                   \
    TX1 s1 = *((T1 *)vs1 + HS1(i));                                 \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
    *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1);                    \
}
2201
2202 static inline void
vext_vv_rm_1(void * vd,void * v0,void * vs1,void * vs2,CPURISCVState * env,uint32_t vl,uint32_t vm,int vxrm,opivv2_rm_fn * fn,uint32_t vma,uint32_t esz)2203 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
2204 CPURISCVState *env,
2205 uint32_t vl, uint32_t vm, int vxrm,
2206 opivv2_rm_fn *fn, uint32_t vma, uint32_t esz)
2207 {
2208 for (uint32_t i = env->vstart; i < vl; i++) {
2209 if (!vm && !vext_elem_mask(v0, i)) {
2210 /* set masked-off elements to 1s */
2211 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
2212 continue;
2213 }
2214 fn(vd, vs1, vs2, i, env, vxrm);
2215 }
2216 env->vstart = 0;
2217 }
2218
/*
 * Driver for fixed-point vector-vector helpers: reads the dynamic
 * rounding mode (env->vxrm) once and re-dispatches to vext_vv_rm_1 with
 * a literal-constant vxrm in each arm, so the compiler can specialize
 * the (inlined) per-element op for each of the four rounding modes.
 */
static inline void
vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
             CPURISCVState *env,
             uint32_t desc,
             opivv2_rm_fn *fn, uint32_t esz)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
    uint32_t vta = vext_vta(desc);
    uint32_t vma = vext_vma(desc);

    VSTART_CHECK_EARLY_EXIT(env, vl);

    /* vxrm is a 2-bit field; the default arm covers mode 3 (rod) */
    switch (env->vxrm) {
    case 0: /* rnu */
        vext_vv_rm_1(vd, v0, vs1, vs2,
                     env, vl, vm, 0, fn, vma, esz);
        break;
    case 1: /* rne */
        vext_vv_rm_1(vd, v0, vs1, vs2,
                     env, vl, vm, 1, fn, vma, esz);
        break;
    case 2: /* rdn */
        vext_vv_rm_1(vd, v0, vs1, vs2,
                     env, vl, vm, 2, fn, vma, esz);
        break;
    default: /* rod */
        vext_vv_rm_1(vd, v0, vs1, vs2,
                     env, vl, vm, 3, fn, vma, esz);
        break;
    }
    /* set tail elements to 1s */
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
}
2254
/* generate helpers for fixed point instructions with OPIVV format */
/* public entry point: forwards to the common driver with do_<NAME> */
#define GEN_VEXT_VV_RM(NAME, ESZ)                               \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,     \
                  CPURISCVState *env, uint32_t desc)            \
{                                                               \
    vext_vv_rm_2(vd, v0, vs1, vs2, env, desc,                   \
                 do_##NAME, ESZ);                               \
}
2263
/* Unsigned saturating 8-bit add: clamp to UINT8_MAX and raise vxsat. */
static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a,
                             uint8_t b)
{
    if (b > UINT8_MAX - a) {
        env->vxsat = 0x1;
        return UINT8_MAX;
    }
    return a + b;
}
2274
/* Unsigned saturating 16-bit add: clamp to UINT16_MAX and raise vxsat. */
static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a,
                               uint16_t b)
{
    if (b > UINT16_MAX - a) {
        env->vxsat = 0x1;
        return UINT16_MAX;
    }
    return a + b;
}
2285
/* Unsigned saturating 32-bit add: clamp to UINT32_MAX and raise vxsat. */
static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a,
                               uint32_t b)
{
    if (b > UINT32_MAX - a) {
        env->vxsat = 0x1;
        return UINT32_MAX;
    }
    return a + b;
}
2296
/* Unsigned saturating 64-bit add: clamp to UINT64_MAX and raise vxsat. */
static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a,
                               uint64_t b)
{
    if (b > UINT64_MAX - a) {
        env->vxsat = 0x1;
        return UINT64_MAX;
    }
    return a + b;
}
2307
/* vsaddu.vv: unsigned saturating add, all four element widths */
RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8)
RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16)
RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32)
RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64)
GEN_VEXT_VV_RM(vsaddu_vv_b, 1)
GEN_VEXT_VV_RM(vsaddu_vv_h, 2)
GEN_VEXT_VV_RM(vsaddu_vv_w, 4)
GEN_VEXT_VV_RM(vsaddu_vv_d, 8)
2316
/* per-element worker signature for fixed-point vector-scalar ops */
typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i,
                          CPURISCVState *env, int vxrm);

/*
 * OPIVX2_RM: scalar-operand variant of OPIVV2_RM.  s1 is narrowed to T1
 * then widened to TX1 to match the element type:
 * vd[i] = OP(env, vxrm, vs2[i], (TX1)(T1)s1).
 */
#define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)          \
static inline void                                                  \
do_##NAME(void *vd, target_long s1, void *vs2, int i,               \
          CPURISCVState *env, int vxrm)                             \
{                                                                   \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
    *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1);           \
}
2328
2329 static inline void
vext_vx_rm_1(void * vd,void * v0,target_long s1,void * vs2,CPURISCVState * env,uint32_t vl,uint32_t vm,int vxrm,opivx2_rm_fn * fn,uint32_t vma,uint32_t esz)2330 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2,
2331 CPURISCVState *env,
2332 uint32_t vl, uint32_t vm, int vxrm,
2333 opivx2_rm_fn *fn, uint32_t vma, uint32_t esz)
2334 {
2335 for (uint32_t i = env->vstart; i < vl; i++) {
2336 if (!vm && !vext_elem_mask(v0, i)) {
2337 /* set masked-off elements to 1s */
2338 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
2339 continue;
2340 }
2341 fn(vd, s1, vs2, i, env, vxrm);
2342 }
2343 env->vstart = 0;
2344 }
2345
/*
 * Driver for fixed-point vector-scalar helpers: reads the dynamic
 * rounding mode (env->vxrm) once and re-dispatches to vext_vx_rm_1 with
 * a literal-constant vxrm in each arm, so the compiler can specialize
 * the (inlined) per-element op for each of the four rounding modes.
 */
static inline void
vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
             CPURISCVState *env,
             uint32_t desc,
             opivx2_rm_fn *fn, uint32_t esz)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
    uint32_t vta = vext_vta(desc);
    uint32_t vma = vext_vma(desc);

    VSTART_CHECK_EARLY_EXIT(env, vl);

    /* vxrm is a 2-bit field; the default arm covers mode 3 (rod) */
    switch (env->vxrm) {
    case 0: /* rnu */
        vext_vx_rm_1(vd, v0, s1, vs2,
                     env, vl, vm, 0, fn, vma, esz);
        break;
    case 1: /* rne */
        vext_vx_rm_1(vd, v0, s1, vs2,
                     env, vl, vm, 1, fn, vma, esz);
        break;
    case 2: /* rdn */
        vext_vx_rm_1(vd, v0, s1, vs2,
                     env, vl, vm, 2, fn, vma, esz);
        break;
    default: /* rod */
        vext_vx_rm_1(vd, v0, s1, vs2,
                     env, vl, vm, 3, fn, vma, esz);
        break;
    }
    /* set tail elements to 1s */
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
}
2381
/* generate helpers for fixed point instructions with OPIVX format */
/* public entry point: forwards to the common driver with do_<NAME> */
#define GEN_VEXT_VX_RM(NAME, ESZ)                         \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,    \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    vext_vx_rm_2(vd, v0, s1, vs2, env, desc,              \
                 do_##NAME, ESZ);                         \
}

/* vsaddu.vx: unsigned saturating add with a scalar operand */
RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8)
RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16)
RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32)
RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64)
GEN_VEXT_VX_RM(vsaddu_vx_b, 1)
GEN_VEXT_VX_RM(vsaddu_vx_h, 2)
GEN_VEXT_VX_RM(vsaddu_vx_w, 4)
GEN_VEXT_VX_RM(vsaddu_vx_d, 8)
2400
2401 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2402 {
2403 int8_t res = a + b;
2404 if ((res ^ a) & (res ^ b) & INT8_MIN) {
2405 res = a > 0 ? INT8_MAX : INT8_MIN;
2406 env->vxsat = 0x1;
2407 }
2408 return res;
2409 }
2410
/* Signed saturating 16-bit add: clamp on overflow and raise vxsat. */
static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a,
                             int16_t b)
{
    int16_t res = a + b;

    /* overflow iff res's sign differs from the sign of both operands */
    if ((res ^ a) < 0 && (res ^ b) < 0) {
        res = a > 0 ? INT16_MAX : INT16_MIN;
        env->vxsat = 0x1;
    }
    return res;
}
2421
/* Signed saturating 32-bit add: clamp on overflow and raise vxsat. */
static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a,
                             int32_t b)
{
    int32_t res = a + b;

    /* overflow iff res's sign differs from the sign of both operands */
    if ((res ^ a) < 0 && (res ^ b) < 0) {
        res = a > 0 ? INT32_MAX : INT32_MIN;
        env->vxsat = 0x1;
    }
    return res;
}
2432
/* Signed saturating 64-bit add: clamp on overflow and raise vxsat. */
static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a,
                             int64_t b)
{
    int64_t res = a + b;

    /* overflow iff res's sign differs from the sign of both operands */
    if ((res ^ a) < 0 && (res ^ b) < 0) {
        res = a > 0 ? INT64_MAX : INT64_MIN;
        env->vxsat = 0x1;
    }
    return res;
}
2443
/* vsadd.vv / vsadd.vx: signed saturating add */
RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8)
RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16)
RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32)
RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64)
GEN_VEXT_VV_RM(vsadd_vv_b, 1)
GEN_VEXT_VV_RM(vsadd_vv_h, 2)
GEN_VEXT_VV_RM(vsadd_vv_w, 4)
GEN_VEXT_VV_RM(vsadd_vv_d, 8)

RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8)
RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16)
RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32)
RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64)
GEN_VEXT_VX_RM(vsadd_vx_b, 1)
GEN_VEXT_VX_RM(vsadd_vx_h, 2)
GEN_VEXT_VX_RM(vsadd_vx_w, 4)
GEN_VEXT_VX_RM(vsadd_vx_d, 8)
2461
2462 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a,
2463 uint8_t b)
2464 {
2465 uint8_t res = a - b;
2466 if (res > a) {
2467 res = 0;
2468 env->vxsat = 0x1;
2469 }
2470 return res;
2471 }
2472
/* Unsigned saturating 16-bit subtract: clamp to 0 and raise vxsat. */
static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a,
                               uint16_t b)
{
    if (a < b) {
        env->vxsat = 0x1;
        return 0;
    }
    return a - b;
}
2483
/* Unsigned saturating 32-bit subtract: clamp to 0 and raise vxsat. */
static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a,
                               uint32_t b)
{
    if (a < b) {
        env->vxsat = 0x1;
        return 0;
    }
    return a - b;
}
2494
/* Unsigned saturating 64-bit subtract: clamp to 0 and raise vxsat. */
static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a,
                               uint64_t b)
{
    if (a < b) {
        env->vxsat = 0x1;
        return 0;
    }
    return a - b;
}
2505
/* vssubu.vv / vssubu.vx: unsigned saturating subtract */
RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8)
RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16)
RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32)
RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64)
GEN_VEXT_VV_RM(vssubu_vv_b, 1)
GEN_VEXT_VV_RM(vssubu_vv_h, 2)
GEN_VEXT_VV_RM(vssubu_vv_w, 4)
GEN_VEXT_VV_RM(vssubu_vv_d, 8)

RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8)
RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16)
RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32)
RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64)
GEN_VEXT_VX_RM(vssubu_vx_b, 1)
GEN_VEXT_VX_RM(vssubu_vx_h, 2)
GEN_VEXT_VX_RM(vssubu_vx_w, 4)
GEN_VEXT_VX_RM(vssubu_vx_d, 8)
2523
2524 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2525 {
2526 int8_t res = a - b;
2527 if ((res ^ a) & (a ^ b) & INT8_MIN) {
2528 res = a >= 0 ? INT8_MAX : INT8_MIN;
2529 env->vxsat = 0x1;
2530 }
2531 return res;
2532 }
2533
/* Signed saturating 16-bit subtract: clamp on overflow and raise vxsat. */
static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a,
                             int16_t b)
{
    int16_t res = a - b;

    /* overflow iff operands differ in sign and res's sign differs from a */
    if ((a ^ b) < 0 && (res ^ a) < 0) {
        res = a >= 0 ? INT16_MAX : INT16_MIN;
        env->vxsat = 0x1;
    }
    return res;
}
2544
/* Signed saturating 32-bit subtract: clamp on overflow and raise vxsat. */
static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a,
                             int32_t b)
{
    int32_t res = a - b;

    /* overflow iff operands differ in sign and res's sign differs from a */
    if ((a ^ b) < 0 && (res ^ a) < 0) {
        res = a >= 0 ? INT32_MAX : INT32_MIN;
        env->vxsat = 0x1;
    }
    return res;
}
2555
/* Signed saturating 64-bit subtract: clamp on overflow and raise vxsat. */
static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a,
                             int64_t b)
{
    int64_t res = a - b;

    /* overflow iff operands differ in sign and res's sign differs from a */
    if ((a ^ b) < 0 && (res ^ a) < 0) {
        res = a >= 0 ? INT64_MAX : INT64_MIN;
        env->vxsat = 0x1;
    }
    return res;
}
2566
/* vssub.vv / vssub.vx: signed saturating subtract */
RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
GEN_VEXT_VV_RM(vssub_vv_b, 1)
GEN_VEXT_VV_RM(vssub_vv_h, 2)
GEN_VEXT_VV_RM(vssub_vv_w, 4)
GEN_VEXT_VV_RM(vssub_vv_d, 8)

RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
GEN_VEXT_VX_RM(vssub_vx_b, 1)
GEN_VEXT_VX_RM(vssub_vx_h, 2)
GEN_VEXT_VX_RM(vssub_vx_w, 4)
GEN_VEXT_VX_RM(vssub_vx_d, 8)
2584
2585 /* Vector Single-Width Averaging Add and Subtract */
/*
 * Rounding increment (0 or 1) for a fixed-point right shift of @v by
 * @shift bits, selected by the vxrm rounding mode:
 *   0 (rnu): round-to-nearest-up    -> add bit shift-1
 *   1 (rne): round-to-nearest-even  -> add bit shift-1 only if the result
 *            would otherwise be odd, or further discarded bits are set
 *   2 (rdn): round-down (truncate)  -> add 0
 *   3 (rod): round-to-odd ("jam")   -> force LSB to 1 if any bit shifts out
 */
static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
{
    /*
     * NOTE(review): this extract64() runs before the shift==0/shift>64
     * guard; callers in this file pass shift <= 63, so start+len stays
     * in range -- confirm extract64's bounds assertion is never hit.
     */
    uint8_t d = extract64(v, shift, 1);   /* LSB of the shifted result */
    uint8_t d1;
    uint64_t D1, D2;

    /* Shifting by 0 discards nothing, so no rounding is needed. */
    if (shift == 0 || shift > 64) {
        return 0;
    }

    d1 = extract64(v, shift - 1, 1);      /* most-significant discarded bit */
    D1 = extract64(v, 0, shift);          /* all discarded bits */
    if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
        return d1;
    } else if (vxrm == 1) { /* round-to-nearest-even */
        if (shift > 1) {
            D2 = extract64(v, 0, shift - 1);
            return d1 & ((D2 != 0) | d);
        } else {
            return d1 & d;
        }
    } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
        return !d & (D1 != 0);
    }
    return 0; /* round-down (truncate) */
}
2612
/*
 * Averaging add, 32-bit elements: (a + b) >> 1 with vxrm rounding.
 * The sum is formed in 64 bits, so it cannot overflow.
 */
static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a,
                             int32_t b)
{
    int64_t sum = (int64_t)a + (int64_t)b;

    return (int32_t)((sum >> 1) + get_round(vxrm, sum, 1));
}
2621
/*
 * Averaging add, 64-bit elements: (a + b) >> 1 with vxrm rounding,
 * computed without a wider type.  On signed overflow, bit 64 of the
 * true sum is the inverse of bit 63 of the wrapped sum, so XOR-ing the
 * overflow mask into the shifted result restores the correct bit 63.
 * NOTE(review): `a + b` relies on wrapping signed arithmetic
 * (QEMU builds with -fwrapv) -- confirm build flags.
 */
static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a,
                             int64_t b)
{
    int64_t res = a + b;
    uint8_t round = get_round(vxrm, res, 1);
    int64_t over = (res ^ a) & (res ^ b) & INT64_MIN;

    /* With signed overflow, bit 64 is inverse of bit 63. */
    return ((res >> 1) ^ over) + round;
}
2632
/* Expand vaadd (signed averaging add), vector-vector forms. */
RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32)
RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32)
RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32)
RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64)
GEN_VEXT_VV_RM(vaadd_vv_b, 1)
GEN_VEXT_VV_RM(vaadd_vv_h, 2)
GEN_VEXT_VV_RM(vaadd_vv_w, 4)
GEN_VEXT_VV_RM(vaadd_vv_d, 8)

/* Expand vaadd, vector-scalar forms. */
RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32)
RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32)
RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32)
RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64)
GEN_VEXT_VX_RM(vaadd_vx_b, 1)
GEN_VEXT_VX_RM(vaadd_vx_h, 2)
GEN_VEXT_VX_RM(vaadd_vx_w, 4)
GEN_VEXT_VX_RM(vaadd_vx_d, 8)
2650
2651 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm,
2652 uint32_t a, uint32_t b)
2653 {
2654 uint64_t res = (uint64_t)a + b;
2655 uint8_t round = get_round(vxrm, res, 1);
2656
2657 return (res >> 1) + round;
2658 }
2659
/*
 * Unsigned averaging add, 64-bit elements.  The carry out of bit 63
 * (detected by the wrapped sum being smaller than an operand) is
 * reinserted as bit 63 of the halved result.
 */
static inline uint64_t aaddu64(CPURISCVState *env, int vxrm,
                               uint64_t a, uint64_t b)
{
    uint64_t sum = a + b;
    uint64_t carry = sum < a ? UINT64_C(1) << 63 : 0;

    return ((sum >> 1) | carry) + get_round(vxrm, sum, 1);
}
2669
/* Expand vaaddu (unsigned averaging add), vector-vector forms. */
RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32)
RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32)
RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32)
RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64)
GEN_VEXT_VV_RM(vaaddu_vv_b, 1)
GEN_VEXT_VV_RM(vaaddu_vv_h, 2)
GEN_VEXT_VV_RM(vaaddu_vv_w, 4)
GEN_VEXT_VV_RM(vaaddu_vv_d, 8)

/* Expand vaaddu, vector-scalar forms. */
RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32)
RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32)
RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32)
RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64)
GEN_VEXT_VX_RM(vaaddu_vx_b, 1)
GEN_VEXT_VX_RM(vaaddu_vx_h, 2)
GEN_VEXT_VX_RM(vaaddu_vx_w, 4)
GEN_VEXT_VX_RM(vaaddu_vx_d, 8)
2687
2688 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a,
2689 int32_t b)
2690 {
2691 int64_t res = (int64_t)a - b;
2692 uint8_t round = get_round(vxrm, res, 1);
2693
2694 return (res >> 1) + round;
2695 }
2696
/*
 * Averaging subtract, 64-bit elements: (a - b) >> 1 with vxrm rounding,
 * computed without a wider type.  On signed overflow, bit 64 of the
 * true difference is the inverse of bit 63 of the wrapped difference,
 * so XOR-ing the overflow mask into the shifted result restores it.
 * NOTE(review): `a - b` relies on wrapping signed arithmetic
 * (QEMU builds with -fwrapv) -- confirm build flags.
 */
static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a,
                             int64_t b)
{
    int64_t res = (int64_t)a - b;
    uint8_t round = get_round(vxrm, res, 1);
    int64_t over = (res ^ a) & (a ^ b) & INT64_MIN;

    /* With signed overflow, bit 64 is inverse of bit 63. */
    return ((res >> 1) ^ over) + round;
}
2707
/* Expand vasub (signed averaging subtract), vector-vector forms. */
RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32)
RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32)
RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32)
RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64)
GEN_VEXT_VV_RM(vasub_vv_b, 1)
GEN_VEXT_VV_RM(vasub_vv_h, 2)
GEN_VEXT_VV_RM(vasub_vv_w, 4)
GEN_VEXT_VV_RM(vasub_vv_d, 8)

/* Expand vasub, vector-scalar forms. */
RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32)
RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32)
RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32)
RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64)
GEN_VEXT_VX_RM(vasub_vx_b, 1)
GEN_VEXT_VX_RM(vasub_vx_h, 2)
GEN_VEXT_VX_RM(vasub_vx_w, 4)
GEN_VEXT_VX_RM(vasub_vx_d, 8)
2725
2726 static inline uint32_t asubu32(CPURISCVState *env, int vxrm,
2727 uint32_t a, uint32_t b)
2728 {
2729 int64_t res = (int64_t)a - b;
2730 uint8_t round = get_round(vxrm, res, 1);
2731
2732 return (res >> 1) + round;
2733 }
2734
/*
 * Unsigned averaging subtract, 64-bit elements.  A borrow out of bit 63
 * (detected by the wrapped difference exceeding the minuend) is
 * reinserted as bit 63 of the halved result.
 */
static inline uint64_t asubu64(CPURISCVState *env, int vxrm,
                               uint64_t a, uint64_t b)
{
    uint64_t diff = a - b;
    uint64_t borrow = diff > a ? UINT64_C(1) << 63 : 0;

    return ((diff >> 1) | borrow) + get_round(vxrm, diff, 1);
}
2744
/* Expand vasubu (unsigned averaging subtract), vector-vector forms. */
RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32)
RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32)
RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32)
RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64)
GEN_VEXT_VV_RM(vasubu_vv_b, 1)
GEN_VEXT_VV_RM(vasubu_vv_h, 2)
GEN_VEXT_VV_RM(vasubu_vv_w, 4)
GEN_VEXT_VV_RM(vasubu_vv_d, 8)

/* Expand vasubu, vector-scalar forms. */
RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32)
RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32)
RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32)
RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64)
GEN_VEXT_VX_RM(vasubu_vx_b, 1)
GEN_VEXT_VX_RM(vasubu_vx_h, 2)
GEN_VEXT_VX_RM(vasubu_vx_w, 4)
GEN_VEXT_VX_RM(vasubu_vx_d, 8)
2762
2763 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */
2764 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2765 {
2766 uint8_t round;
2767 int16_t res;
2768
2769 res = (int16_t)a * (int16_t)b;
2770 round = get_round(vxrm, res, 7);
2771 res = (res >> 7) + round;
2772
2773 if (res > INT8_MAX) {
2774 env->vxsat = 0x1;
2775 return INT8_MAX;
2776 } else if (res < INT8_MIN) {
2777 env->vxsat = 0x1;
2778 return INT8_MIN;
2779 } else {
2780 return res;
2781 }
2782 }
2783
/*
 * Fractional multiply with rounding and saturation, 16-bit elements:
 * (a * b) >> 15 with vxrm rounding, clamped to [INT16_MIN, INT16_MAX];
 * vxsat is set when clamping occurs.
 */
static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
{
    int32_t prod = (int32_t)a * (int32_t)b;
    int32_t scaled = (prod >> 15) + get_round(vxrm, prod, 15);

    if (scaled > INT16_MAX) {
        env->vxsat = 0x1;
        return INT16_MAX;
    }
    if (scaled < INT16_MIN) {
        env->vxsat = 0x1;
        return INT16_MIN;
    }
    return (int16_t)scaled;
}
2803
/*
 * Fractional multiply with rounding and saturation, 32-bit elements:
 * (a * b) >> 31 with vxrm rounding, clamped to [INT32_MIN, INT32_MAX];
 * vxsat is set when clamping occurs.
 */
static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
{
    int64_t prod = (int64_t)a * (int64_t)b;
    int64_t scaled = (prod >> 31) + get_round(vxrm, prod, 31);

    if (scaled > INT32_MAX) {
        env->vxsat = 0x1;
        return INT32_MAX;
    }
    if (scaled < INT32_MIN) {
        env->vxsat = 0x1;
        return INT32_MIN;
    }
    return (int32_t)scaled;
}
2823
/*
 * Fractional multiply with rounding and saturation, 64-bit elements:
 * the 128-bit product (via muls64) is shifted right by 63 with vxrm
 * rounding and clamped.  INT64_MIN * INT64_MIN is the only input pair
 * whose shifted product (2^63) exceeds INT64_MAX, so it is special-cased.
 */
static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
{
    uint8_t round;
    uint64_t hi_64, lo_64;
    int64_t res;

    if (a == INT64_MIN && b == INT64_MIN) {
        env->vxsat = 1;
        return INT64_MAX;
    }

    muls64(&lo_64, &hi_64, a, b);
    round = get_round(vxrm, lo_64, 63);
    /*
     * Cannot overflow, as there are always
     * 2 sign bits after multiply.
     */
    res = (hi_64 << 1) | (lo_64 >> 63);
    if (round) {
        /* Rounding up from INT64_MAX would overflow: saturate instead. */
        if (res == INT64_MAX) {
            env->vxsat = 1;
        } else {
            res += 1;
        }
    }
    return res;
}
2851
/* Expand vsmul (fractional multiply), vector-vector forms. */
RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8)
RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16)
RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32)
RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64)
GEN_VEXT_VV_RM(vsmul_vv_b, 1)
GEN_VEXT_VV_RM(vsmul_vv_h, 2)
GEN_VEXT_VV_RM(vsmul_vv_w, 4)
GEN_VEXT_VV_RM(vsmul_vv_d, 8)

/* Expand vsmul, vector-scalar forms. */
RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8)
RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16)
RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32)
RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64)
GEN_VEXT_VX_RM(vsmul_vx_b, 1)
GEN_VEXT_VX_RM(vsmul_vx_h, 2)
GEN_VEXT_VX_RM(vsmul_vx_w, 4)
GEN_VEXT_VX_RM(vsmul_vx_d, 8)
2869
2870 /* Vector Single-Width Scaling Shift Instructions */
/* Scaling logical right shift with vxrm rounding, 8-bit elements. */
static inline uint8_t
vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
{
    uint8_t round, shift = b & 0x7;   /* shift amount masked to log2(SEW) bits */
    uint8_t res;

    round = get_round(vxrm, a, shift);
    res = (a >> shift) + round;
    return res;
}
/* As vssrl8, for 16-bit elements. */
static inline uint16_t
vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b)
{
    uint8_t round, shift = b & 0xf;

    round = get_round(vxrm, a, shift);
    return (a >> shift) + round;
}
/* As vssrl8, for 32-bit elements. */
static inline uint32_t
vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b)
{
    uint8_t round, shift = b & 0x1f;

    round = get_round(vxrm, a, shift);
    return (a >> shift) + round;
}
/* As vssrl8, for 64-bit elements. */
static inline uint64_t
vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b)
{
    uint8_t round, shift = b & 0x3f;

    round = get_round(vxrm, a, shift);
    return (a >> shift) + round;
}
/* Expand vssrl, vector-vector forms. */
RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8)
RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16)
RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32)
RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64)
GEN_VEXT_VV_RM(vssrl_vv_b, 1)
GEN_VEXT_VV_RM(vssrl_vv_h, 2)
GEN_VEXT_VV_RM(vssrl_vv_w, 4)
GEN_VEXT_VV_RM(vssrl_vv_d, 8)

/* Expand vssrl, vector-scalar forms. */
RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8)
RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16)
RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32)
RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64)
GEN_VEXT_VX_RM(vssrl_vx_b, 1)
GEN_VEXT_VX_RM(vssrl_vx_h, 2)
GEN_VEXT_VX_RM(vssrl_vx_w, 4)
GEN_VEXT_VX_RM(vssrl_vx_d, 8)
2922
/*
 * Scaling arithmetic right shift with vxrm rounding, 8-bit elements.
 * NOTE(review): relies on `>>` of a negative value being an arithmetic
 * shift (implementation-defined in ISO C; true for GCC/Clang) -- confirm.
 */
static inline int8_t
vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
{
    uint8_t round, shift = b & 0x7;

    round = get_round(vxrm, a, shift);
    return (a >> shift) + round;
}
/* As vssra8, for 16-bit elements. */
static inline int16_t
vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
{
    uint8_t round, shift = b & 0xf;

    round = get_round(vxrm, a, shift);
    return (a >> shift) + round;
}
/* As vssra8, for 32-bit elements. */
static inline int32_t
vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
{
    uint8_t round, shift = b & 0x1f;

    round = get_round(vxrm, a, shift);
    return (a >> shift) + round;
}
/* As vssra8, for 64-bit elements. */
static inline int64_t
vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
{
    uint8_t round, shift = b & 0x3f;

    round = get_round(vxrm, a, shift);
    return (a >> shift) + round;
}

/* Expand vssra, vector-vector forms. */
RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8)
RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16)
RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32)
RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64)
GEN_VEXT_VV_RM(vssra_vv_b, 1)
GEN_VEXT_VV_RM(vssra_vv_h, 2)
GEN_VEXT_VV_RM(vssra_vv_w, 4)
GEN_VEXT_VV_RM(vssra_vv_d, 8)

/* Expand vssra, vector-scalar forms. */
RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8)
RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16)
RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32)
RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64)
GEN_VEXT_VX_RM(vssra_vx_b, 1)
GEN_VEXT_VX_RM(vssra_vx_h, 2)
GEN_VEXT_VX_RM(vssra_vx_w, 4)
GEN_VEXT_VX_RM(vssra_vx_d, 8)
2973
2974 /* Vector Narrowing Fixed-Point Clip Instructions */
2975 static inline int8_t
2976 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b)
2977 {
2978 uint8_t round, shift = b & 0xf;
2979 int16_t res;
2980
2981 round = get_round(vxrm, a, shift);
2982 res = (a >> shift) + round;
2983 if (res > INT8_MAX) {
2984 env->vxsat = 0x1;
2985 return INT8_MAX;
2986 } else if (res < INT8_MIN) {
2987 env->vxsat = 0x1;
2988 return INT8_MIN;
2989 } else {
2990 return res;
2991 }
2992 }
2993
2994 static inline int16_t
vnclip16(CPURISCVState * env,int vxrm,int32_t a,int16_t b)2995 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b)
2996 {
2997 uint8_t round, shift = b & 0x1f;
2998 int32_t res;
2999
3000 round = get_round(vxrm, a, shift);
3001 res = (a >> shift) + round;
3002 if (res > INT16_MAX) {
3003 env->vxsat = 0x1;
3004 return INT16_MAX;
3005 } else if (res < INT16_MIN) {
3006 env->vxsat = 0x1;
3007 return INT16_MIN;
3008 } else {
3009 return res;
3010 }
3011 }
3012
3013 static inline int32_t
vnclip32(CPURISCVState * env,int vxrm,int64_t a,int32_t b)3014 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b)
3015 {
3016 uint8_t round, shift = b & 0x3f;
3017 int64_t res;
3018
3019 round = get_round(vxrm, a, shift);
3020 res = (a >> shift) + round;
3021 if (res > INT32_MAX) {
3022 env->vxsat = 0x1;
3023 return INT32_MAX;
3024 } else if (res < INT32_MIN) {
3025 env->vxsat = 0x1;
3026 return INT32_MIN;
3027 } else {
3028 return res;
3029 }
3030 }
3031
/* Expand vnclip (narrowing signed clip), wide-vector and wide-scalar forms. */
RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8)
RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16)
RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32)
GEN_VEXT_VV_RM(vnclip_wv_b, 1)
GEN_VEXT_VV_RM(vnclip_wv_h, 2)
GEN_VEXT_VV_RM(vnclip_wv_w, 4)

RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8)
RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16)
RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32)
GEN_VEXT_VX_RM(vnclip_wx_b, 1)
GEN_VEXT_VX_RM(vnclip_wx_h, 2)
GEN_VEXT_VX_RM(vnclip_wx_w, 4)
3045
3046 static inline uint8_t
3047 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
3048 {
3049 uint8_t round, shift = b & 0xf;
3050 uint16_t res;
3051
3052 round = get_round(vxrm, a, shift);
3053 res = (a >> shift) + round;
3054 if (res > UINT8_MAX) {
3055 env->vxsat = 0x1;
3056 return UINT8_MAX;
3057 } else {
3058 return res;
3059 }
3060 }
3061
3062 static inline uint16_t
vnclipu16(CPURISCVState * env,int vxrm,uint32_t a,uint16_t b)3063 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b)
3064 {
3065 uint8_t round, shift = b & 0x1f;
3066 uint32_t res;
3067
3068 round = get_round(vxrm, a, shift);
3069 res = (a >> shift) + round;
3070 if (res > UINT16_MAX) {
3071 env->vxsat = 0x1;
3072 return UINT16_MAX;
3073 } else {
3074 return res;
3075 }
3076 }
3077
3078 static inline uint32_t
vnclipu32(CPURISCVState * env,int vxrm,uint64_t a,uint32_t b)3079 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
3080 {
3081 uint8_t round, shift = b & 0x3f;
3082 uint64_t res;
3083
3084 round = get_round(vxrm, a, shift);
3085 res = (a >> shift) + round;
3086 if (res > UINT32_MAX) {
3087 env->vxsat = 0x1;
3088 return UINT32_MAX;
3089 } else {
3090 return res;
3091 }
3092 }
3093
/* Expand vnclipu (narrowing unsigned clip), wide-vector and wide-scalar forms. */
RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
GEN_VEXT_VV_RM(vnclipu_wv_b, 1)
GEN_VEXT_VV_RM(vnclipu_wv_h, 2)
GEN_VEXT_VV_RM(vnclipu_wv_w, 4)

RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8)
RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16)
RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32)
GEN_VEXT_VX_RM(vnclipu_wx_b, 1)
GEN_VEXT_VX_RM(vnclipu_wx_h, 2)
GEN_VEXT_VX_RM(vnclipu_wx_w, 4)
3107
3108 /*
3109 * Vector Float Point Arithmetic Instructions
3110 */
3111 /* Vector Single-Width Floating-Point Add/Subtract Instructions */
/*
 * OPFVV2: define the per-element body do_NAME for a two-source
 * floating-point vector-vector op.  OP receives (vs2, vs1, fp_status).
 */
#define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)   \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i,   \
                      CPURISCVState *env)                      \
{                                                              \
    TX1 s1 = *((T1 *)vs1 + HS1(i));                            \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
    *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status);         \
}

/*
 * GEN_VEXT_VV_ENV: define the helper that loops over active elements,
 * applies do_NAME, writes 1s to masked-off elements per vma and to the
 * tail per vta, and resets vstart.
 */
#define GEN_VEXT_VV_ENV(NAME, ESZ)                        \
void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    uint32_t vm = vext_vm(desc);                          \
    uint32_t vl = env->vl;                                \
    uint32_t total_elems =                                \
        vext_get_total_elems(env, desc, ESZ);             \
    uint32_t vta = vext_vta(desc);                        \
    uint32_t vma = vext_vma(desc);                        \
    uint32_t i;                                           \
                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                     \
                                                          \
    for (i = env->vstart; i < vl; i++) {                  \
        if (!vm && !vext_elem_mask(v0, i)) {              \
            /* set masked-off elements to 1s */           \
            vext_set_elems_1s(vd, vma, i * ESZ,           \
                              (i + 1) * ESZ);             \
            continue;                                     \
        }                                                 \
        do_##NAME(vd, vs1, vs2, i, env);                  \
    }                                                     \
    env->vstart = 0;                                      \
    /* set tail elements to 1s */                         \
    vext_set_elems_1s(vd, vta, vl * ESZ,                  \
                      total_elems * ESZ);                 \
}
3150
/* Expand vfadd, vector-vector forms. */
RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
GEN_VEXT_VV_ENV(vfadd_vv_h, 2)
GEN_VEXT_VV_ENV(vfadd_vv_w, 4)
GEN_VEXT_VV_ENV(vfadd_vv_d, 8)

/*
 * OPFVF2: define the per-element body do_NAME for a two-source
 * floating-point vector-scalar op; s1 is the scalar operand.
 */
#define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)        \
static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
                      CPURISCVState *env)                      \
{                                                              \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\
}
3165
/*
 * GEN_VEXT_VF: define the helper that loops over active elements for a
 * vector-scalar floating-point op, with the same mask/tail handling as
 * GEN_VEXT_VV_ENV.
 */
#define GEN_VEXT_VF(NAME, ESZ)                            \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1,        \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    uint32_t vm = vext_vm(desc);                          \
    uint32_t vl = env->vl;                                \
    uint32_t total_elems =                                \
        vext_get_total_elems(env, desc, ESZ);             \
    uint32_t vta = vext_vta(desc);                        \
    uint32_t vma = vext_vma(desc);                        \
    uint32_t i;                                           \
                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                     \
                                                          \
    for (i = env->vstart; i < vl; i++) {                  \
        if (!vm && !vext_elem_mask(v0, i)) {              \
            /* set masked-off elements to 1s */           \
            vext_set_elems_1s(vd, vma, i * ESZ,           \
                              (i + 1) * ESZ);             \
            continue;                                     \
        }                                                 \
        do_##NAME(vd, s1, vs2, i, env);                   \
    }                                                     \
    env->vstart = 0;                                      \
    /* set tail elements to 1s */                         \
    vext_set_elems_1s(vd, vta, vl * ESZ,                  \
                      total_elems * ESZ);                 \
}

/* Expand vfadd, vector-scalar forms. */
RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add)
RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add)
GEN_VEXT_VF(vfadd_vf_h, 2)
GEN_VEXT_VF(vfadd_vf_w, 4)
GEN_VEXT_VF(vfadd_vf_d, 8)

/* Expand vfsub, vector-vector and vector-scalar forms. */
RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub)
RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub)
RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub)
GEN_VEXT_VV_ENV(vfsub_vv_h, 2)
GEN_VEXT_VV_ENV(vfsub_vv_w, 4)
GEN_VEXT_VV_ENV(vfsub_vv_d, 8)
RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub)
RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub)
RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub)
GEN_VEXT_VF(vfsub_vf_h, 2)
GEN_VEXT_VF(vfsub_vf_w, 4)
GEN_VEXT_VF(vfsub_vf_d, 8)
3215
/* Reverse subtract (b - a), used so vfrsub can reuse the OPFVF2 plumbing. */
static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s)
{
    return float16_sub(b, a, s);
}

/* As float16_rsub, for 32-bit floats. */
static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s)
{
    return float32_sub(b, a, s);
}

/* As float16_rsub, for 64-bit floats. */
static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s)
{
    return float64_sub(b, a, s);
}

/* Expand vfrsub, vector-scalar forms. */
RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub)
RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub)
RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub)
GEN_VEXT_VF(vfrsub_vf_h, 2)
GEN_VEXT_VF(vfrsub_vf_w, 4)
GEN_VEXT_VF(vfrsub_vf_d, 8)

/* Vector Widening Floating-Point Add/Subtract Instructions */
/* Widening add: promote both half operands to single, then add. */
static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s)
{
    return float32_add(float16_to_float32(a, true, s),
                       float16_to_float32(b, true, s), s);
}

/* Widening add: promote both single operands to double, then add. */
static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s)
{
    return float64_add(float32_to_float64(a, s),
                       float32_to_float64(b, s), s);

}

/* Expand vfwadd, vector-vector and vector-scalar forms. */
RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16)
RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32)
GEN_VEXT_VV_ENV(vfwadd_vv_h, 4)
GEN_VEXT_VV_ENV(vfwadd_vv_w, 8)
RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16)
RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32)
GEN_VEXT_VF(vfwadd_vf_h, 4)
GEN_VEXT_VF(vfwadd_vf_w, 8)
3260
/* Widening subtract: promote both half operands to single, then subtract. */
static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s)
{
    return float32_sub(float16_to_float32(a, true, s),
                       float16_to_float32(b, true, s), s);
}

/* Widening subtract: promote both single operands to double, then subtract. */
static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s)
{
    return float64_sub(float32_to_float64(a, s),
                       float32_to_float64(b, s), s);

}

/* Expand vfwsub, vector-vector and vector-scalar forms. */
RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16)
RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32)
GEN_VEXT_VV_ENV(vfwsub_vv_h, 4)
GEN_VEXT_VV_ENV(vfwsub_vv_w, 8)
RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16)
RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32)
GEN_VEXT_VF(vfwsub_vf_h, 4)
GEN_VEXT_VF(vfwsub_vf_w, 8)

/* Widening add, wide first operand: only b is promoted before the add. */
static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s)
{
    return float32_add(a, float16_to_float32(b, true, s), s);
}

/* As vfwaddw16, single -> double. */
static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s)
{
    return float64_add(a, float32_to_float64(b, s), s);
}

/* Expand vfwadd.w, wide-vector and wide-scalar forms. */
RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16)
RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32)
GEN_VEXT_VV_ENV(vfwadd_wv_h, 4)
GEN_VEXT_VV_ENV(vfwadd_wv_w, 8)
RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16)
RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32)
GEN_VEXT_VF(vfwadd_wf_h, 4)
GEN_VEXT_VF(vfwadd_wf_w, 8)

/* Widening subtract, wide first operand: only b is promoted. */
static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s)
{
    return float32_sub(a, float16_to_float32(b, true, s), s);
}

/* As vfwsubw16, single -> double. */
static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s)
{
    return float64_sub(a, float32_to_float64(b, s), s);
}

/* Expand vfwsub.w, wide-vector and wide-scalar forms. */
RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16)
RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32)
GEN_VEXT_VV_ENV(vfwsub_wv_h, 4)
GEN_VEXT_VV_ENV(vfwsub_wv_w, 8)
RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16)
RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32)
GEN_VEXT_VF(vfwsub_wf_h, 4)
GEN_VEXT_VF(vfwsub_wf_w, 8)
3320
/* Vector Single-Width Floating-Point Multiply/Divide Instructions */
RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul)
RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul)
RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul)
GEN_VEXT_VV_ENV(vfmul_vv_h, 2)
GEN_VEXT_VV_ENV(vfmul_vv_w, 4)
GEN_VEXT_VV_ENV(vfmul_vv_d, 8)
RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul)
RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul)
RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul)
GEN_VEXT_VF(vfmul_vf_h, 2)
GEN_VEXT_VF(vfmul_vf_w, 4)
GEN_VEXT_VF(vfmul_vf_d, 8)

/* Expand vfdiv, vector-vector and vector-scalar forms. */
RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div)
RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div)
RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div)
GEN_VEXT_VV_ENV(vfdiv_vv_h, 2)
GEN_VEXT_VV_ENV(vfdiv_vv_w, 4)
GEN_VEXT_VV_ENV(vfdiv_vv_d, 8)
RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div)
RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div)
RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div)
GEN_VEXT_VF(vfdiv_vf_h, 2)
GEN_VEXT_VF(vfdiv_vf_w, 4)
GEN_VEXT_VF(vfdiv_vf_d, 8)

/* Reverse divide (b / a), used so vfrdiv can reuse the OPFVF2 plumbing. */
static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s)
{
    return float16_div(b, a, s);
}

/* As float16_rdiv, for 32-bit floats. */
static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s)
{
    return float32_div(b, a, s);
}

/* As float16_rdiv, for 64-bit floats. */
static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s)
{
    return float64_div(b, a, s);
}

/* Expand vfrdiv, vector-scalar forms. */
RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv)
RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv)
RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv)
GEN_VEXT_VF(vfrdiv_vf_h, 2)
GEN_VEXT_VF(vfrdiv_vf_w, 4)
GEN_VEXT_VF(vfrdiv_vf_d, 8)
3369
3370 /* Vector Widening Floating-Point Multiply */
/* Widening multiply: promote both half operands to single, then multiply. */
static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s)
{
    return float32_mul(float16_to_float32(a, true, s),
                       float16_to_float32(b, true, s), s);
}

/* Widening multiply: promote both single operands to double, then multiply. */
static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s)
{
    return float64_mul(float32_to_float64(a, s),
                       float32_to_float64(b, s), s);

}
/* Expand vfwmul, vector-vector and vector-scalar forms. */
RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16)
RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32)
GEN_VEXT_VV_ENV(vfwmul_vv_h, 4)
GEN_VEXT_VV_ENV(vfwmul_vv_w, 8)
RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16)
RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32)
GEN_VEXT_VF(vfwmul_vf_h, 4)
GEN_VEXT_VF(vfwmul_vf_w, 8)
3391
3392 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
/*
 * OPFVV3: define the per-element body do_NAME for a three-operand
 * floating-point vector-vector op; the current destination element is
 * read as the third (accumulator) operand.
 */
#define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)       \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i,       \
                      CPURISCVState *env)                          \
{                                                                  \
    TX1 s1 = *((T1 *)vs1 + HS1(i));                                \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                                \
    TD d = *((TD *)vd + HD(i));                                    \
    *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status);          \
}

/* Fused multiply-accumulate: a * b + d, half precision. */
static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
    return float16_muladd(a, b, d, 0, s);
}

/* As fmacc16, single precision. */
static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
{
    return float32_muladd(a, b, d, 0, s);
}

/* As fmacc16, double precision. */
static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
{
    return float64_muladd(a, b, d, 0, s);
}

/* Expand vfmacc, vector-vector forms. */
RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16)
RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32)
RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64)
GEN_VEXT_VV_ENV(vfmacc_vv_h, 2)
GEN_VEXT_VV_ENV(vfmacc_vv_w, 4)
GEN_VEXT_VV_ENV(vfmacc_vv_d, 8)
3424
/*
 * OPFVF3: define the per-element body do_NAME for a three-operand
 * floating-point vector-scalar op; s1 is the scalar operand and the
 * current destination element is the accumulator.
 */
#define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)           \
static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i,    \
                      CPURISCVState *env)                         \
{                                                                 \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                               \
    TD d = *((TD *)vd + HD(i));                                   \
    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\
}

/* Expand vfmacc, vector-scalar forms. */
RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16)
RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32)
RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64)
GEN_VEXT_VF(vfmacc_vf_h, 2)
GEN_VEXT_VF(vfmacc_vf_w, 4)
GEN_VEXT_VF(vfmacc_vf_d, 8)
3440
/*
 * vfnmacc: vd[i] = -(vs1[i] * vs2[i]) - vd[i]
 * Both the product and the addend (old vd) are negated.
 */
static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
    return float16_muladd(a, b, d, float_muladd_negate_c |
                                   float_muladd_negate_product, s);
}

static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
{
    return float32_muladd(a, b, d, float_muladd_negate_c |
                                   float_muladd_negate_product, s);
}

static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
{
    return float64_muladd(a, b, d, float_muladd_negate_c |
                                   float_muladd_negate_product, s);
}

RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16)
RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32)
RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64)
GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2)
GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4)
GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8)
RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16)
RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32)
RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64)
GEN_VEXT_VF(vfnmacc_vf_h, 2)
GEN_VEXT_VF(vfnmacc_vf_w, 4)
GEN_VEXT_VF(vfnmacc_vf_d, 8)
3471
/*
 * vfmsac: vd[i] = +(vs1[i] * vs2[i]) - vd[i]
 * Only the addend (old vd) is negated.
 */
static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
    return float16_muladd(a, b, d, float_muladd_negate_c, s);
}

static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
{
    return float32_muladd(a, b, d, float_muladd_negate_c, s);
}

static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
{
    return float64_muladd(a, b, d, float_muladd_negate_c, s);
}

RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16)
RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32)
RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64)
GEN_VEXT_VV_ENV(vfmsac_vv_h, 2)
GEN_VEXT_VV_ENV(vfmsac_vv_w, 4)
GEN_VEXT_VV_ENV(vfmsac_vv_d, 8)
RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16)
RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32)
RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64)
GEN_VEXT_VF(vfmsac_vf_h, 2)
GEN_VEXT_VF(vfmsac_vf_w, 4)
GEN_VEXT_VF(vfmsac_vf_d, 8)
3499
/*
 * vfnmsac: vd[i] = -(vs1[i] * vs2[i]) + vd[i]
 * Only the product is negated.
 */
static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
    return float16_muladd(a, b, d, float_muladd_negate_product, s);
}

static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
{
    return float32_muladd(a, b, d, float_muladd_negate_product, s);
}

static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
{
    return float64_muladd(a, b, d, float_muladd_negate_product, s);
}

RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16)
RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32)
RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64)
GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2)
GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4)
GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8)
RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16)
RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32)
RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64)
GEN_VEXT_VF(vfnmsac_vf_h, 2)
GEN_VEXT_VF(vfnmsac_vf_w, 4)
GEN_VEXT_VF(vfnmsac_vf_d, 8)
3527
/*
 * vfmadd: vd[i] = +(vd[i] * vs1[i]) + vs2[i]
 * Note the operand swap relative to vfmacc: the old destination d is
 * the multiplicand and a (= vs2) is the addend -- muladd(d, b, a).
 */
static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
    return float16_muladd(d, b, a, 0, s);
}

static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
{
    return float32_muladd(d, b, a, 0, s);
}

static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
{
    return float64_muladd(d, b, a, 0, s);
}

RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16)
RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32)
RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64)
GEN_VEXT_VV_ENV(vfmadd_vv_h, 2)
GEN_VEXT_VV_ENV(vfmadd_vv_w, 4)
GEN_VEXT_VV_ENV(vfmadd_vv_d, 8)
RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16)
RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32)
RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64)
GEN_VEXT_VF(vfmadd_vf_h, 2)
GEN_VEXT_VF(vfmadd_vf_w, 4)
GEN_VEXT_VF(vfmadd_vf_d, 8)
3555
/*
 * vfnmadd: vd[i] = -(vd[i] * vs1[i]) - vs2[i]
 * Multiplicand is the old vd; both product and addend negated.
 */
static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
    return float16_muladd(d, b, a, float_muladd_negate_c |
                                   float_muladd_negate_product, s);
}

static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
{
    return float32_muladd(d, b, a, float_muladd_negate_c |
                                   float_muladd_negate_product, s);
}

static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
{
    return float64_muladd(d, b, a, float_muladd_negate_c |
                                   float_muladd_negate_product, s);
}

RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16)
RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32)
RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64)
GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2)
GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4)
GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8)
RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16)
RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32)
RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64)
GEN_VEXT_VF(vfnmadd_vf_h, 2)
GEN_VEXT_VF(vfnmadd_vf_w, 4)
GEN_VEXT_VF(vfnmadd_vf_d, 8)
3586
/*
 * vfmsub: vd[i] = +(vd[i] * vs1[i]) - vs2[i]
 * Multiplicand is the old vd; only the addend is negated.
 */
static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
    return float16_muladd(d, b, a, float_muladd_negate_c, s);
}

static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
{
    return float32_muladd(d, b, a, float_muladd_negate_c, s);
}

static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
{
    return float64_muladd(d, b, a, float_muladd_negate_c, s);
}

RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16)
RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32)
RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64)
GEN_VEXT_VV_ENV(vfmsub_vv_h, 2)
GEN_VEXT_VV_ENV(vfmsub_vv_w, 4)
GEN_VEXT_VV_ENV(vfmsub_vv_d, 8)
RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16)
RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32)
RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64)
GEN_VEXT_VF(vfmsub_vf_h, 2)
GEN_VEXT_VF(vfmsub_vf_w, 4)
GEN_VEXT_VF(vfmsub_vf_d, 8)
3614
/*
 * vfnmsub: vd[i] = -(vd[i] * vs1[i]) + vs2[i]
 * Multiplicand is the old vd; only the product is negated.
 */
static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
    return float16_muladd(d, b, a, float_muladd_negate_product, s);
}

static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
{
    return float32_muladd(d, b, a, float_muladd_negate_product, s);
}

static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
{
    return float64_muladd(d, b, a, float_muladd_negate_product, s);
}

RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16)
RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32)
RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64)
GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2)
GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4)
GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8)
RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16)
RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32)
RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
GEN_VEXT_VF(vfnmsub_vf_h, 2)
GEN_VEXT_VF(vfnmsub_vf_w, 4)
GEN_VEXT_VF(vfnmsub_vf_d, 8)
3642
/* Vector Widening Floating-Point Fused Multiply-Add Instructions */

/*
 * vfwmacc: widen both source operands to 2*SEW, then fuse
 * vd(2*SEW) += vs1 * vs2 in the wider precision.  The 'true' argument
 * to float16_to_float32 selects IEEE half-precision interpretation.
 */
static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
{
    return float32_muladd(float16_to_float32(a, true, s),
                          float16_to_float32(b, true, s), d, 0, s);
}

static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
{
    return float64_muladd(float32_to_float64(a, s),
                          float32_to_float64(b, s), d, 0, s);
}

RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
GEN_VEXT_VV_ENV(vfwmacc_vv_h, 4)
GEN_VEXT_VV_ENV(vfwmacc_vv_w, 8)
RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
GEN_VEXT_VF(vfwmacc_vf_h, 4)
GEN_VEXT_VF(vfwmacc_vf_w, 8)
3664
/*
 * vfwmaccbf16 (Zvfbfwma): widen two bfloat16 operands to float32 and
 * fuse into the float32 accumulator d.
 */
static uint32_t fwmaccbf16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
{
    return float32_muladd(bfloat16_to_float32(a, s),
                          bfloat16_to_float32(b, s), d, 0, s);
}

RVVCALL(OPFVV3, vfwmaccbf16_vv, WOP_UUU_H, H4, H2, H2, fwmaccbf16)
GEN_VEXT_VV_ENV(vfwmaccbf16_vv, 4)
RVVCALL(OPFVF3, vfwmaccbf16_vf, WOP_UUU_H, H4, H2, fwmaccbf16)
GEN_VEXT_VF(vfwmaccbf16_vf, 4)
3675
/* vfwnmacc: vd(2*SEW) = -(vs1 * vs2) - vd, computed in 2*SEW precision. */
static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
{
    return float32_muladd(float16_to_float32(a, true, s),
                          float16_to_float32(b, true, s), d,
                          float_muladd_negate_c | float_muladd_negate_product,
                          s);
}

static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
{
    return float64_muladd(float32_to_float64(a, s), float32_to_float64(b, s),
                          d, float_muladd_negate_c |
                             float_muladd_negate_product, s);
}

RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 4)
GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 8)
RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
GEN_VEXT_VF(vfwnmacc_vf_h, 4)
GEN_VEXT_VF(vfwnmacc_vf_w, 8)
3699
/* vfwmsac: vd(2*SEW) = +(vs1 * vs2) - vd, computed in 2*SEW precision. */
static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
{
    return float32_muladd(float16_to_float32(a, true, s),
                          float16_to_float32(b, true, s), d,
                          float_muladd_negate_c, s);
}

static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
{
    return float64_muladd(float32_to_float64(a, s),
                          float32_to_float64(b, s), d,
                          float_muladd_negate_c, s);
}

RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
GEN_VEXT_VV_ENV(vfwmsac_vv_h, 4)
GEN_VEXT_VV_ENV(vfwmsac_vv_w, 8)
RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
GEN_VEXT_VF(vfwmsac_vf_h, 4)
GEN_VEXT_VF(vfwmsac_vf_w, 8)
3722
/* vfwnmsac: vd(2*SEW) = -(vs1 * vs2) + vd, computed in 2*SEW precision. */
static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
{
    return float32_muladd(float16_to_float32(a, true, s),
                          float16_to_float32(b, true, s), d,
                          float_muladd_negate_product, s);
}

static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
{
    return float64_muladd(float32_to_float64(a, s),
                          float32_to_float64(b, s), d,
                          float_muladd_negate_product, s);
}

RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 4)
GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 8)
RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
GEN_VEXT_VF(vfwnmsac_vf_h, 4)
GEN_VEXT_VF(vfwnmsac_vf_w, 8)
3745
/* Vector Floating-Point Square-Root Instruction */

/*
 * Per-element op for unary vector FP instructions: reads one source
 * element from vs2 and writes OP(s2, fp_status) to vd.
 */
#define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP)         \
static void do_##NAME(void *vd, void *vs2, int i,      \
                      CPURISCVState *env)              \
{                                                      \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                    \
    *((TD *)vd + HD(i)) = OP(s2, &env->fp_status);     \
}
3754
/*
 * Generate a helper entry point for a unary vector FP operation that
 * needs env (for fp_status).  Per element: masked-off elements are
 * filled with 1s when mask-agnostic (vma); after the loop, tail
 * elements are filled with 1s per the tail-agnostic setting (vta).
 *
 * NOTE(review): the vl == 0 early return looks redundant if
 * VSTART_CHECK_EARLY_EXIT() already bails out when vstart >= vl --
 * confirm against its definition before simplifying.  It also skips
 * the tail update, matching the spec rule that vl == 0 updates no
 * destination elements, tail included.
 */
#define GEN_VEXT_V_ENV(NAME, ESZ)                      \
void HELPER(NAME)(void *vd, void *v0, void *vs2,       \
                  CPURISCVState *env, uint32_t desc)   \
{                                                      \
    uint32_t vm = vext_vm(desc);                       \
    uint32_t vl = env->vl;                             \
    uint32_t total_elems =                             \
        vext_get_total_elems(env, desc, ESZ);          \
    uint32_t vta = vext_vta(desc);                     \
    uint32_t vma = vext_vma(desc);                     \
    uint32_t i;                                        \
                                                       \
    VSTART_CHECK_EARLY_EXIT(env, vl);                  \
                                                       \
    if (vl == 0) {                                     \
        return;                                        \
    }                                                  \
    for (i = env->vstart; i < vl; i++) {               \
        if (!vm && !vext_elem_mask(v0, i)) {           \
            /* set masked-off elements to 1s */        \
            vext_set_elems_1s(vd, vma, i * ESZ,        \
                              (i + 1) * ESZ);          \
            continue;                                  \
        }                                              \
        do_##NAME(vd, vs2, i, env);                    \
    }                                                  \
    env->vstart = 0;                                   \
    vext_set_elems_1s(vd, vta, vl * ESZ,               \
                      total_elems * ESZ);              \
}
3785
/* vfsqrt.v: per-element square root via the softfloat sqrt routines. */
RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt)
RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt)
RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt)
GEN_VEXT_V_ENV(vfsqrt_v_h, 2)
GEN_VEXT_V_ENV(vfsqrt_v_w, 4)
GEN_VEXT_V_ENV(vfsqrt_v_d, 8)
3792
/*
 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction
 *
 * Adapted from riscv-v-spec recip.c:
 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
 */

/*
 * 7-bit reciprocal square-root estimate of a positive, non-zero,
 * finite value.  Special operands (NaN, inf, zero, negatives) must be
 * filtered by the per-format wrappers below.
 *
 * @f: raw IEEE-754 bit pattern of the input
 * @exp_size: exponent field width in bits
 * @frac_size: fraction field width in bits
 *
 * Returns the raw bit pattern of the estimate.
 */
static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size)
{
    uint64_t sign = extract64(f, frac_size + exp_size, 1);
    uint64_t exp = extract64(f, frac_size, exp_size);
    uint64_t frac = extract64(f, 0, frac_size);

    /* 128-entry estimate table from the riscv-v-spec reference code. */
    const uint8_t lookup_table[] = {
        52, 51, 50, 48, 47, 46, 44, 43,
        42, 41, 40, 39, 38, 36, 35, 34,
        33, 32, 31, 30, 30, 29, 28, 27,
        26, 25, 24, 23, 23, 22, 21, 20,
        19, 19, 18, 17, 16, 16, 15, 14,
        14, 13, 12, 12, 11, 10, 10, 9,
        9, 8, 7, 7, 6, 6, 5, 4,
        4, 3, 3, 2, 2, 1, 1, 0,
        127, 125, 123, 121, 119, 118, 116, 114,
        113, 111, 109, 108, 106, 105, 103, 102,
        100, 99, 97, 96, 95, 93, 92, 91,
        90, 88, 87, 86, 85, 84, 83, 82,
        80, 79, 78, 77, 76, 75, 74, 73,
        72, 71, 70, 70, 69, 68, 67, 66,
        65, 64, 63, 63, 62, 61, 60, 59,
        59, 58, 57, 56, 56, 55, 54, 53
    };
    const int precision = 7;

    if (exp == 0 && frac != 0) { /* subnormal */
        /*
         * Normalize the subnormal.  exp starts at 0 and wraps below
         * zero (unsigned); the out_exp computation below relies on
         * that modular arithmetic.
         */
        while (extract64(frac, frac_size - 1, 1) == 0) {
            exp--;
            frac <<= 1;
        }

        frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
    }

    /* Table index: exponent parity bit, then the top 6 fraction bits. */
    int idx = ((exp & 1) << (precision - 1)) |
              (frac >> (frac_size - precision + 1));
    uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
                        (frac_size - precision);
    /* Estimate exponent: (3 * bias - 1 - exp) / 2, bias = 2^(exp_size-1)-1 */
    uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2;

    uint64_t val = 0;
    val = deposit64(val, 0, frac_size, out_frac);
    val = deposit64(val, frac_size, exp_size, out_exp);
    val = deposit64(val, frac_size + exp_size, 1, sign);
    return val;
}
3847
/*
 * float16 front-end for frsqrt7(): handles the special operands
 * mandated by the RVV spec, then defers to the table-based estimate.
 * The check order matters for which exception flags get raised.
 */
static float16 frsqrt7_h(float16 f, float_status *s)
{
    int exp_size = 5, frac_size = 10;
    bool sign = float16_is_neg(f);

    /*
     * frsqrt7(sNaN) = canonical NaN
     * frsqrt7(-inf) = canonical NaN
     * frsqrt7(-normal) = canonical NaN
     * frsqrt7(-subnormal) = canonical NaN
     */
    if (float16_is_signaling_nan(f, s) ||
        (float16_is_infinity(f) && sign) ||
        (float16_is_normal(f) && sign) ||
        (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) {
        s->float_exception_flags |= float_flag_invalid;
        return float16_default_nan(s);
    }

    /* frsqrt7(qNaN) = canonical NaN */
    if (float16_is_quiet_nan(f, s)) {
        return float16_default_nan(s);
    }

    /* frsqrt7(+-0) = +-inf */
    if (float16_is_zero(f)) {
        s->float_exception_flags |= float_flag_divbyzero;
        return float16_set_sign(float16_infinity, sign);
    }

    /* frsqrt7(+inf) = +0 */
    if (float16_is_infinity(f) && !sign) {
        return float16_set_sign(float16_zero, sign);
    }

    /* +normal, +subnormal */
    uint64_t val = frsqrt7(f, exp_size, frac_size);
    return make_float16(val);
}
3887
/* float32 front-end for frsqrt7(); same special-case handling as _h. */
static float32 frsqrt7_s(float32 f, float_status *s)
{
    int exp_size = 8, frac_size = 23;
    bool sign = float32_is_neg(f);

    /*
     * frsqrt7(sNaN) = canonical NaN
     * frsqrt7(-inf) = canonical NaN
     * frsqrt7(-normal) = canonical NaN
     * frsqrt7(-subnormal) = canonical NaN
     */
    if (float32_is_signaling_nan(f, s) ||
        (float32_is_infinity(f) && sign) ||
        (float32_is_normal(f) && sign) ||
        (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) {
        s->float_exception_flags |= float_flag_invalid;
        return float32_default_nan(s);
    }

    /* frsqrt7(qNaN) = canonical NaN */
    if (float32_is_quiet_nan(f, s)) {
        return float32_default_nan(s);
    }

    /* frsqrt7(+-0) = +-inf */
    if (float32_is_zero(f)) {
        s->float_exception_flags |= float_flag_divbyzero;
        return float32_set_sign(float32_infinity, sign);
    }

    /* frsqrt7(+inf) = +0 */
    if (float32_is_infinity(f) && !sign) {
        return float32_set_sign(float32_zero, sign);
    }

    /* +normal, +subnormal */
    uint64_t val = frsqrt7(f, exp_size, frac_size);
    return make_float32(val);
}
3927
/* float64 front-end for frsqrt7(); same special-case handling as _h. */
static float64 frsqrt7_d(float64 f, float_status *s)
{
    int exp_size = 11, frac_size = 52;
    bool sign = float64_is_neg(f);

    /*
     * frsqrt7(sNaN) = canonical NaN
     * frsqrt7(-inf) = canonical NaN
     * frsqrt7(-normal) = canonical NaN
     * frsqrt7(-subnormal) = canonical NaN
     */
    if (float64_is_signaling_nan(f, s) ||
        (float64_is_infinity(f) && sign) ||
        (float64_is_normal(f) && sign) ||
        (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) {
        s->float_exception_flags |= float_flag_invalid;
        return float64_default_nan(s);
    }

    /* frsqrt7(qNaN) = canonical NaN */
    if (float64_is_quiet_nan(f, s)) {
        return float64_default_nan(s);
    }

    /* frsqrt7(+-0) = +-inf */
    if (float64_is_zero(f)) {
        s->float_exception_flags |= float_flag_divbyzero;
        return float64_set_sign(float64_infinity, sign);
    }

    /* frsqrt7(+inf) = +0 */
    if (float64_is_infinity(f) && !sign) {
        return float64_set_sign(float64_zero, sign);
    }

    /* +normal, +subnormal */
    uint64_t val = frsqrt7(f, exp_size, frac_size);
    return make_float64(val);
}
3967
/* vfrsqrt7.v: per-element 7-bit reciprocal square-root estimate. */
RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h)
RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s)
RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d)
GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2)
GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4)
GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8)
3974
/*
 * Vector Floating-Point Reciprocal Estimate Instruction
 *
 * Adapted from riscv-v-spec recip.c:
 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
 */

/*
 * 7-bit reciprocal estimate of a finite, non-zero value.  Unlike
 * frsqrt7(), negative inputs are accepted (the sign passes through)
 * and the result may overflow or go subnormal, so fp_status is needed
 * for rounding-mode-dependent overflow handling and exception flags.
 *
 * @f: raw IEEE-754 bit pattern of the input
 * @exp_size: exponent field width in bits
 * @frac_size: fraction field width in bits
 * @s: float status for rounding mode and exception flags
 *
 * Returns the raw bit pattern of the estimate.
 */
static uint64_t frec7(uint64_t f, int exp_size, int frac_size,
                      float_status *s)
{
    uint64_t sign = extract64(f, frac_size + exp_size, 1);
    uint64_t exp = extract64(f, frac_size, exp_size);
    uint64_t frac = extract64(f, 0, frac_size);

    /* 128-entry estimate table from the riscv-v-spec reference code. */
    const uint8_t lookup_table[] = {
        127, 125, 123, 121, 119, 117, 116, 114,
        112, 110, 109, 107, 105, 104, 102, 100,
        99, 97, 96, 94, 93, 91, 90, 88,
        87, 85, 84, 83, 81, 80, 79, 77,
        76, 75, 74, 72, 71, 70, 69, 68,
        66, 65, 64, 63, 62, 61, 60, 59,
        58, 57, 56, 55, 54, 53, 52, 51,
        50, 49, 48, 47, 46, 45, 44, 43,
        42, 41, 40, 40, 39, 38, 37, 36,
        35, 35, 34, 33, 32, 31, 31, 30,
        29, 28, 28, 27, 26, 25, 25, 24,
        23, 23, 22, 21, 21, 20, 19, 19,
        18, 17, 17, 16, 15, 15, 14, 14,
        13, 12, 12, 11, 11, 10, 9, 9,
        8, 8, 7, 7, 6, 5, 5, 4,
        4, 3, 3, 2, 2, 1, 1, 0
    };
    const int precision = 7;

    if (exp == 0 && frac != 0) { /* subnormal */
        /* Normalize the subnormal; exp wraps below zero (unsigned). */
        while (extract64(frac, frac_size - 1, 1) == 0) {
            exp--;
            frac <<= 1;
        }

        frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);

        /*
         * Input was so tiny (normalized exponent below -1) that the
         * reciprocal cannot be represented as a finite value.
         */
        if (exp != 0 && exp != UINT64_MAX) {
            /*
             * Overflow to inf or max value of same sign,
             * depending on sign and rounding mode.
             */
            s->float_exception_flags |= (float_flag_inexact |
                                         float_flag_overflow);

            if ((s->float_rounding_mode == float_round_to_zero) ||
                ((s->float_rounding_mode == float_round_down) && !sign) ||
                ((s->float_rounding_mode == float_round_up) && sign)) {
                /* Return greatest/negative finite value. */
                return (sign << (exp_size + frac_size)) |
                       (MAKE_64BIT_MASK(frac_size, exp_size) - 1);
            } else {
                /* Return +-inf. */
                return (sign << (exp_size + frac_size)) |
                       MAKE_64BIT_MASK(frac_size, exp_size);
            }
        }
    }

    /* Table index: top 7 fraction bits. */
    int idx = frac >> (frac_size - precision);
    uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
                        (frac_size - precision);
    /* Estimate exponent: 2 * bias - 1 - exp, bias = 2^(exp_size-1)-1 */
    uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp;

    if (out_exp == 0 || out_exp == UINT64_MAX) {
        /*
         * The result is subnormal, but don't raise the underflow exception,
         * because there's no additional loss of precision.
         */
        out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1);
        if (out_exp == UINT64_MAX) {
            out_frac >>= 1;
            out_exp = 0;
        }
    }

    uint64_t val = 0;
    val = deposit64(val, 0, frac_size, out_frac);
    val = deposit64(val, frac_size, exp_size, out_exp);
    val = deposit64(val, frac_size + exp_size, 1, sign);
    return val;
}
4062
/*
 * float16 front-end for frec7(): handles NaN/inf/zero specials, then
 * defers to the table-based estimate for normals and subnormals.
 */
static float16 frec7_h(float16 f, float_status *s)
{
    int exp_size = 5, frac_size = 10;
    bool sign = float16_is_neg(f);

    /* frec7(+-inf) = +-0 */
    if (float16_is_infinity(f)) {
        return float16_set_sign(float16_zero, sign);
    }

    /* frec7(+-0) = +-inf */
    if (float16_is_zero(f)) {
        s->float_exception_flags |= float_flag_divbyzero;
        return float16_set_sign(float16_infinity, sign);
    }

    /* frec7(sNaN) = canonical NaN */
    if (float16_is_signaling_nan(f, s)) {
        s->float_exception_flags |= float_flag_invalid;
        return float16_default_nan(s);
    }

    /* frec7(qNaN) = canonical NaN */
    if (float16_is_quiet_nan(f, s)) {
        return float16_default_nan(s);
    }

    /* +-normal, +-subnormal */
    uint64_t val = frec7(f, exp_size, frac_size, s);
    return make_float16(val);
}
4094
/* float32 front-end for frec7(); same special-case handling as _h. */
static float32 frec7_s(float32 f, float_status *s)
{
    int exp_size = 8, frac_size = 23;
    bool sign = float32_is_neg(f);

    /* frec7(+-inf) = +-0 */
    if (float32_is_infinity(f)) {
        return float32_set_sign(float32_zero, sign);
    }

    /* frec7(+-0) = +-inf */
    if (float32_is_zero(f)) {
        s->float_exception_flags |= float_flag_divbyzero;
        return float32_set_sign(float32_infinity, sign);
    }

    /* frec7(sNaN) = canonical NaN */
    if (float32_is_signaling_nan(f, s)) {
        s->float_exception_flags |= float_flag_invalid;
        return float32_default_nan(s);
    }

    /* frec7(qNaN) = canonical NaN */
    if (float32_is_quiet_nan(f, s)) {
        return float32_default_nan(s);
    }

    /* +-normal, +-subnormal */
    uint64_t val = frec7(f, exp_size, frac_size, s);
    return make_float32(val);
}
4126
/* float64 front-end for frec7(); same special-case handling as _h. */
static float64 frec7_d(float64 f, float_status *s)
{
    int exp_size = 11, frac_size = 52;
    bool sign = float64_is_neg(f);

    /* frec7(+-inf) = +-0 */
    if (float64_is_infinity(f)) {
        return float64_set_sign(float64_zero, sign);
    }

    /* frec7(+-0) = +-inf */
    if (float64_is_zero(f)) {
        s->float_exception_flags |= float_flag_divbyzero;
        return float64_set_sign(float64_infinity, sign);
    }

    /* frec7(sNaN) = canonical NaN */
    if (float64_is_signaling_nan(f, s)) {
        s->float_exception_flags |= float_flag_invalid;
        return float64_default_nan(s);
    }

    /* frec7(qNaN) = canonical NaN */
    if (float64_is_quiet_nan(f, s)) {
        return float64_default_nan(s);
    }

    /* +-normal, +-subnormal */
    uint64_t val = frec7(f, exp_size, frac_size, s);
    return make_float64(val);
}
4158
/* vfrec7.v: per-element 7-bit reciprocal estimate. */
RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h)
RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s)
RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d)
GEN_VEXT_V_ENV(vfrec7_v_h, 2)
GEN_VEXT_V_ENV(vfrec7_v_w, 4)
GEN_VEXT_V_ENV(vfrec7_v_d, 8)
4165
/* Vector Floating-Point MIN/MAX Instructions */
/*
 * vfmin/vfmax use the IEEE-754 2019 minimumNumber/maximumNumber
 * softfloat operations (quiet-NaN operands yield the other operand).
 */
RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number)
RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number)
RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number)
GEN_VEXT_VV_ENV(vfmin_vv_h, 2)
GEN_VEXT_VV_ENV(vfmin_vv_w, 4)
GEN_VEXT_VV_ENV(vfmin_vv_d, 8)
RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number)
RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number)
RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number)
GEN_VEXT_VF(vfmin_vf_h, 2)
GEN_VEXT_VF(vfmin_vf_w, 4)
GEN_VEXT_VF(vfmin_vf_d, 8)

RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number)
RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number)
RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number)
GEN_VEXT_VV_ENV(vfmax_vv_h, 2)
GEN_VEXT_VV_ENV(vfmax_vv_w, 4)
GEN_VEXT_VV_ENV(vfmax_vv_d, 8)
RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number)
RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number)
RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number)
GEN_VEXT_VF(vfmax_vf_h, 2)
GEN_VEXT_VF(vfmax_vf_w, 4)
GEN_VEXT_VF(vfmax_vf_d, 8)
4192
4193 /* Vector Floating-Point Sign-Injection Instructions */
4194 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s)
4195 {
4196 return deposit64(b, 0, 15, a);
4197 }
4198
fsgnj32(uint32_t a,uint32_t b,float_status * s)4199 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s)
4200 {
4201 return deposit64(b, 0, 31, a);
4202 }
4203
fsgnj64(uint64_t a,uint64_t b,float_status * s)4204 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s)
4205 {
4206 return deposit64(b, 0, 63, a);
4207 }
4208
/* vfsgnj.vv / vfsgnj.vf: sign of vs1 (or scalar), magnitude of vs2. */
RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16)
RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32)
RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64)
GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2)
GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4)
GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8)
RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16)
RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32)
RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64)
GEN_VEXT_VF(vfsgnj_vf_h, 2)
GEN_VEXT_VF(vfsgnj_vf_w, 4)
GEN_VEXT_VF(vfsgnj_vf_d, 8)
4221
/* vfsgnjn: magnitude of a, inverted sign of b (deposit64 on ~b). */
static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s)
{
    return deposit64(~b, 0, 15, a);
}

static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s)
{
    return deposit64(~b, 0, 31, a);
}

static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s)
{
    return deposit64(~b, 0, 63, a);
}
4236
/* vfsgnjn.vv / vfsgnjn.vf: negated sign of vs1 (or scalar), magnitude of vs2. */
RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16)
RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32)
RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64)
GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2)
GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4)
GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8)
RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16)
RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32)
RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64)
GEN_VEXT_VF(vfsgnjn_vf_h, 2)
GEN_VEXT_VF(vfsgnjn_vf_w, 4)
GEN_VEXT_VF(vfsgnjn_vf_d, 8)
4249
4250 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s)
4251 {
4252 return deposit64(b ^ a, 0, 15, a);
4253 }
4254
fsgnjx32(uint32_t a,uint32_t b,float_status * s)4255 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s)
4256 {
4257 return deposit64(b ^ a, 0, 31, a);
4258 }
4259
fsgnjx64(uint64_t a,uint64_t b,float_status * s)4260 static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s)
4261 {
4262 return deposit64(b ^ a, 0, 63, a);
4263 }
4264
/* vfsgnjx.vv / vfsgnjx.vf: XOR of both signs, magnitude of vs2. */
RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16)
RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32)
RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64)
GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2)
GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4)
GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8)
RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16)
RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32)
RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64)
GEN_VEXT_VF(vfsgnjx_vf_h, 2)
GEN_VEXT_VF(vfsgnjx_vf_w, 4)
GEN_VEXT_VF(vfsgnjx_vf_d, 8)
4277
/* Vector Floating-Point Compare Instructions */
/*
 * Generate a mask-producing vector-vector FP compare helper.  Writes
 * one mask bit per element: DO_OP(vs2[i], vs1[i], fp_status).
 * Masked-off bits become 1 when mask-agnostic (vma); the tail of the
 * mask register becomes all 1s when the implementation treats
 * tail-agnostic as all-1s (vta_all_1s).
 */
#define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP)            \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vm = vext_vm(desc);                              \
    uint32_t vl = env->vl;                                    \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;    \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);              \
    uint32_t vma = vext_vma(desc);                            \
    uint32_t i;                                               \
                                                              \
    VSTART_CHECK_EARLY_EXIT(env, vl);                         \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        if (!vm && !vext_elem_mask(v0, i)) {                  \
            /* set masked-off elements to 1s */               \
            if (vma) {                                        \
                vext_set_elem_mask(vd, i, 1);                 \
            }                                                 \
            continue;                                         \
        }                                                     \
        vext_set_elem_mask(vd, i,                             \
                           DO_OP(s2, s1, &env->fp_status));   \
    }                                                         \
    env->vstart = 0;                                          \
    /*
     * mask destination registers are always tail-agnostic;
     * set tail elements to 1s
     */                                                       \
    if (vta_all_1s) {                                         \
        for (; i < total_elems; i++) {                        \
            vext_set_elem_mask(vd, i, 1);                     \
        }                                                     \
    }                                                         \
}
4316
4317 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet)
4318 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet)
4319 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet)
4320
/*
 * GEN_VEXT_CMP_VF: vector-scalar variant of the compare above; each
 * active vs2[i] is compared with the scalar s1 (truncated to ETYPE)
 * and the boolean result is written to mask bit i of vd.  Masked-off
 * and tail bits follow the same vma / vta_all_1s policy.
 */
#define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP)                      \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,       \
                  CPURISCVState *env, uint32_t desc)                \
{                                                                   \
    uint32_t vm = vext_vm(desc);                                    \
    uint32_t vl = env->vl;                                          \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;          \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);                    \
    uint32_t vma = vext_vma(desc);                                  \
    uint32_t i;                                                     \
                                                                    \
    VSTART_CHECK_EARLY_EXIT(env, vl);                               \
                                                                    \
    for (i = env->vstart; i < vl; i++) {                            \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                          \
        if (!vm && !vext_elem_mask(v0, i)) {                        \
            /* set masked-off elements to 1s */                     \
            if (vma) {                                              \
                vext_set_elem_mask(vd, i, 1);                       \
            }                                                       \
            continue;                                               \
        }                                                           \
        vext_set_elem_mask(vd, i,                                   \
                           DO_OP(s2, (ETYPE)s1, &env->fp_status));  \
    }                                                               \
    env->vstart = 0;                                                \
    /*
     * mask destination register are always tail-agnostic
     * set tail elements to 1s
     */                                                             \
    if (vta_all_1s) {                                               \
        for (; i < total_elems; i++) {                              \
            vext_set_elem_mask(vd, i, 1);                           \
        }                                                           \
    }                                                               \
}

GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet)
GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet)
GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet)
4361
4362 static bool vmfne16(uint16_t a, uint16_t b, float_status *s)
4363 {
4364 FloatRelation compare = float16_compare_quiet(a, b, s);
4365 return compare != float_relation_equal;
4366 }
4367
vmfne32(uint32_t a,uint32_t b,float_status * s)4368 static bool vmfne32(uint32_t a, uint32_t b, float_status *s)
4369 {
4370 FloatRelation compare = float32_compare_quiet(a, b, s);
4371 return compare != float_relation_equal;
4372 }
4373
vmfne64(uint64_t a,uint64_t b,float_status * s)4374 static bool vmfne64(uint64_t a, uint64_t b, float_status *s)
4375 {
4376 FloatRelation compare = float64_compare_quiet(a, b, s);
4377 return compare != float_relation_equal;
4378 }
4379
/* vmfne: not-equal, quiet (unordered operands compare as not-equal) */
GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16)
GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32)
GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64)
GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16)
GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32)
GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64)

/* vmflt: ordered less-than */
GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt)
GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt)
GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt)
GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt)
GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt)
GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt)

/* vmfle: ordered less-than-or-equal */
GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le)
GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le)
GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le)
GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le)
GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le)
GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le)
4400
4401 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s)
4402 {
4403 FloatRelation compare = float16_compare(a, b, s);
4404 return compare == float_relation_greater;
4405 }
4406
vmfgt32(uint32_t a,uint32_t b,float_status * s)4407 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s)
4408 {
4409 FloatRelation compare = float32_compare(a, b, s);
4410 return compare == float_relation_greater;
4411 }
4412
vmfgt64(uint64_t a,uint64_t b,float_status * s)4413 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s)
4414 {
4415 FloatRelation compare = float64_compare(a, b, s);
4416 return compare == float_relation_greater;
4417 }
4418
/* vmfgt: only the vector-scalar form is instantiated here */
GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16)
GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32)
GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64)
4422
4423 static bool vmfge16(uint16_t a, uint16_t b, float_status *s)
4424 {
4425 FloatRelation compare = float16_compare(a, b, s);
4426 return compare == float_relation_greater ||
4427 compare == float_relation_equal;
4428 }
4429
vmfge32(uint32_t a,uint32_t b,float_status * s)4430 static bool vmfge32(uint32_t a, uint32_t b, float_status *s)
4431 {
4432 FloatRelation compare = float32_compare(a, b, s);
4433 return compare == float_relation_greater ||
4434 compare == float_relation_equal;
4435 }
4436
vmfge64(uint64_t a,uint64_t b,float_status * s)4437 static bool vmfge64(uint64_t a, uint64_t b, float_status *s)
4438 {
4439 FloatRelation compare = float64_compare(a, b, s);
4440 return compare == float_relation_greater ||
4441 compare == float_relation_equal;
4442 }
4443
/* vmfge: only the vector-scalar form is instantiated here */
GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16)
GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)
4447
4448 /* Vector Floating-Point Classify Instruction */
4449 target_ulong fclass_h(uint64_t frs1)
4450 {
4451 float16 f = frs1;
4452 bool sign = float16_is_neg(f);
4453
4454 if (float16_is_infinity(f)) {
4455 return sign ? 1 << 0 : 1 << 7;
4456 } else if (float16_is_zero(f)) {
4457 return sign ? 1 << 3 : 1 << 4;
4458 } else if (float16_is_zero_or_denormal(f)) {
4459 return sign ? 1 << 2 : 1 << 5;
4460 } else if (float16_is_any_nan(f)) {
4461 float_status s = { }; /* for snan_bit_is_one */
4462 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4463 } else {
4464 return sign ? 1 << 1 : 1 << 6;
4465 }
4466 }
4467
fclass_s(uint64_t frs1)4468 target_ulong fclass_s(uint64_t frs1)
4469 {
4470 float32 f = frs1;
4471 bool sign = float32_is_neg(f);
4472
4473 if (float32_is_infinity(f)) {
4474 return sign ? 1 << 0 : 1 << 7;
4475 } else if (float32_is_zero(f)) {
4476 return sign ? 1 << 3 : 1 << 4;
4477 } else if (float32_is_zero_or_denormal(f)) {
4478 return sign ? 1 << 2 : 1 << 5;
4479 } else if (float32_is_any_nan(f)) {
4480 float_status s = { }; /* for snan_bit_is_one */
4481 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4482 } else {
4483 return sign ? 1 << 1 : 1 << 6;
4484 }
4485 }
4486
fclass_d(uint64_t frs1)4487 target_ulong fclass_d(uint64_t frs1)
4488 {
4489 float64 f = frs1;
4490 bool sign = float64_is_neg(f);
4491
4492 if (float64_is_infinity(f)) {
4493 return sign ? 1 << 0 : 1 << 7;
4494 } else if (float64_is_zero(f)) {
4495 return sign ? 1 << 3 : 1 << 4;
4496 } else if (float64_is_zero_or_denormal(f)) {
4497 return sign ? 1 << 2 : 1 << 5;
4498 } else if (float64_is_any_nan(f)) {
4499 float_status s = { }; /* for snan_bit_is_one */
4500 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4501 } else {
4502 return sign ? 1 << 1 : 1 << 6;
4503 }
4504 }
4505
/* h/w/d pick the 16/32/64-bit classify helper for each element */
RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h)
RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s)
RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d)
GEN_VEXT_V(vfclass_v_h, 2)
GEN_VEXT_V(vfclass_v_w, 4)
GEN_VEXT_V(vfclass_v_d, 8)
4512
/* Vector Floating-Point Merge Instruction */

/*
 * GEN_VFMERGE_VF: vd[i] = active(i) ? s1 : vs2[i].  Active elements
 * (vm set, or the v0 mask bit set) receive the scalar; inactive
 * elements receive vs2[i].  Tail elements are set to 1s per vta.
 */
#define GEN_VFMERGE_VF(NAME, ETYPE, H)                            \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,     \
                  CPURISCVState *env, uint32_t desc)              \
{                                                                 \
    uint32_t vm = vext_vm(desc);                                  \
    uint32_t vl = env->vl;                                        \
    uint32_t esz = sizeof(ETYPE);                                 \
    uint32_t total_elems =                                        \
        vext_get_total_elems(env, desc, esz);                     \
    uint32_t vta = vext_vta(desc);                                \
    uint32_t i;                                                   \
                                                                  \
    VSTART_CHECK_EARLY_EXIT(env, vl);                             \
                                                                  \
    for (i = env->vstart; i < vl; i++) {                          \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                        \
        *((ETYPE *)vd + H(i)) =                                   \
            (!vm && !vext_elem_mask(v0, i) ? s2 : s1);            \
    }                                                             \
    env->vstart = 0;                                              \
    /* set tail elements to 1s */                                 \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);      \
}

GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2)
GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4)
GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8)
4542
/* Single-Width Floating-Point/Integer Type-Convert Instructions */
/* Suffix h/w/d selects the 16/32/64-bit softfloat conversion helper. */
/* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16)
RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32)
RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64)
GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2)
GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4)
GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8)

/* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */
RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16)
RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32)
RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64)
GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2)
GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4)
GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8)

/* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16)
RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32)
RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64)
GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2)
GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4)
GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8)

/* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */
RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16)
RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32)
RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64)
GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2)
GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4)
GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8)
4575
/* Widening Floating-Point/Integer Type-Convert Instructions */
/* (TD, T2, TX2): destination element is twice the source width */
#define WOP_UU_B uint16_t, uint8_t, uint8_t
#define WOP_UU_H uint32_t, uint16_t, uint16_t
#define WOP_UU_W uint64_t, uint32_t, uint32_t
/*
 * vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.
 */
RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32)
RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64)
GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 4)
GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 8)

/* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32)
RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64)
GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 4)
GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 8)

/*
 * vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float.
 */
RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16)
RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32)
RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64)
GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 2)
GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 4)
GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 8)

/* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */
RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16)
RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32)
RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64)
GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 2)
GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 4)
GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 8)
4612
4613 /*
4614 * vfwcvt.f.f.v vd, vs2, vm # Convert single-width float to double-width float.
4615 */
4616 static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
4617 {
4618 return float16_to_float32(a, true, s);
4619 }
4620
/* Widen h->w and w->d floats; bf16 widens through the same machinery. */
RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 4)
GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 8)

RVVCALL(OPFVV1, vfwcvtbf16_f_f_v, WOP_UU_H, H4, H2, bfloat16_to_float32)
GEN_VEXT_V_ENV(vfwcvtbf16_f_f_v, 4)
4628
/* Narrowing Floating-Point/Integer Type-Convert Instructions */
/* (TD, T2, TX2): destination element is narrower than the source */
#define NOP_UU_B uint8_t, uint16_t, uint32_t
#define NOP_UU_H uint16_t, uint32_t, uint32_t
#define NOP_UU_W uint32_t, uint64_t, uint64_t
/* vfncvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8)
RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16)
RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32)
GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1)
GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2)
GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4)

/* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */
RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8)
RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16)
RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32)
GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1)
GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2)
GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4)

/*
 * vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float.
 */
RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16)
RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32)
GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2)
GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4)

/* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */
RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16)
RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32)
GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2)
GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4)
4663
4664 /* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. */
4665 static uint16_t vfncvtffv16(uint32_t a, float_status *s)
4666 {
4667 return float32_to_float16(a, true, s);
4668 }
4669
/* Narrow w->h and d->w floats; bf16 narrows through the same machinery. */
RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16)
RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32)
GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2)
GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4)

RVVCALL(OPFVV1, vfncvtbf16_f_f_w, NOP_UU_H, H2, H4, float32_to_bfloat16)
GEN_VEXT_V_ENV(vfncvtbf16_f_f_w, 2)
4677
/*
 * Vector Reduction Operations
 */
/* Vector Single-Width Integer Reduction Instructions */

/*
 * GEN_VEXT_RED: fold all active elements of vs2 into a scalar seeded
 * from vs1[0] using OP, storing the result to vd[0] (only when
 * vl > 0).  TD and TS2 differ for the widening variants below.
 */
#define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP)          \
void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    uint32_t vm = vext_vm(desc);                          \
    uint32_t vl = env->vl;                                \
    uint32_t esz = sizeof(TD);                            \
    uint32_t vlenb = simd_maxsz(desc);                    \
    uint32_t vta = vext_vta(desc);                        \
    uint32_t i;                                           \
    TD s1 = *((TD *)vs1 + HD(0));                         \
                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                     \
                                                          \
    for (i = env->vstart; i < vl; i++) {                  \
        TS2 s2 = *((TS2 *)vs2 + HS2(i));                  \
        if (!vm && !vext_elem_mask(v0, i)) {              \
            continue;                                     \
        }                                                 \
        s1 = OP(s1, (TD)s2);                              \
    }                                                     \
    if (vl > 0) {                                         \
        *((TD *)vd + HD(0)) = s1;                         \
    }                                                     \
    env->vstart = 0;                                      \
    /* set tail elements to 1s */                         \
    vext_set_elems_1s(vd, vta, esz, vlenb);               \
}
4711
/* vd[0] = sum(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD)
GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD)
GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD)
GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD)

/* vd[0] = maxu(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX)
GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX)
GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX)
GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX)

/* vd[0] = max(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX)
GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX)
GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX)
GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX)

/* vd[0] = minu(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN)
GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN)
GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN)
GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN)

/* vd[0] = min(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN)
GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN)
GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN)
GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN)

/* vd[0] = and(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND)
GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND)
GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND)
GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND)

/* vd[0] = or(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR)
GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR)
GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR)
GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR)

/* vd[0] = xor(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR)
GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR)
GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR)
GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR)

/* Vector Widening Integer Reduction Instructions */
/* Signed sum reduction into double-width accumulator */
GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD)
GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD)
GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD)

/* Unsigned sum reduction into double-width accumulator */
GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD)
GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD)
GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD)
4770
/* Vector Single-Width Floating-Point Reduction Instructions */

/*
 * GEN_VEXT_FRED: floating-point counterpart of GEN_VEXT_RED; OP is
 * additionally handed &env->fp_status.  The accumulator is seeded from
 * vs1[0] and the result is stored to vd[0] only when vl > 0.
 */
#define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP)         \
void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    uint32_t vm = vext_vm(desc);                          \
    uint32_t vl = env->vl;                                \
    uint32_t esz = sizeof(TD);                            \
    uint32_t vlenb = simd_maxsz(desc);                    \
    uint32_t vta = vext_vta(desc);                        \
    uint32_t i;                                           \
    TD s1 = *((TD *)vs1 + HD(0));                         \
                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                     \
                                                          \
    for (i = env->vstart; i < vl; i++) {                  \
        TS2 s2 = *((TS2 *)vs2 + HS2(i));                  \
        if (!vm && !vext_elem_mask(v0, i)) {              \
            continue;                                     \
        }                                                 \
        s1 = OP(s1, (TD)s2, &env->fp_status);             \
    }                                                     \
    if (vl > 0) {                                         \
        *((TD *)vd + HD(0)) = s1;                         \
    }                                                     \
    env->vstart = 0;                                      \
    /* set tail elements to 1s */                         \
    vext_set_elems_1s(vd, vta, esz, vlenb);               \
}

/* Unordered sum */
GEN_VEXT_FRED(vfredusum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
GEN_VEXT_FRED(vfredusum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
GEN_VEXT_FRED(vfredusum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)

/* Ordered sum */
GEN_VEXT_FRED(vfredosum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
GEN_VEXT_FRED(vfredosum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
GEN_VEXT_FRED(vfredosum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)

/* Maximum value */
GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2,
              float16_maximum_number)
GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4,
              float32_maximum_number)
GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8,
              float64_maximum_number)

/* Minimum value */
GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2,
              float16_minimum_number)
GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4,
              float32_minimum_number)
GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8,
              float64_minimum_number)
4827
4828 /* Vector Widening Floating-Point Add Instructions */
4829 static uint32_t fwadd16(uint32_t a, uint16_t b, float_status *s)
4830 {
4831 return float32_add(a, float16_to_float32(b, true, s), s);
4832 }
4833
static uint64_t fwadd32(uint64_t a, uint32_t b, float_status *s)
{
    /* Promote the single-precision addend, then add in double precision. */
    uint64_t wide_b = float32_to_float64(b, s);
    return float64_add(a, wide_b, s);
}
4838
/* Vector Widening Floating-Point Reduction Instructions */
/* Ordered/unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */
/* The 2*SEW accumulator is updated via fwadd16/fwadd32 above. */
GEN_VEXT_FRED(vfwredusum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16)
GEN_VEXT_FRED(vfwredusum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32)
GEN_VEXT_FRED(vfwredosum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16)
GEN_VEXT_FRED(vfwredosum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32)
4845
/*
 * Vector Mask Operations
 */
/* Vector Mask-Register Logical Instructions */

/*
 * GEN_VEXT_MASK_VV: vd.mask[i] = OP(vs2.mask[i], vs1.mask[i]) for
 * i in [vstart, vl); tail bits are set to 1s when vta_all_1s, since
 * mask destinations are always tail-agnostic.
 */
#define GEN_VEXT_MASK_VV(NAME, OP)                        \
void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    uint32_t vl = env->vl;                                \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;\
    uint32_t vta_all_1s = vext_vta_all_1s(desc);          \
    uint32_t i;                                           \
    int a, b;                                             \
                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                     \
                                                          \
    for (i = env->vstart; i < vl; i++) {                  \
        a = vext_elem_mask(vs1, i);                       \
        b = vext_elem_mask(vs2, i);                       \
        vext_set_elem_mask(vd, i, OP(b, a));              \
    }                                                     \
    env->vstart = 0;                                      \
    /*
     * mask destination register are always tail-agnostic
     * set tail elements to 1s
     */                                                   \
    if (vta_all_1s) {                                     \
        for (; i < total_elems; i++) {                    \
            vext_set_elem_mask(vd, i, 1);                 \
        }                                                 \
    }                                                     \
}

/*
 * The operands are single mask bits (presumably 0/1 as produced by
 * vext_elem_mask), so the logical '!' below acts as a one-bit
 * complement.
 */
#define DO_NAND(N, M)  (!(N & M))
#define DO_ANDNOT(N, M)  (N & !M)
#define DO_NOR(N, M)  (!(N | M))
#define DO_ORNOT(N, M)  (N | !M)
#define DO_XNOR(N, M)  (!(N ^ M))

GEN_VEXT_MASK_VV(vmand_mm, DO_AND)
GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND)
GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT)
GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR)
GEN_VEXT_MASK_VV(vmor_mm, DO_OR)
GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR)
GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT)
GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR)
4894
4895 /* Vector count population in mask vcpop */
4896 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env,
4897 uint32_t desc)
4898 {
4899 target_ulong cnt = 0;
4900 uint32_t vm = vext_vm(desc);
4901 uint32_t vl = env->vl;
4902 int i;
4903
4904 for (i = env->vstart; i < vl; i++) {
4905 if (vm || vext_elem_mask(v0, i)) {
4906 if (vext_elem_mask(vs2, i)) {
4907 cnt++;
4908 }
4909 }
4910 }
4911 env->vstart = 0;
4912 return cnt;
4913 }
4914
/* vfirst find-first-set mask bit */
target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env,
                              uint32_t desc)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    int i;

    /* Return the index of the first active set bit of vs2, or -1. */
    for (i = env->vstart; i < vl; i++) {
        if (vm || vext_elem_mask(v0, i)) {
            if (vext_elem_mask(vs2, i)) {
                /*
                 * NOTE(review): env->vstart is not cleared on this
                 * path; presumably the translator guarantees
                 * vstart == 0 for mask instructions -- confirm.
                 */
                return i;
            }
        }
    }
    env->vstart = 0;
    return -1LL;
}
4933
/*
 * Which elements vmsetm() writes as 1, relative to the first set bit
 * of the source mask:
 *   ONLY_FIRST    -> vmsof: only the first set bit itself
 *   INCLUDE_FIRST -> vmsif: everything up to and including it
 *   BEFORE_FIRST  -> vmsbf: everything strictly before it
 */
enum set_mask_type {
    ONLY_FIRST = 1,
    INCLUDE_FIRST,
    BEFORE_FIRST,
};
4939
/*
 * Common worker for vmsbf/vmsif/vmsof: scan the active bits of vs2 for
 * the first set bit and write a prefix mask into vd according to
 * 'type' (see enum set_mask_type).  Inactive elements follow vma;
 * tail bits are set to 1s when vta_all_1s, since mask destinations are
 * always tail-agnostic.
 */
static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
                   uint32_t desc, enum set_mask_type type)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;
    uint32_t vta_all_1s = vext_vta_all_1s(desc);
    uint32_t vma = vext_vma(desc);
    int i;
    bool first_mask_bit = false;

    VSTART_CHECK_EARLY_EXIT(env, vl);

    for (i = env->vstart; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            /* set masked-off elements to 1s */
            if (vma) {
                vext_set_elem_mask(vd, i, 1);
            }
            continue;
        }
        /* write a zero to all following active elements */
        if (first_mask_bit) {
            vext_set_elem_mask(vd, i, 0);
            continue;
        }
        if (vext_elem_mask(vs2, i)) {
            first_mask_bit = true;
            /* the element holding the first set bit itself */
            if (type == BEFORE_FIRST) {
                vext_set_elem_mask(vd, i, 0);
            } else {
                vext_set_elem_mask(vd, i, 1);
            }
        } else {
            /* active elements before the first set bit */
            if (type == ONLY_FIRST) {
                vext_set_elem_mask(vd, i, 0);
            } else {
                vext_set_elem_mask(vd, i, 1);
            }
        }
    }
    env->vstart = 0;
    /*
     * mask destination register are always tail-agnostic
     * set tail elements to 1s
     */
    if (vta_all_1s) {
        for (; i < total_elems; i++) {
            vext_set_elem_mask(vd, i, 1);
        }
    }
}
4992
/* vmsbf.m: set the mask bits strictly before the first set bit */
void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
}

/* vmsif.m: set the mask bits up to and including the first set bit */
void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
}

/* vmsof.m: set only the first set mask bit */
void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
}
5010
/* Vector Iota Instruction */

/*
 * GEN_VEXT_VIOTA_M: vd[i] = number of set bits of vs2 at indices below
 * i ("sum" is stored before being incremented).  Masked-off and tail
 * elements are filled with 1s according to vma / vta.
 */
#define GEN_VEXT_VIOTA_M(NAME, ETYPE, H)                              \
void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env,  \
                  uint32_t desc)                                      \
{                                                                     \
    uint32_t vm = vext_vm(desc);                                      \
    uint32_t vl = env->vl;                                            \
    uint32_t esz = sizeof(ETYPE);                                     \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);      \
    uint32_t vta = vext_vta(desc);                                    \
    uint32_t vma = vext_vma(desc);                                    \
    uint32_t sum = 0;                                                 \
    int i;                                                            \
                                                                      \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                 \
                                                                      \
    for (i = env->vstart; i < vl; i++) {                              \
        if (!vm && !vext_elem_mask(v0, i)) {                          \
            /* set masked-off elements to 1s */                       \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);       \
            continue;                                                 \
        }                                                             \
        *((ETYPE *)vd + H(i)) = sum;                                  \
        if (vext_elem_mask(vs2, i)) {                                 \
            sum++;                                                    \
        }                                                             \
    }                                                                 \
    env->vstart = 0;                                                  \
    /* set tail elements to 1s */                                     \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);          \
}

GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1)
GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)
5047
/* Vector Element Index Instruction */

/*
 * GEN_VEXT_VID_V: vd[i] = i for all active elements.  Masked-off and
 * tail elements are filled with 1s according to vma / vta.
 */
#define GEN_VEXT_VID_V(NAME, ETYPE, H)                                \
void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \
{                                                                     \
    uint32_t vm = vext_vm(desc);                                      \
    uint32_t vl = env->vl;                                            \
    uint32_t esz = sizeof(ETYPE);                                     \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);      \
    uint32_t vta = vext_vta(desc);                                    \
    uint32_t vma = vext_vma(desc);                                    \
    int i;                                                            \
                                                                      \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                 \
                                                                      \
    for (i = env->vstart; i < vl; i++) {                              \
        if (!vm && !vext_elem_mask(v0, i)) {                          \
            /* set masked-off elements to 1s */                       \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);       \
            continue;                                                 \
        }                                                             \
        *((ETYPE *)vd + H(i)) = i;                                    \
    }                                                                 \
    env->vstart = 0;                                                  \
    /* set tail elements to 1s */                                     \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);          \
}

GEN_VEXT_VID_V(vid_v_b, uint8_t, H1)
GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)
5079
/*
 * Vector Permutation Instructions
 */

/* Vector Slide Instructions */

/*
 * GEN_VEXT_VSLIDEUP_VX: vd[i] = vs2[i - offset] for active
 * i in [max(vstart, offset), vl).  Elements below the offset are left
 * untouched (the loop starts at i_min); tail elements follow vta.
 */
#define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H)                          \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,     \
                  CPURISCVState *env, uint32_t desc)                  \
{                                                                     \
    uint32_t vm = vext_vm(desc);                                      \
    uint32_t vl = env->vl;                                            \
    uint32_t esz = sizeof(ETYPE);                                     \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);      \
    uint32_t vta = vext_vta(desc);                                    \
    uint32_t vma = vext_vma(desc);                                    \
    target_ulong offset = s1, i_min, i;                               \
                                                                      \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                 \
                                                                      \
    i_min = MAX(env->vstart, offset);                                 \
    for (i = i_min; i < vl; i++) {                                    \
        if (!vm && !vext_elem_mask(v0, i)) {                          \
            /* set masked-off elements to 1s */                       \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);       \
            continue;                                                 \
        }                                                             \
        *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset));      \
    }                                                                 \
    env->vstart = 0;                                                  \
    /* set tail elements to 1s */                                     \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);          \
}

/* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)
5118
/*
 * GEN_VEXT_VSLIDEDOWN_VX: vd[i] = vs2[i + s1] while the source index
 * stays below vlmax (first loop); the remaining active elements up to
 * vl are zeroed (second loop).  Masked-off elements follow vma, tail
 * elements follow vta.
 */
#define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H)                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,     \
                  CPURISCVState *env, uint32_t desc)                  \
{                                                                     \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));       \
    uint32_t vm = vext_vm(desc);                                      \
    uint32_t vl = env->vl;                                            \
    uint32_t esz = sizeof(ETYPE);                                     \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);      \
    uint32_t vta = vext_vta(desc);                                    \
    uint32_t vma = vext_vma(desc);                                    \
    target_ulong i_max, i_min, i;                                     \
                                                                      \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                 \
                                                                      \
    i_min = MIN(s1 < vlmax ? vlmax - s1 : 0, vl);                     \
    i_max = MAX(i_min, env->vstart);                                  \
    for (i = env->vstart; i < i_max; ++i) {                           \
        if (!vm && !vext_elem_mask(v0, i)) {                          \
            /* set masked-off elements to 1s */                       \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);       \
            continue;                                                 \
        }                                                             \
        *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1));          \
    }                                                                 \
                                                                      \
    for (i = i_max; i < vl; ++i) {                                    \
        if (!vm && !vext_elem_mask(v0, i)) {                          \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);       \
            continue;                                                 \
        }                                                             \
        *((ETYPE *)vd + H(i)) = 0;                                    \
    }                                                                 \
                                                                      \
    env->vstart = 0;                                                  \
    /* set tail elements to 1s */                                     \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);          \
}

/* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)
5163
5164 #define GEN_VEXT_VSLIE1UP(BITWIDTH, H) \
5165 static void vslide1up_##BITWIDTH(void *vd, void *v0, uint64_t s1, \
5166 void *vs2, CPURISCVState *env, \
5167 uint32_t desc) \
5168 { \
5169 typedef uint##BITWIDTH##_t ETYPE; \
5170 uint32_t vm = vext_vm(desc); \
5171 uint32_t vl = env->vl; \
5172 uint32_t esz = sizeof(ETYPE); \
5173 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5174 uint32_t vta = vext_vta(desc); \
5175 uint32_t vma = vext_vma(desc); \
5176 uint32_t i; \
5177 \
5178 VSTART_CHECK_EARLY_EXIT(env, vl); \
5179 \
5180 for (i = env->vstart; i < vl; i++) { \
5181 if (!vm && !vext_elem_mask(v0, i)) { \
5182 /* set masked-off elements to 1s */ \
5183 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5184 continue; \
5185 } \
5186 if (i == 0) { \
5187 *((ETYPE *)vd + H(i)) = s1; \
5188 } else { \
5189 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \
5190 } \
5191 } \
5192 env->vstart = 0; \
5193 /* set tail elements to 1s */ \
5194 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5195 }
5196
5197 GEN_VEXT_VSLIE1UP(8, H1)
5198 GEN_VEXT_VSLIE1UP(16, H2)
5199 GEN_VEXT_VSLIE1UP(32, H4)
5200 GEN_VEXT_VSLIE1UP(64, H8)
5201
/*
 * Integer vslide1up.vx entry points: thin wrappers that forward the
 * scalar rs1 value to the shared vslide1up_<BITWIDTH> body above.
 */
#define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH)                             \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc);                     \
}

/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)
5214
/*
 * vslide1down body: vd[i] = vs2[i + 1], with the last active element
 * vd[vl - 1] = s1 (truncated to SEW).  Masked-off elements are filled
 * with 1s when vma is set (mask-agnostic); tail elements per vta.
 * Shared by the integer (vslide1down.vx) and floating-point
 * (vfslide1down.vf) helpers below.
 */
#define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H)                                 \
static void vslide1down_##BITWIDTH(void *vd, void *v0, uint64_t s1,       \
                                   void *vs2, CPURISCVState *env,         \
                                   uint32_t desc)                         \
{                                                                         \
    typedef uint##BITWIDTH##_t ETYPE;                                     \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint32_t i;                                                           \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        if (i == vl - 1) {                                                \
            /* the last element receives the scalar operand */            \
            *((ETYPE *)vd + H(i)) = s1;                                   \
        } else {                                                          \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_VSLIDE1DOWN(8, H1)
GEN_VEXT_VSLIDE1DOWN(16, H2)
GEN_VEXT_VSLIDE1DOWN(32, H4)
GEN_VEXT_VSLIDE1DOWN(64, H8)
5252
/*
 * Integer vslide1down.vx entry points: thin wrappers that forward the
 * scalar rs1 value to the shared vslide1down_<BITWIDTH> body above.
 */
#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH)                           \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc);                   \
}

/* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)
5265
/* Vector Floating-Point Slide Instructions */
/*
 * vfslide1up.vf entry points: identical to the integer form except the
 * scalar arrives as a uint64_t FP register value (NaN-boxing handled by
 * the caller); the slide body only moves raw element bits.
 */
#define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH)                            \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,             \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc);                     \
}

/* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)
5278
/*
 * vfslide1down.vf entry points: forward the raw FP scalar bits to the
 * shared vslide1down_<BITWIDTH> body; only bit patterns are moved.
 */
#define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH)                          \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,             \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc);                   \
}

/* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)
5290
5291 /* Vector Register Gather Instruction */
5292 #define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2) \
5293 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
5294 CPURISCVState *env, uint32_t desc) \
5295 { \
5296 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2))); \
5297 uint32_t vm = vext_vm(desc); \
5298 uint32_t vl = env->vl; \
5299 uint32_t esz = sizeof(TS2); \
5300 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5301 uint32_t vta = vext_vta(desc); \
5302 uint32_t vma = vext_vma(desc); \
5303 uint64_t index; \
5304 uint32_t i; \
5305 \
5306 VSTART_CHECK_EARLY_EXIT(env, vl); \
5307 \
5308 for (i = env->vstart; i < vl; i++) { \
5309 if (!vm && !vext_elem_mask(v0, i)) { \
5310 /* set masked-off elements to 1s */ \
5311 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5312 continue; \
5313 } \
5314 index = *((TS1 *)vs1 + HS1(i)); \
5315 if (index >= vlmax) { \
5316 *((TS2 *)vd + HS2(i)) = 0; \
5317 } else { \
5318 *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index)); \
5319 } \
5320 } \
5321 env->vstart = 0; \
5322 /* set tail elements to 1s */ \
5323 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5324 }
5325
5326 /* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
5327 GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1)
5328 GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
5329 GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
5330 GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)
5331
5332 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1)
5333 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
5334 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
5335 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)
5336
5337 #define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H) \
5338 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
5339 CPURISCVState *env, uint32_t desc) \
5340 { \
5341 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \
5342 uint32_t vm = vext_vm(desc); \
5343 uint32_t vl = env->vl; \
5344 uint32_t esz = sizeof(ETYPE); \
5345 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5346 uint32_t vta = vext_vta(desc); \
5347 uint32_t vma = vext_vma(desc); \
5348 uint64_t index = s1; \
5349 uint32_t i; \
5350 \
5351 VSTART_CHECK_EARLY_EXIT(env, vl); \
5352 \
5353 for (i = env->vstart; i < vl; i++) { \
5354 if (!vm && !vext_elem_mask(v0, i)) { \
5355 /* set masked-off elements to 1s */ \
5356 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5357 continue; \
5358 } \
5359 if (index >= vlmax) { \
5360 *((ETYPE *)vd + H(i)) = 0; \
5361 } else { \
5362 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \
5363 } \
5364 } \
5365 env->vstart = 0; \
5366 /* set tail elements to 1s */ \
5367 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5368 }
5369
5370 /* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */
5371 GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1)
5372 GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
5373 GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
5374 GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)
5375
5376 /* Vector Compress Instruction */
5377 #define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H) \
5378 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
5379 CPURISCVState *env, uint32_t desc) \
5380 { \
5381 uint32_t vl = env->vl; \
5382 uint32_t esz = sizeof(ETYPE); \
5383 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5384 uint32_t vta = vext_vta(desc); \
5385 uint32_t num = 0, i; \
5386 \
5387 VSTART_CHECK_EARLY_EXIT(env, vl); \
5388 \
5389 for (i = env->vstart; i < vl; i++) { \
5390 if (!vext_elem_mask(vs1, i)) { \
5391 continue; \
5392 } \
5393 *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i)); \
5394 num++; \
5395 } \
5396 env->vstart = 0; \
5397 /* set tail elements to 1s */ \
5398 vext_set_elems_1s(vd, vta, num * esz, total_elems * esz); \
5399 }
5400
5401 /* Compress into vd elements of vs2 where vs1 is enabled */
5402 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1)
5403 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
5404 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
5405 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)
5406
/* Vector Whole Register Move */
/*
 * vmv<nr>r.v: copy a whole register group (maxsz bytes) from vs2 to vd,
 * resuming at env->vstart elements (measured in SEW-sized units).
 *
 * The copy is done with memcpy on host addresses.  On big-endian hosts
 * the vector register bytes are stored swizzled within each 64-bit unit
 * (the H1() mapping), so a start offset that is not 8-byte aligned must
 * first copy the remainder of its doubleword separately: for guest
 * bytes [i, j) inside one doubleword, H1() maps them to the contiguous
 * host range starting at H1(j - 1).  After that, i is 8-byte aligned
 * and the remaining bytes can be copied in one call (H1() of an aligned
 * offset is the offset itself).
 */
void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
{
    /* EEW = SEW */
    uint32_t maxsz = simd_maxsz(desc);
    uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
    uint32_t startb = env->vstart * sewb;   /* resume point in bytes */
    uint32_t i = startb;

    if (startb >= maxsz) {
        /* vstart already past the end: nothing left to move */
        env->vstart = 0;
        return;
    }

    if (HOST_BIG_ENDIAN && i % 8 != 0) {
        /* copy the tail of the partially-done doubleword byte-swizzled */
        uint32_t j = ROUND_UP(i, 8);
        memcpy((uint8_t *)vd + H1(j - 1),
               (uint8_t *)vs2 + H1(j - 1),
               j - i);
        i = j;
    }

    memcpy((uint8_t *)vd + H1(i),
           (uint8_t *)vs2 + H1(i),
           maxsz - i);

    env->vstart = 0;
}
5435
5436 /* Vector Integer Extension */
5437 #define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1) \
5438 void HELPER(NAME)(void *vd, void *v0, void *vs2, \
5439 CPURISCVState *env, uint32_t desc) \
5440 { \
5441 uint32_t vl = env->vl; \
5442 uint32_t vm = vext_vm(desc); \
5443 uint32_t esz = sizeof(ETYPE); \
5444 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5445 uint32_t vta = vext_vta(desc); \
5446 uint32_t vma = vext_vma(desc); \
5447 uint32_t i; \
5448 \
5449 VSTART_CHECK_EARLY_EXIT(env, vl); \
5450 \
5451 for (i = env->vstart; i < vl; i++) { \
5452 if (!vm && !vext_elem_mask(v0, i)) { \
5453 /* set masked-off elements to 1s */ \
5454 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5455 continue; \
5456 } \
5457 *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i)); \
5458 } \
5459 env->vstart = 0; \
5460 /* set tail elements to 1s */ \
5461 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5462 }
5463
5464 GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1)
5465 GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
5466 GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
5467 GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1)
5468 GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
5469 GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1)
5470
5471 GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1)
5472 GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
5473 GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
5474 GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1)
5475 GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
5476 GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1)
5477