xref: /qemu/target/hexagon/mmvec/macros.h (revision 7cef6d686309e2792186504ae17cf4f3eb57ef68)
1 /*
2  *  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
3  *
4  *  This program is free software; you can redistribute it and/or modify
5  *  it under the terms of the GNU General Public License as published by
6  *  the Free Software Foundation; either version 2 of the License, or
7  *  (at your option) any later version.
8  *
9  *  This program is distributed in the hope that it will be useful,
10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *  GNU General Public License for more details.
13  *
14  *  You should have received a copy of the GNU General Public License
15  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #ifndef HEXAGON_MMVEC_MACROS_H
19 #define HEXAGON_MMVEC_MACROS_H
20 
21 #include "qemu/host-utils.h"
22 #include "arch.h"
23 #include "mmvec/system_ext_mmvec.h"
24 #include "accel/tcg/getpc.h"
25 #include "accel/tcg/probe.h"
26 
#ifndef QEMU_GENERATE
/*
 * Operand accessors, available only when QEMU_GENERATE is not defined.
 *
 * Every name below expands to an lvalue: the pointer called <name>_void is
 * cast to a restrict-qualified pointer of the operand's type and then
 * dereferenced.  The <name>_void pointers must already be in scope at the
 * point of use (presumably the helper's void * arguments -- confirm against
 * the generated helper prototypes).
 */
#define MMVEC_OPERAND(TYPE, PTR) (*(TYPE *restrict)(PTR))

/* Single vector registers */
#define VdV      MMVEC_OPERAND(MMVector, VdV_void)
#define VsV      MMVEC_OPERAND(MMVector, VsV_void)
#define VuV      MMVEC_OPERAND(MMVector, VuV_void)
#define VvV      MMVEC_OPERAND(MMVector, VvV_void)
#define VwV      MMVEC_OPERAND(MMVector, VwV_void)
#define VxV      MMVEC_OPERAND(MMVector, VxV_void)
#define VyV      MMVEC_OPERAND(MMVector, VyV_void)

/* Vector register pairs */
#define VddV     MMVEC_OPERAND(MMVectorPair, VddV_void)
#define VuuV     MMVEC_OPERAND(MMVectorPair, VuuV_void)
#define VvvV     MMVEC_OPERAND(MMVectorPair, VvvV_void)
#define VxxV     MMVEC_OPERAND(MMVectorPair, VxxV_void)

/* Q (predicate) registers */
#define QeV      MMVEC_OPERAND(MMQReg, QeV_void)
#define QdV      MMVEC_OPERAND(MMQReg, QdV_void)
#define QsV      MMVEC_OPERAND(MMQReg, QsV_void)
#define QtV      MMVEC_OPERAND(MMQReg, QtV_void)
#define QuV      MMVEC_OPERAND(MMQReg, QuV_void)
#define QvV      MMVEC_OPERAND(MMQReg, QvV_void)
#define QxV      MMVEC_OPERAND(MMQReg, QxV_void)
#endif
49 
/*
 * Record one byte in env->vtcm_log at slot IDX:
 *   - data.ub[IDX] receives VAL,
 *   - bit IDX of the mask is set when MASK is non-zero, cleared otherwise,
 *   - va[IDX] receives VA.
 *
 * A variable named env must be in scope.  IDX is expanded three times, so
 * it must not have side effects.
 */
#define LOG_VTCM_BYTE(VA, MASK, VAL, IDX) \
    do { \
        env->vtcm_log.data.ub[IDX] = (VAL); \
        if (!(MASK)) { \
            clear_bit((IDX), env->vtcm_log.mask); \
        } else { \
            set_bit((IDX), env->vtcm_log.mask); \
        } \
        env->vtcm_log.va[IDX] = (VA); \
    } while (0)
60 
/*
 * Yield a new MMQReg holding the bitwise complement of VAL; VAL itself is
 * left untouched.  The complement is taken over the ud[] view, covering
 * fVECSIZE() / 64 elements.
 *
 * Implemented as a GNU statement expression so it can be used as a value.
 * VAL is parenthesized so that arguments such as *ptr work; it is expanded
 * once per loop iteration and must be free of side effects.
 */
#define fNOTQ(VAL) \
    ({ \
        MMQReg _ret; \
        int _i_; \
        for (_i_ = 0; _i_ < fVECSIZE() / 64; _i_++) { \
            _ret.ud[_i_] = ~(VAL).ud[_i_]; \
        } \
        _ret; \
    })
/*
 * Read bits out of a Q register through its 32-bit w[] view.
 *
 * fGETQBITS shifts word BITNO / 32 right by BITNO % 32 and ANDs the result
 * with MASK.  WIDTH is accepted but not used by the expansion.  The field
 * is not assembled across a word boundary.
 *
 * fGETQBIT returns the single bit at position BITNO (0 or 1).
 *
 * REG is parenthesized so that expressions such as *ptr are accepted.
 */
#define fGETQBITS(REG, WIDTH, MASK, BITNO) \
    ((MASK) & ((REG).w[(BITNO) >> 5] >> ((BITNO) & 0x1f)))
#define fGETQBIT(REG, BITNO) fGETQBITS(REG, 1, 1, BITNO)
/*
 * Build a 32-bit byte mask for word IDX of a vector from a Q register:
 * Q bit (IDX * 4 + n) selects byte n, which becomes 0xFF when the bit is
 * set and 0x00 otherwise.
 *
 * IDX is parenthesized so that expressions such as (i + 1) index the
 * intended bits.  QREG and IDX are expanded four times each.
 */
#define fGENMASKW(QREG, IDX) \
    (((fGETQBIT(QREG, ((IDX) * 4 + 0)) ? 0xFF : 0x0) << 0)  | \
     ((fGETQBIT(QREG, ((IDX) * 4 + 1)) ? 0xFF : 0x0) << 8)  | \
     ((fGETQBIT(QREG, ((IDX) * 4 + 2)) ? 0xFF : 0x0) << 16) | \
     ((fGETQBIT(QREG, ((IDX) * 4 + 3)) ? 0xFF : 0x0) << 24))
/*
 * Extract small signed fields from SRC.
 *
 * fGETNIBBLE: the 4-bit field number IDX (bits 4*IDX .. 4*IDX+3), passed
 *             through fSXTN(4, 8, ...).
 * fGETCRUMB:  the 2-bit field number IDX (bits 2*IDX .. 2*IDX+1), passed
 *             through fSXTN(2, 8, ...).
 * fGETCRUMB_SYMMETRIC: yields (2 - crumb) when the crumb is >= 0 and the
 *             crumb unchanged when it is negative.
 *
 * fSXTN is defined elsewhere; presumably it sign-extends -- confirm there.
 * SRC and IDX are parenthesized so compound expressions shift as intended.
 * fGETCRUMB_SYMMETRIC expands its arguments several times.
 */
#define fGETNIBBLE(IDX, SRC) (fSXTN(4, 8, ((SRC) >> (4 * (IDX))) & 0xF))
#define fGETCRUMB(IDX, SRC) (fSXTN(2, 8, ((SRC) >> (2 * (IDX))) & 0x3))
#define fGETCRUMB_SYMMETRIC(IDX, SRC) \
    ((fGETCRUMB(IDX, SRC) >= 0 ? (2 - fGETCRUMB(IDX, SRC)) \
                               : fGETCRUMB(IDX, SRC)))
/*
 * fGENMASKH: 16-bit byte mask for halfword IDX; Q bit (IDX * 2 + n)
 *            selects byte n (0xFF when set, 0x00 when clear).
 * fGETMASKW: word IDX of VREG ANDed with fGENMASKW(QREG, IDX).
 * fGETMASKH: halfword IDX of VREG ANDed with fGENMASKH(QREG, IDX).
 *
 * IDX and VREG are parenthesized where the expansion applies an operator
 * to them, so compound arguments behave as written.  Arguments are
 * expanded more than once.
 */
#define fGENMASKH(QREG, IDX) \
    (((fGETQBIT(QREG, ((IDX) * 2 + 0)) ? 0xFF : 0x0) << 0) | \
     ((fGETQBIT(QREG, ((IDX) * 2 + 1)) ? 0xFF : 0x0) << 8))
#define fGETMASKW(VREG, QREG, IDX) ((VREG).w[IDX] & fGENMASKW((QREG), IDX))
#define fGETMASKH(VREG, QREG, IDX) ((VREG).h[IDX] & fGENMASKH((QREG), IDX))
/*
 * Per-byte select between YESVAL and NOVAL, steered by Q register bits.
 *
 * fCONDMASK8:  YESVAL when Q bit IDX is set, NOVAL otherwise.
 * fCONDMASK16: for halfword IDX, bytes whose Q bit is set come from YESVAL
 *              and the remaining bytes from NOVAL.
 * fCONDMASK32: same as fCONDMASK16 for the four bytes of word IDX.
 *
 * The 16- and 32-bit forms build the NOVAL mask from fNOTQ(QREG), i.e. a
 * complemented copy of the Q register is produced inside the expansion.
 * All arguments may be expanded more than once.
 */
#define fCONDMASK8(QREG, IDX, YESVAL, NOVAL) \
    (!fGETQBIT(QREG, IDX) ? (NOVAL) : (YESVAL))
#define fCONDMASK16(QREG, IDX, YESVAL, NOVAL) \
    (((YESVAL) & fGENMASKH(QREG, IDX)) | \
     ((NOVAL) & fGENMASKH(fNOTQ(QREG), IDX)))
#define fCONDMASK32(QREG, IDX, YESVAL, NOVAL) \
    (((YESVAL) & fGENMASKW(QREG, IDX)) | \
     ((NOVAL) & fGENMASKW(fNOTQ(QREG), IDX)))
/*
 * Write bits into a Q register through its 32-bit w[] view.
 *
 * fSETQBITS clears the field selected by MASK at bit position BITNO and
 * then ORs in (VAL & MASK) at that position.  WIDTH is accepted but not
 * used.  The field is not carried across a word boundary.
 * fSETQBIT sets or clears the single bit BITNO from the low bit of VAL.
 *
 * MASK is converted to uint32_t before shifting so that a field reaching
 * bit 31 does not shift into the sign bit of an int.  REG is parenthesized
 * so expressions such as *ptr are accepted.  VAL is evaluated once; REG,
 * MASK and BITNO are expanded twice.
 */
#define fSETQBITS(REG, WIDTH, MASK, BITNO, VAL) \
    do { \
        uint32_t fsetqbits_val_ = (VAL); \
        (REG).w[(BITNO) >> 5] &= \
            ~((uint32_t)(MASK) << ((BITNO) & 0x1f)); \
        (REG).w[(BITNO) >> 5] |= \
            ((fsetqbits_val_ & (uint32_t)(MASK)) << ((BITNO) & 0x1f)); \
    } while (0)
#define fSETQBIT(REG, BITNO, VAL) fSETQBITS(REG, 1, 1, BITNO, VAL)
/*
 * Vector geometry helpers.
 *
 * fVECLOGSIZE: log2 of the vector size in bytes (7).
 * fVECSIZE:    vector size in bytes (1 << 7 == 128).
 * fVBYTES:     alias for fVECSIZE().
 * fVELEM:      number of WIDTH-bit elements in one vector.
 * fVALIGN:     assigns ADDR rounded down to a multiple of LOG2_ALIGNMENT.
 * fVLASTBYTE:  assigns ADDR with all bits below LOG2_ALIGNMENT set.
 *
 * Despite its name, LOG2_ALIGNMENT is used as a byte count (the expansion
 * subtracts 1 to form a mask), so it must be a power of two, not an
 * exponent.  ADDR must be an lvalue and is expanded twice.  Arguments are
 * parenthesized so that expressions such as (1 << n) give the right mask.
 */
#define fVBYTES() (fVECSIZE())
#define fVALIGN(ADDR, LOG2_ALIGNMENT) \
    ((ADDR) = (ADDR) & ~((LOG2_ALIGNMENT) - 1))
#define fVLASTBYTE(ADDR, LOG2_ALIGNMENT) \
    ((ADDR) = (ADDR) | ((LOG2_ALIGNMENT) - 1))
#define fVELEM(WIDTH) ((fVECSIZE() * 8) / (WIDTH))
#define fVECLOGSIZE() (7)
#define fVECSIZE() (1 << fVECLOGSIZE())
/*
 * Exchange two byte-sized lvalues A and B.
 *
 * The temporary has a macro-specific name so that callers may pass a
 * variable called tmp, and both arguments are parenthesized.  A and B are
 * each expanded twice.
 */
#define fSWAPB(A, B) \
    do { \
        uint8_t fswapb_tmp_ = (A); \
        (A) = (B); \
        (B) = fswapb_tmp_; \
    } while (0)
/*
 * Emit a warning through warn() when EA has any of the bits in MASK set.
 * Nothing is modified; despite the message text, EA is not aligned here.
 *
 * Wrapped in do { } while (0) so the macro acts as a single statement and
 * can sit in an unbraced if/else.  Call sites must end with a semicolon.
 */
#define fV_AL_CHECK(EA, MASK) \
    do { \
        if ((EA) & (MASK)) { \
            warn("aligning misaligned vector. EA=%08x", (EA)); \
        } \
    } while (0)
/*
 * Hooks around scatter/gather instruction bodies.
 *
 * fSCATTER_INIT / fGATHER_INIT call mem_vector_scatter_init(env) and
 * mem_vector_gather_init(env); their three arguments are not used.
 * fSCATTER_FINISH and fGATHER_FINISH expand to nothing.
 * fLOG_SCATTER_OP flags the VTCM log as an "op" and stores the element
 * size in op_size.
 *
 * A variable named env must be in scope.
 */
#define fSCATTER_INIT(REGION_START, LENGTH, ELEMENT_SIZE) \
    mem_vector_scatter_init(env)
#define fGATHER_INIT(REGION_START, LENGTH, ELEMENT_SIZE) \
    mem_vector_gather_init(env)
#define fSCATTER_FINISH(OP)
#define fGATHER_FINISH()
#define fLOG_SCATTER_OP(SIZE) \
    do { \
        env->vtcm_log.op = true; \
        env->vtcm_log.op_size = (SIZE); \
    } while (0)
/*
 * Shared body of the scatter-accumulate logging macros below.
 *
 * For each of the ELEM_SIZE bytes of element IDX, log byte
 * INC.ub[ELEM_SIZE * IDX + i0] at address EA + i0 into VTCM log slot
 * ELEM_SIZE * IDX + i0.  A byte's mask bit is set only when its address
 * is <= EA + LEN (the bound is inclusive).
 *
 * All arguments are parenthesized; EA and IDX are expanded more than once
 * and must be free of side effects.  LOG_VTCM_BYTE requires env in scope.
 */
#define VLOG_VTCM_INCREMENT(EA, INC, IDX, LEN, ELEM_SIZE) \
    do { \
        target_ulong va = (EA); \
        target_ulong va_high = (EA) + (LEN); \
        for (int i0 = 0; i0 < (ELEM_SIZE); i0++) { \
            int log_byte = (va + i0) <= va_high; \
            LOG_VTCM_BYTE(va + i0, log_byte, \
                          (INC).ub[(ELEM_SIZE) * (IDX) + i0], \
                          (ELEM_SIZE) * (IDX) + i0); \
        } \
    } while (0)

/* Log the 4 bytes of word IDX of INC.  OFFSET and ALIGNMENT are unused. */
#define fVLOG_VTCM_WORD_INCREMENT(EA, OFFSET, INC, IDX, ALIGNMENT, LEN) \
    VLOG_VTCM_INCREMENT(EA, INC, IDX, LEN, 4)

/* Log the 2 bytes of halfword IDX of INC.  OFFSET, ALIGNMENT are unused. */
#define fVLOG_VTCM_HALFWORD_INCREMENT(EA, OFFSET, INC, IDX, ALIGNMENT, LEN) \
    VLOG_VTCM_INCREMENT(EA, INC, IDX, LEN, 2)

/*
 * Double-vector variant; it logs exactly what
 * fVLOG_VTCM_HALFWORD_INCREMENT logs.  OFFSET, IDX2, IDX_H and ALIGNMENT
 * are unused.
 */
#define fVLOG_VTCM_HALFWORD_INCREMENT_DV(EA, OFFSET, INC, IDX, IDX2, IDX_H, \
                                         ALIGNMENT, LEN) \
    VLOG_VTCM_INCREMENT(EA, INC, IDX, LEN, 2)
160 
/*
 * NOTE(review): the gathered bytes are always written to
 * env->tmp_VRegs[0].  Is that the right destination for every caller?
 *
 * Gather one element of ELEMENT_SIZE bytes starting at EA.
 *
 * For every byte i0 of the element:
 *   - the byte at EA + i0 is loaded with cpu_ldub_data_ra() (the load is
 *     issued whether or not the byte ends up enabled in the log),
 *   - it is stored in env->tmp_VRegs[0].ub[ELEMENT_SIZE * IDX + i0],
 *   - it is logged via LOG_VTCM_BYTE at the same slot; the mask bit is set
 *     only when EA + i0 <= EA + LEN and QVAL is non-zero.
 *
 * QVAL is evaluated once per byte inside the loop, and callers in this
 * file pass expressions that refer to the loop variable i0, so that name
 * is part of the macro's contract and must not be renamed.
 * OFFSET and BANK_IDX are not used.  env must be in scope.
 */
#define GATHER_FUNCTION(EA, OFFSET, IDX, LEN, ELEMENT_SIZE, BANK_IDX, QVAL) \
    do { \
        int i0; \
        target_ulong va = (EA); \
        target_ulong va_high = (EA) + (LEN); \
        uintptr_t ra = GETPC(); \
        for (i0 = 0; i0 < (ELEMENT_SIZE); i0++) { \
            int log_byte = ((va + i0) <= va_high) && (QVAL); \
            uint8_t B = cpu_ldub_data_ra(env, va + i0, ra); \
            env->tmp_VRegs[0].ub[(ELEMENT_SIZE) * (IDX) + i0] = B; \
            LOG_VTCM_BYTE(va + i0, log_byte, B, \
                          (ELEMENT_SIZE) * (IDX) + i0); \
        } \
    } while (0)
/*
 * Gather wrappers around GATHER_FUNCTION.
 *
 * WORD variants gather 4-byte elements, HALFWORD variants 2-byte elements.
 * _DV variants pass (2 * IDX2 + IDX_H) as the bank index; the others pass
 * IDX.
 *
 * The ...Q variants enable each byte from bit (element size * IDX + i0) of
 * QsV, where i0 is the byte loop variable declared inside GATHER_FUNCTION.
 * NOTE(review): the Q parameter is accepted but not used -- the predicate
 * is always read from QsV.  Confirm every caller passes QsV.
 *
 * GATHER_FUNCTION is already a do { } while (0) statement, so no extra
 * wrapper is added here.  Arguments are parenthesized before being
 * forwarded or used in arithmetic.
 */
#define fVLOG_VTCM_GATHER_WORD(EA, OFFSET, IDX, LEN) \
    GATHER_FUNCTION((EA), OFFSET, (IDX), (LEN), 4, (IDX), 1)
#define fVLOG_VTCM_GATHER_HALFWORD(EA, OFFSET, IDX, LEN) \
    GATHER_FUNCTION((EA), OFFSET, (IDX), (LEN), 2, (IDX), 1)
#define fVLOG_VTCM_GATHER_HALFWORD_DV(EA, OFFSET, IDX, IDX2, IDX_H, LEN) \
    GATHER_FUNCTION((EA), OFFSET, (IDX), (LEN), 2, \
                    (2 * (IDX2) + (IDX_H)), 1)
#define fVLOG_VTCM_GATHER_WORDQ(EA, OFFSET, IDX, Q, LEN) \
    GATHER_FUNCTION((EA), OFFSET, (IDX), (LEN), 4, (IDX), \
                    fGETQBIT(QsV, 4 * (IDX) + i0))
#define fVLOG_VTCM_GATHER_HALFWORDQ(EA, OFFSET, IDX, Q, LEN) \
    GATHER_FUNCTION((EA), OFFSET, (IDX), (LEN), 2, (IDX), \
                    fGETQBIT(QsV, 2 * (IDX) + i0))
#define fVLOG_VTCM_GATHER_HALFWORDQ_DV(EA, OFFSET, IDX, IDX2, IDX_H, Q, LEN) \
    GATHER_FUNCTION((EA), OFFSET, (IDX), (LEN), 2, \
                    (2 * (IDX2) + (IDX_H)), \
                    fGETQBIT(QsV, 2 * (IDX) + i0))
/*
 * Apply logged scatter-accumulate elements of type TYPE to memory.
 *
 * The log is walked in steps of sizeof(TYPE).  An element is processed
 * when the mask bit of its FIRST byte is set (the other bytes' mask bits
 * are not tested).  For such an element:
 *   - the current value is read byte by byte from the logged addresses
 *     and assembled least-significant byte first,
 *   - the logged increment is assembled the same way,
 *   - every byte's mask bit is cleared and its log data zeroed,
 *   - the sum is written back byte by byte to the same addresses.
 *
 * The caller must have env and an assignable variable named ra in scope;
 * ra is overwritten with GETPC().
 *
 * Each byte is widened to uint64_t before shifting so that the shift is
 * never performed on a promoted (signed) int; the result is truncated to
 * TYPE on assignment.
 */
#define SCATTER_OP_WRITE_TO_MEM(TYPE) \
    do { \
        ra = GETPC(); \
        for (int i = 0; i < (int)sizeof(MMVector); i += sizeof(TYPE)) { \
            if (test_bit(i, env->vtcm_log.mask)) { \
                TYPE dst = 0; \
                TYPE inc = 0; \
                for (int j = 0; j < (int)sizeof(TYPE); j++) { \
                    uint8_t val; \
                    val = cpu_ldub_data_ra(env, env->vtcm_log.va[i + j], ra); \
                    dst |= (uint64_t)val << (8 * j); \
                    inc |= (uint64_t)env->vtcm_log.data.ub[j + i] << (8 * j); \
                    clear_bit(j + i, env->vtcm_log.mask); \
                    env->vtcm_log.data.ub[j + i] = 0; \
                } \
                dst += inc; \
                for (int j = 0; j < (int)sizeof(TYPE); j++) { \
                    cpu_stb_data_ra(env, env->vtcm_log.va[i + j], \
                                    (dst >> (8 * j)) & 0xFF, ra); \
                } \
            } \
        } \
    } while (0)
/*
 * Probe the memory touched by a logged scatter operation.
 *
 * The log is walked in steps of sizeof(TYPE).  Elements whose first byte
 * has its mask bit clear are skipped; for every byte of the remaining
 * elements, probe_read() and then probe_write() are called on the logged
 * address with size 1, MMU_IDX and RETADDR.  The log itself is not
 * modified.  env must be in scope.
 */
#define SCATTER_OP_PROBE_MEM(TYPE, MMU_IDX, RETADDR) \
    do { \
        for (int i = 0; i < sizeof(MMVector); i += sizeof(TYPE)) { \
            if (!test_bit(i, env->vtcm_log.mask)) { \
                continue; \
            } \
            for (int j = i; j < i + sizeof(TYPE); j++) { \
                probe_read(env, env->vtcm_log.va[j], 1, \
                           MMU_IDX, RETADDR); \
                probe_write(env, env->vtcm_log.va[j], 1, \
                            MMU_IDX, RETADDR); \
            } \
        } \
    } while (0)
/*
 * Log one scatter element of ELEM_SIZE bytes destined for address EA.
 *
 * For every byte i0 of the element, IN.ub[ELEM_SIZE * IDX + i0] is logged
 * via LOG_VTCM_BYTE for address EA + i0 at slot ELEM_SIZE * IDX + i0.
 * The byte's mask bit is set only when EA + i0 <= EA + LEN (inclusive
 * bound) and QVAL is non-zero.  No memory is written here.
 *
 * QVAL is evaluated once per byte inside the loop, and callers in this
 * file pass expressions that refer to the loop variable i0, so that name
 * is part of the macro's contract and must not be renamed.
 * OFFSET and BANK_IDX are not used.  env must be in scope for
 * LOG_VTCM_BYTE.  Arguments are parenthesized; EA and IDX are expanded
 * more than once.
 */
#define SCATTER_FUNCTION(EA, OFFSET, IDX, LEN, ELEM_SIZE, BANK_IDX, QVAL, IN) \
    do { \
        int i0; \
        target_ulong va = (EA); \
        target_ulong va_high = (EA) + (LEN); \
        for (i0 = 0; i0 < (ELEM_SIZE); i0++) { \
            int log_byte = ((va + i0) <= va_high) && (QVAL); \
            LOG_VTCM_BYTE(va + i0, log_byte, \
                          (IN).ub[(ELEM_SIZE) * (IDX) + i0], \
                          (ELEM_SIZE) * (IDX) + i0); \
        } \
    } while (0)
/*
 * Scatter wrappers around SCATTER_FUNCTION.
 *
 * HALFWORD variants log 2-byte elements, WORD variants 4-byte elements.
 * _DV variants pass (2 * IDX2 + IDX_H) as the bank index; the others pass
 * IDX.
 *
 * The ...Q variants enable each byte from bit (element size * IDX + i0) of
 * QsV, where i0 is the byte loop variable declared inside
 * SCATTER_FUNCTION.
 * NOTE(review): the Q parameter is accepted but not used -- the predicate
 * is always read from QsV.  Confirm every caller passes QsV.
 *
 * Note the parameter order of fVLOG_VTCM_HALFWORDQ_DV: Q comes before
 * IDX2 and IDX_H.
 *
 * SCATTER_FUNCTION is already a do { } while (0) statement, so no extra
 * wrapper is added here.  Arguments are parenthesized before being
 * forwarded or used in arithmetic.
 */
#define fVLOG_VTCM_HALFWORD(EA, OFFSET, IN, IDX, LEN) \
    SCATTER_FUNCTION((EA), OFFSET, (IDX), (LEN), 2, (IDX), 1, IN)
#define fVLOG_VTCM_WORD(EA, OFFSET, IN, IDX, LEN) \
    SCATTER_FUNCTION((EA), OFFSET, (IDX), (LEN), 4, (IDX), 1, IN)
#define fVLOG_VTCM_HALFWORDQ(EA, OFFSET, IN, IDX, Q, LEN) \
    SCATTER_FUNCTION((EA), OFFSET, (IDX), (LEN), 2, (IDX), \
                     fGETQBIT(QsV, 2 * (IDX) + i0), IN)
#define fVLOG_VTCM_WORDQ(EA, OFFSET, IN, IDX, Q, LEN) \
    SCATTER_FUNCTION((EA), OFFSET, (IDX), (LEN), 4, (IDX), \
                     fGETQBIT(QsV, 4 * (IDX) + i0), IN)
#define fVLOG_VTCM_HALFWORD_DV(EA, OFFSET, IN, IDX, IDX2, IDX_H, LEN) \
    SCATTER_FUNCTION((EA), OFFSET, (IDX), (LEN), 2, \
                     (2 * (IDX2) + (IDX_H)), 1, IN)
#define fVLOG_VTCM_HALFWORDQ_DV(EA, OFFSET, IN, IDX, Q, IDX2, IDX_H, LEN) \
    SCATTER_FUNCTION((EA), OFFSET, (IDX), (LEN), 2, \
                     (2 * (IDX2) + (IDX_H)), \
                     fGETQBIT(QsV, 2 * (IDX) + i0), IN)
/*
 * fSTORERELEASE only runs the vector alignment check on EA, using
 * fVECSIZE() - 1 as the mask; TYPE is not used.
 */
#define fSTORERELEASE(EA, TYPE) \
    do { \
        fV_AL_CHECK(EA, fVECSIZE() - 1); \
    } while (0)

/*
 * Vector load/store generators, defined only when QEMU_GENERATE is set.
 * They forward to gen_vreg_load(), gen_vreg_store() and
 * gen_vreg_masked_store(), passing ctx, the operand's <name>_off symbol
 * and, for stores, insn->slot.
 *
 * The trailing boolean is true for fLOADMMV / fSTOREMMV and false for the
 * U forms (presumably "aligned" vs "unaligned" -- confirm against the
 * gen_vreg_* prototypes).  For the masked stores it is false for
 * fSTOREMMVQ and true for fSTOREMMVNQ (presumably "invert the mask" --
 * confirm likewise).
 */
#ifdef QEMU_GENERATE
#define fLOADMMV(EA, DST) gen_vreg_load(ctx, DST##_off, EA, true)
#define fLOADMMVU(EA, DST) gen_vreg_load(ctx, DST##_off, EA, false)
#define fSTOREMMV(EA, SRC) \
    gen_vreg_store(ctx, EA, SRC##_off, insn->slot, true)
#define fSTOREMMVQ(EA, SRC, MASK) \
    gen_vreg_masked_store(ctx, EA, SRC##_off, MASK##_off, insn->slot, false)
#define fSTOREMMVNQ(EA, SRC, MASK) \
    gen_vreg_masked_store(ctx, EA, SRC##_off, MASK##_off, insn->slot, true)
#define fSTOREMMVU(EA, SRC) \
    gen_vreg_store(ctx, EA, SRC##_off, insn->slot, false)
#endif
/*
 * fVFOREACH: loop header that steps VAR from 0 up to (not including) the
 * number of WIDTH-bit elements in a vector.  VAR must be an already
 * declared integer lvalue.
 *
 * fVARRAY_ELEMENT_ACCESS: lvalue for element INDEX of a multi-vector
 * ARRAY viewed through member TYPE (TYPE is a member name such as a
 * halfword or byte view, not a C type).  INDEX is split into a vector
 * number (ARRAY.v[...]) and an element number within that vector, using
 * fVECSIZE() / sizeof(element) elements per vector.
 *
 * ARRAY is parenthesized so that expressions such as *ptr are accepted.
 * ARRAY and INDEX are expanded more than once.
 */
#define fVFOREACH(WIDTH, VAR) for (VAR = 0; VAR < fVELEM(WIDTH); VAR++)
#define fVARRAY_ELEMENT_ACCESS(ARRAY, TYPE, INDEX) \
    (ARRAY).v[(INDEX) / (fVECSIZE() / (sizeof((ARRAY).TYPE[0])))].TYPE[ \
        (INDEX) % (fVECSIZE() / (sizeof((ARRAY).TYPE[0])))]
310 
/*
 * fVSATDW: form a 64-bit value with U in the upper 32 bits and the
 * zero-extended low 32 bits of V in the lower half, then pass it through
 * fVSATW.
 * fVASL_SATHI: pass ((U << 1) | (V >> 31)) through fVSATW.
 *
 * fVSATW and fZXTN are defined elsewhere.
 *
 * In fVSATDW, U is parenthesized so the cast applies to the whole
 * argument, and the shift is done on an unsigned 64-bit value so that a
 * negative U does not shift a negative signed quantity; the bit pattern
 * is the same as a wrapping signed shift.
 */
#define fVSATDW(U, V) \
    fVSATW(((long long)((unsigned long long)(U) << 32) | fZXTN(32, 64, V)))
#define fVASL_SATHI(U, V) fVSATW(((U) << 1) | ((V) >> 31))
/*
 * Element arithmetic on WIDTH-bit values.
 *
 * Each operand is first widened with fZXTN (unsigned forms) or fSXTN
 * (signed forms), passing 2 * WIDTH as the target-width argument.
 *
 *   fV{U,S}ADDSAT    sum, passed through fVSATUN / fVSATN
 *   fV{U,S}SUBSAT    difference, passed through fVSATUN / fVSATN
 *   fVAVG{U,S}       (U + V) >> 1
 *   fVAVG{U,S}RND    (U + V + 1) >> 1
 *   fVNAVG{U,S}      (U - V) >> 1
 *   fVNAVGSRND       (U - V + 1) >> 1
 *   fVNAVG{U,S}RNDSAT  (U - V + 1) >> 1, passed through fVSATUN / fVSATN
 *
 * fZXTN, fSXTN, fVSATUN and fVSATN are defined elsewhere.
 */
#define fVEC_ZX2(WIDTH, X) fZXTN(WIDTH, 2 * WIDTH, X)
#define fVEC_SX2(WIDTH, X) fSXTN(WIDTH, 2 * WIDTH, X)

/* Saturating add / subtract */
#define fVUADDSAT(WIDTH, U, V) \
    fVSATUN(WIDTH, fVEC_ZX2(WIDTH, U) + fVEC_ZX2(WIDTH, V))
#define fVSADDSAT(WIDTH, U, V) \
    fVSATN(WIDTH, fVEC_SX2(WIDTH, U) + fVEC_SX2(WIDTH, V))
#define fVUSUBSAT(WIDTH, U, V) \
    fVSATUN(WIDTH, fVEC_ZX2(WIDTH, U) - fVEC_ZX2(WIDTH, V))
#define fVSSUBSAT(WIDTH, U, V) \
    fVSATN(WIDTH, fVEC_SX2(WIDTH, U) - fVEC_SX2(WIDTH, V))

/* Unsigned averages */
#define fVAVGU(WIDTH, U, V) \
    ((fVEC_ZX2(WIDTH, U) + fVEC_ZX2(WIDTH, V)) >> 1)
#define fVAVGURND(WIDTH, U, V) \
    ((fVEC_ZX2(WIDTH, U) + fVEC_ZX2(WIDTH, V) + 1) >> 1)
#define fVNAVGU(WIDTH, U, V) \
    ((fVEC_ZX2(WIDTH, U) - fVEC_ZX2(WIDTH, V)) >> 1)
#define fVNAVGURNDSAT(WIDTH, U, V) \
    fVSATUN(WIDTH, ((fVEC_ZX2(WIDTH, U) - \
                     fVEC_ZX2(WIDTH, V) + 1) >> 1))

/* Signed averages */
#define fVAVGS(WIDTH, U, V) \
    ((fVEC_SX2(WIDTH, U) + fVEC_SX2(WIDTH, V)) >> 1)
#define fVAVGSRND(WIDTH, U, V) \
    ((fVEC_SX2(WIDTH, U) + fVEC_SX2(WIDTH, V) + 1) >> 1)
#define fVNAVGS(WIDTH, U, V) \
    ((fVEC_SX2(WIDTH, U) - fVEC_SX2(WIDTH, V)) >> 1)
#define fVNAVGSRND(WIDTH, U, V) \
    ((fVEC_SX2(WIDTH, U) - fVEC_SX2(WIDTH, V) + 1) >> 1)
#define fVNAVGSRNDSAT(WIDTH, U, V) \
    fVSATN(WIDTH, ((fVEC_SX2(WIDTH, U) - \
                    fVEC_SX2(WIDTH, V) + 1) >> 1))
/*
 * Rounding / saturation placeholders and small arithmetic helpers.
 *
 * fVNOROUND: yields VAL unchanged; SHAMT is ignored.
 * fVNOSAT:   yields VAL unchanged.
 * fVROUND:   adds 1 << (SHAMT - 1) to VAL when SHAMT > 0, i.e. half of
 *            the unit that a following right shift by SHAMT discards;
 *            adds nothing when SHAMT <= 0.
 * fCARRY_FROM_ADD32: bit 32 of the sum of the zero-extended 32-bit A and
 *            B plus C, i.e. the carry out of a 32-bit add with carry-in.
 *            fZXTN is defined elsewhere.
 *
 * VAL and C are parenthesized so compound arguments keep their meaning
 * inside a larger expression.
 *
 * fUARCH_NOTE_PUMP_4X, fUARCH_NOTE_PUMP_2X and IV1DEAD expand to nothing.
 */
#define fVNOROUND(VAL, SHAMT) (VAL)
#define fVNOSAT(VAL) (VAL)
#define fVROUND(VAL, SHAMT) \
    ((VAL) + (((SHAMT) > 0) ? (1LL << ((SHAMT) - 1)) : 0))
#define fCARRY_FROM_ADD32(A, B, C) \
    (((fZXTN(32, 64, A) + fZXTN(32, 64, B) + (C)) >> 32) & 1)
#define fUARCH_NOTE_PUMP_4X()
#define fUARCH_NOTE_PUMP_2X()

#define IV1DEAD()
351 
/*
 * Assemble a signed 10-bit coefficient from VAL and store it in COE.
 *
 * The low 8 bits are byte number POS of VAL (bits POS*8 .. POS*8+7).  The
 * top 2 bits are the 2-bit field at bit 24 + 2*POS of VAL, sign-extended
 * by sextract32() and shifted left by 8.
 *
 * VAL, POS and COE are parenthesized so compound arguments are handled as
 * written.
 *
 * NOTE(review): the expansion ends in "while (0);" -- the trailing
 * semicolon is kept because call sites may rely on it, but it means the
 * macro cannot be used as the body of an unbraced if that has an else.
 */
#define fGET10BIT(COE, VAL, POS) \
    do { \
        (COE) = (sextract32((VAL), 24 + 2 * (POS), 2) << 8) | \
                extract32((VAL), (POS) * 8, 8); \
    } while (0);
357 
358 #endif
359