1 /* 2 * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, see <http://www.gnu.org/licenses/>. 16 */ 17 18 #ifndef HEXAGON_MMVEC_MACROS_H 19 #define HEXAGON_MMVEC_MACROS_H 20 21 #include "qemu/host-utils.h" 22 #include "arch.h" 23 #include "mmvec/system_ext_mmvec.h" 24 #include "accel/tcg/getpc.h" 25 #include "accel/tcg/probe.h" 26 27 #ifndef QEMU_GENERATE 28 #define VdV (*(MMVector *restrict)(VdV_void)) 29 #define VsV (*(MMVector *restrict)(VsV_void)) 30 #define VuV (*(MMVector *restrict)(VuV_void)) 31 #define VvV (*(MMVector *restrict)(VvV_void)) 32 #define VwV (*(MMVector *restrict)(VwV_void)) 33 #define VxV (*(MMVector *restrict)(VxV_void)) 34 #define VyV (*(MMVector *restrict)(VyV_void)) 35 36 #define VddV (*(MMVectorPair *restrict)(VddV_void)) 37 #define VuuV (*(MMVectorPair *restrict)(VuuV_void)) 38 #define VvvV (*(MMVectorPair *restrict)(VvvV_void)) 39 #define VxxV (*(MMVectorPair *restrict)(VxxV_void)) 40 41 #define QeV (*(MMQReg *restrict)(QeV_void)) 42 #define QdV (*(MMQReg *restrict)(QdV_void)) 43 #define QsV (*(MMQReg *restrict)(QsV_void)) 44 #define QtV (*(MMQReg *restrict)(QtV_void)) 45 #define QuV (*(MMQReg *restrict)(QuV_void)) 46 #define QvV (*(MMQReg *restrict)(QvV_void)) 47 #define QxV (*(MMQReg *restrict)(QxV_void)) 48 #endif 49 50 #define LOG_VTCM_BYTE(VA, MASK, VAL, IDX) \ 51 do { \ 52 env->vtcm_log.data.ub[IDX] = (VAL); \ 53 if (MASK) { \ 54 set_bit((IDX), env->vtcm_log.mask); \ 55 } else { \ 56 clear_bit((IDX), env->vtcm_log.mask); \ 57 } \ 58 env->vtcm_log.va[IDX] = (VA); \ 59 } while (0) 60 61 #define fNOTQ(VAL) \ 62 ({ \ 63 MMQReg _ret; \ 64 int _i_; \ 65 for (_i_ = 0; _i_ < fVECSIZE() / 64; _i_++) { \ 66 _ret.ud[_i_] = ~VAL.ud[_i_]; \ 67 } \ 68 _ret;\ 69 }) 70 #define fGETQBITS(REG, WIDTH, MASK, BITNO) \ 71 ((MASK) & (REG.w[(BITNO) >> 5] >> ((BITNO) & 0x1f))) 72 #define fGETQBIT(REG, BITNO) fGETQBITS(REG, 1, 1, BITNO) 73 #define fGENMASKW(QREG, IDX) \ 74 (((fGETQBIT(QREG, (IDX * 4 + 0)) ? 0xFF : 0x0) << 0) | \ 75 ((fGETQBIT(QREG, (IDX * 4 + 1)) ? 0xFF : 0x0) << 8) | \ 76 ((fGETQBIT(QREG, (IDX * 4 + 2)) ? 0xFF : 0x0) << 16) | \ 77 ((fGETQBIT(QREG, (IDX * 4 + 3)) ? 0xFF : 0x0) << 24)) 78 #define fGETNIBBLE(IDX, SRC) (fSXTN(4, 8, (SRC >> (4 * IDX)) & 0xF)) 79 #define fGETCRUMB(IDX, SRC) (fSXTN(2, 8, (SRC >> (2 * IDX)) & 0x3)) 80 #define fGETCRUMB_SYMMETRIC(IDX, SRC) \ 81 ((fGETCRUMB(IDX, SRC) >= 0 ? (2 - fGETCRUMB(IDX, SRC)) \ 82 : fGETCRUMB(IDX, SRC))) 83 #define fGENMASKH(QREG, IDX) \ 84 (((fGETQBIT(QREG, (IDX * 2 + 0)) ? 0xFF : 0x0) << 0) | \ 85 ((fGETQBIT(QREG, (IDX * 2 + 1)) ? 0xFF : 0x0) << 8)) 86 #define fGETMASKW(VREG, QREG, IDX) (VREG.w[IDX] & fGENMASKW((QREG), IDX)) 87 #define fGETMASKH(VREG, QREG, IDX) (VREG.h[IDX] & fGENMASKH((QREG), IDX)) 88 #define fCONDMASK8(QREG, IDX, YESVAL, NOVAL) \ 89 (fGETQBIT(QREG, IDX) ? (YESVAL) : (NOVAL)) 90 #define fCONDMASK16(QREG, IDX, YESVAL, NOVAL) \ 91 ((fGENMASKH(QREG, IDX) & (YESVAL)) | \ 92 (fGENMASKH(fNOTQ(QREG), IDX) & (NOVAL))) 93 #define fCONDMASK32(QREG, IDX, YESVAL, NOVAL) \ 94 ((fGENMASKW(QREG, IDX) & (YESVAL)) | \ 95 (fGENMASKW(fNOTQ(QREG), IDX) & (NOVAL))) 96 #define fSETQBITS(REG, WIDTH, MASK, BITNO, VAL) \ 97 do { \ 98 uint32_t __TMP = (VAL); \ 99 REG.w[(BITNO) >> 5] &= ~((MASK) << ((BITNO) & 0x1f)); \ 100 REG.w[(BITNO) >> 5] |= (((__TMP) & (MASK)) << ((BITNO) & 0x1f)); \ 101 } while (0) 102 #define fSETQBIT(REG, BITNO, VAL) fSETQBITS(REG, 1, 1, BITNO, VAL) 103 #define fVBYTES() (fVECSIZE()) 104 #define fVALIGN(ADDR, LOG2_ALIGNMENT) (ADDR = ADDR & ~(LOG2_ALIGNMENT - 1)) 105 #define fVLASTBYTE(ADDR, LOG2_ALIGNMENT) (ADDR = ADDR | (LOG2_ALIGNMENT - 1)) 106 #define fVELEM(WIDTH) ((fVECSIZE() * 8) / WIDTH) 107 #define fVECLOGSIZE() (7) 108 #define fVECSIZE() (1 << fVECLOGSIZE()) 109 #define fSWAPB(A, B) do { uint8_t tmp = A; A = B; B = tmp; } while (0) 110 #define fV_AL_CHECK(EA, MASK) \ 111 if ((EA) & (MASK)) { \ 112 warn("aligning misaligned vector. EA=%08x", (EA)); \ 113 } 114 #define fSCATTER_INIT(REGION_START, LENGTH, ELEMENT_SIZE) \ 115 mem_vector_scatter_init(env) 116 #define fGATHER_INIT(REGION_START, LENGTH, ELEMENT_SIZE) \ 117 mem_vector_gather_init(env) 118 #define fSCATTER_FINISH(OP) 119 #define fGATHER_FINISH() 120 #define fLOG_SCATTER_OP(SIZE) \ 121 do { \ 122 env->vtcm_log.op = true; \ 123 env->vtcm_log.op_size = SIZE; \ 124 } while (0) 125 #define fVLOG_VTCM_WORD_INCREMENT(EA, OFFSET, INC, IDX, ALIGNMENT, LEN) \ 126 do { \ 127 int log_byte = 0; \ 128 target_ulong va = EA; \ 129 target_ulong va_high = EA + LEN; \ 130 for (int i0 = 0; i0 < 4; i0++) { \ 131 log_byte = (va + i0) <= va_high; \ 132 LOG_VTCM_BYTE(va + i0, log_byte, INC. ub[4 * IDX + i0], \ 133 4 * IDX + i0); \ 134 } \ 135 } while (0) 136 #define fVLOG_VTCM_HALFWORD_INCREMENT(EA, OFFSET, INC, IDX, ALIGNMENT, LEN) \ 137 do { \ 138 int log_byte = 0; \ 139 target_ulong va = EA; \ 140 target_ulong va_high = EA + LEN; \ 141 for (int i0 = 0; i0 < 2; i0++) { \ 142 log_byte = (va + i0) <= va_high; \ 143 LOG_VTCM_BYTE(va + i0, log_byte, INC.ub[2 * IDX + i0], \ 144 2 * IDX + i0); \ 145 } \ 146 } while (0) 147 148 #define fVLOG_VTCM_HALFWORD_INCREMENT_DV(EA, OFFSET, INC, IDX, IDX2, IDX_H, \ 149 ALIGNMENT, LEN) \ 150 do { \ 151 int log_byte = 0; \ 152 target_ulong va = EA; \ 153 target_ulong va_high = EA + LEN; \ 154 for (int i0 = 0; i0 < 2; i0++) { \ 155 log_byte = (va + i0) <= va_high; \ 156 LOG_VTCM_BYTE(va + i0, log_byte, INC.ub[2 * IDX + i0], \ 157 2 * IDX + i0); \ 158 } \ 159 } while (0) 160 161 /* NOTE - Will this always be tmp_VRegs[0]; */ 162 #define GATHER_FUNCTION(EA, OFFSET, IDX, LEN, ELEMENT_SIZE, BANK_IDX, QVAL) \ 163 do { \ 164 int i0; \ 165 target_ulong va = EA; \ 166 target_ulong va_high = EA + LEN; \ 167 uintptr_t ra = GETPC(); \ 168 int log_byte = 0; \ 169 for (i0 = 0; i0 < ELEMENT_SIZE; i0++) { \ 170 log_byte = ((va + i0) <= va_high) && QVAL; \ 171 uint8_t B; \ 172 B = cpu_ldub_data_ra(env, EA + i0, ra); \ 173 env->tmp_VRegs[0].ub[ELEMENT_SIZE * IDX + i0] = B; \ 174 LOG_VTCM_BYTE(va + i0, log_byte, B, ELEMENT_SIZE * IDX + i0); \ 175 } \ 176 } while (0) 177 #define fVLOG_VTCM_GATHER_WORD(EA, OFFSET, IDX, LEN) \ 178 do { \ 179 GATHER_FUNCTION(EA, OFFSET, IDX, LEN, 4, IDX, 1); \ 180 } while (0) 181 #define fVLOG_VTCM_GATHER_HALFWORD(EA, OFFSET, IDX, LEN) \ 182 do { \ 183 GATHER_FUNCTION(EA, OFFSET, IDX, LEN, 2, IDX, 1); \ 184 } while (0) 185 #define fVLOG_VTCM_GATHER_HALFWORD_DV(EA, OFFSET, IDX, IDX2, IDX_H, LEN) \ 186 do { \ 187 GATHER_FUNCTION(EA, OFFSET, IDX, LEN, 2, (2 * IDX2 + IDX_H), 1); \ 188 } while (0) 189 #define fVLOG_VTCM_GATHER_WORDQ(EA, OFFSET, IDX, Q, LEN) \ 190 do { \ 191 GATHER_FUNCTION(EA, OFFSET, IDX, LEN, 4, IDX, \ 192 fGETQBIT(QsV, 4 * IDX + i0)); \ 193 } while (0) 194 #define fVLOG_VTCM_GATHER_HALFWORDQ(EA, OFFSET, IDX, Q, LEN) \ 195 do { \ 196 GATHER_FUNCTION(EA, OFFSET, IDX, LEN, 2, IDX, \ 197 fGETQBIT(QsV, 2 * IDX + i0)); \ 198 } while (0) 199 #define fVLOG_VTCM_GATHER_HALFWORDQ_DV(EA, OFFSET, IDX, IDX2, IDX_H, Q, LEN) \ 200 do { \ 201 GATHER_FUNCTION(EA, OFFSET, IDX, LEN, 2, (2 * IDX2 + IDX_H), \ 202 fGETQBIT(QsV, 2 * IDX + i0)); \ 203 } while (0) 204 #define SCATTER_OP_WRITE_TO_MEM(TYPE) \ 205 do { \ 206 ra = GETPC(); \ 207 for (int i = 0; i < sizeof(MMVector); i += sizeof(TYPE)) { \ 208 if (test_bit(i, env->vtcm_log.mask)) { \ 209 TYPE dst = 0; \ 210 TYPE inc = 0; \ 211 for (int j = 0; j < sizeof(TYPE); j++) { \ 212 uint8_t val; \ 213 val = cpu_ldub_data_ra(env, env->vtcm_log.va[i + j], ra); \ 214 dst |= val << (8 * j); \ 215 inc |= env->vtcm_log.data.ub[j + i] << (8 * j); \ 216 clear_bit(j + i, env->vtcm_log.mask); \ 217 env->vtcm_log.data.ub[j + i] = 0; \ 218 } \ 219 dst += inc; \ 220 for (int j = 0; j < sizeof(TYPE); j++) { \ 221 cpu_stb_data_ra(env, env->vtcm_log.va[i + j], \ 222 (dst >> (8 * j)) & 0xFF, ra); \ 223 } \ 224 } \ 225 } \ 226 } while (0) 227 #define SCATTER_OP_PROBE_MEM(TYPE, MMU_IDX, RETADDR) \ 228 do { \ 229 for (int i = 0; i < sizeof(MMVector); i += sizeof(TYPE)) { \ 230 if (test_bit(i, env->vtcm_log.mask)) { \ 231 for (int j = 0; j < sizeof(TYPE); j++) { \ 232 probe_read(env, env->vtcm_log.va[i + j], 1, \ 233 MMU_IDX, RETADDR); \ 234 probe_write(env, env->vtcm_log.va[i + j], 1, \ 235 MMU_IDX, RETADDR); \ 236 } \ 237 } \ 238 } \ 239 } while (0) 240 #define SCATTER_FUNCTION(EA, OFFSET, IDX, LEN, ELEM_SIZE, BANK_IDX, QVAL, IN) \ 241 do { \ 242 int i0; \ 243 target_ulong va = EA; \ 244 target_ulong va_high = EA + LEN; \ 245 int log_byte = 0; \ 246 for (i0 = 0; i0 < ELEM_SIZE; i0++) { \ 247 log_byte = ((va + i0) <= va_high) && QVAL; \ 248 LOG_VTCM_BYTE(va + i0, log_byte, IN.ub[ELEM_SIZE * IDX + i0], \ 249 ELEM_SIZE * IDX + i0); \ 250 } \ 251 } while (0) 252 #define fVLOG_VTCM_HALFWORD(EA, OFFSET, IN, IDX, LEN) \ 253 do { \ 254 SCATTER_FUNCTION(EA, OFFSET, IDX, LEN, 2, IDX, 1, IN); \ 255 } while (0) 256 #define fVLOG_VTCM_WORD(EA, OFFSET, IN, IDX, LEN) \ 257 do { \ 258 SCATTER_FUNCTION(EA, OFFSET, IDX, LEN, 4, IDX, 1, IN); \ 259 } while (0) 260 #define fVLOG_VTCM_HALFWORDQ(EA, OFFSET, IN, IDX, Q, LEN) \ 261 do { \ 262 SCATTER_FUNCTION(EA, OFFSET, IDX, LEN, 2, IDX, \ 263 fGETQBIT(QsV, 2 * IDX + i0), IN); \ 264 } while (0) 265 #define fVLOG_VTCM_WORDQ(EA, OFFSET, IN, IDX, Q, LEN) \ 266 do { \ 267 SCATTER_FUNCTION(EA, OFFSET, IDX, LEN, 4, IDX, \ 268 fGETQBIT(QsV, 4 * IDX + i0), IN); \ 269 } while (0) 270 #define fVLOG_VTCM_HALFWORD_DV(EA, OFFSET, IN, IDX, IDX2, IDX_H, LEN) \ 271 do { \ 272 SCATTER_FUNCTION(EA, OFFSET, IDX, LEN, 2, \ 273 (2 * IDX2 + IDX_H), 1, IN); \ 274 } while (0) 275 #define fVLOG_VTCM_HALFWORDQ_DV(EA, OFFSET, IN, IDX, Q, IDX2, IDX_H, LEN) \ 276 do { \ 277 SCATTER_FUNCTION(EA, OFFSET, IDX, LEN, 2, (2 * IDX2 + IDX_H), \ 278 fGETQBIT(QsV, 2 * IDX + i0), IN); \ 279 } while (0) 280 #define fSTORERELEASE(EA, TYPE) \ 281 do { \ 282 fV_AL_CHECK(EA, fVECSIZE() - 1); \ 283 } while (0) 284 #ifdef QEMU_GENERATE 285 #define fLOADMMV(EA, DST) gen_vreg_load(ctx, DST##_off, EA, true) 286 #endif 287 #ifdef QEMU_GENERATE 288 #define fLOADMMVU(EA, DST) gen_vreg_load(ctx, DST##_off, EA, false) 289 #endif 290 #ifdef QEMU_GENERATE 291 #define fSTOREMMV(EA, SRC) \ 292 gen_vreg_store(ctx, EA, SRC##_off, insn->slot, true) 293 #endif 294 #ifdef QEMU_GENERATE 295 #define fSTOREMMVQ(EA, SRC, MASK) \ 296 gen_vreg_masked_store(ctx, EA, SRC##_off, MASK##_off, insn->slot, false) 297 #endif 298 #ifdef QEMU_GENERATE 299 #define fSTOREMMVNQ(EA, SRC, MASK) \ 300 gen_vreg_masked_store(ctx, EA, SRC##_off, MASK##_off, insn->slot, true) 301 #endif 302 #ifdef QEMU_GENERATE 303 #define fSTOREMMVU(EA, SRC) \ 304 gen_vreg_store(ctx, EA, SRC##_off, insn->slot, false) 305 #endif 306 #define fVFOREACH(WIDTH, VAR) for (VAR = 0; VAR < fVELEM(WIDTH); VAR++) 307 #define fVARRAY_ELEMENT_ACCESS(ARRAY, TYPE, INDEX) \ 308 ARRAY.v[(INDEX) / (fVECSIZE() / (sizeof(ARRAY.TYPE[0])))].TYPE[(INDEX) % \ 309 (fVECSIZE() / (sizeof(ARRAY.TYPE[0])))] 310 311 #define fVSATDW(U, V) fVSATW(((((long long)U) << 32) | fZXTN(32, 64, V))) 312 #define fVASL_SATHI(U, V) fVSATW(((U) << 1) | ((V) >> 31)) 313 #define fVUADDSAT(WIDTH, U, V) \ 314 fVSATUN(WIDTH, fZXTN(WIDTH, 2 * WIDTH, U) + fZXTN(WIDTH, 2 * WIDTH, V)) 315 #define fVSADDSAT(WIDTH, U, V) \ 316 fVSATN(WIDTH, fSXTN(WIDTH, 2 * WIDTH, U) + fSXTN(WIDTH, 2 * WIDTH, V)) 317 #define fVUSUBSAT(WIDTH, U, V) \ 318 fVSATUN(WIDTH, fZXTN(WIDTH, 2 * WIDTH, U) - fZXTN(WIDTH, 2 * WIDTH, V)) 319 #define fVSSUBSAT(WIDTH, U, V) \ 320 fVSATN(WIDTH, fSXTN(WIDTH, 2 * WIDTH, U) - fSXTN(WIDTH, 2 * WIDTH, V)) 321 #define fVAVGU(WIDTH, U, V) \ 322 ((fZXTN(WIDTH, 2 * WIDTH, U) + fZXTN(WIDTH, 2 * WIDTH, V)) >> 1) 323 #define fVAVGURND(WIDTH, U, V) \ 324 ((fZXTN(WIDTH, 2 * WIDTH, U) + fZXTN(WIDTH, 2 * WIDTH, V) + 1) >> 1) 325 #define fVNAVGU(WIDTH, U, V) \ 326 ((fZXTN(WIDTH, 2 * WIDTH, U) - fZXTN(WIDTH, 2 * WIDTH, V)) >> 1) 327 #define fVNAVGURNDSAT(WIDTH, U, V) \ 328 fVSATUN(WIDTH, ((fZXTN(WIDTH, 2 * WIDTH, U) - \ 329 fZXTN(WIDTH, 2 * WIDTH, V) + 1) >> 1)) 330 #define fVAVGS(WIDTH, U, V) \ 331 ((fSXTN(WIDTH, 2 * WIDTH, U) + fSXTN(WIDTH, 2 * WIDTH, V)) >> 1) 332 #define fVAVGSRND(WIDTH, U, V) \ 333 ((fSXTN(WIDTH, 2 * WIDTH, U) + fSXTN(WIDTH, 2 * WIDTH, V) + 1) >> 1) 334 #define fVNAVGS(WIDTH, U, V) \ 335 ((fSXTN(WIDTH, 2 * WIDTH, U) - fSXTN(WIDTH, 2 * WIDTH, V)) >> 1) 336 #define fVNAVGSRND(WIDTH, U, V) \ 337 ((fSXTN(WIDTH, 2 * WIDTH, U) - fSXTN(WIDTH, 2 * WIDTH, V) + 1) >> 1) 338 #define fVNAVGSRNDSAT(WIDTH, U, V) \ 339 fVSATN(WIDTH, ((fSXTN(WIDTH, 2 * WIDTH, U) - \ 340 fSXTN(WIDTH, 2 * WIDTH, V) + 1) >> 1)) 341 #define fVNOROUND(VAL, SHAMT) VAL 342 #define fVNOSAT(VAL) VAL 343 #define fVROUND(VAL, SHAMT) \ 344 ((VAL) + (((SHAMT) > 0) ? (1LL << ((SHAMT) - 1)) : 0)) 345 #define fCARRY_FROM_ADD32(A, B, C) \ 346 (((fZXTN(32, 64, A) + fZXTN(32, 64, B) + C) >> 32) & 1) 347 #define fUARCH_NOTE_PUMP_4X() 348 #define fUARCH_NOTE_PUMP_2X() 349 350 #define IV1DEAD() 351 352 #define fGET10BIT(COE, VAL, POS) \ 353 do { \ 354 COE = (sextract32(VAL, 24 + 2 * POS, 2) << 8) | \ 355 extract32(VAL, POS * 8, 8); \ 356 } while (0); 357 358 #endif 359