1db432672SRichard Henderson /* 2db432672SRichard Henderson * Generic vectorized operation runtime 3db432672SRichard Henderson * 4db432672SRichard Henderson * Copyright (c) 2018 Linaro 5db432672SRichard Henderson * 6db432672SRichard Henderson * This library is free software; you can redistribute it and/or 7db432672SRichard Henderson * modify it under the terms of the GNU Lesser General Public 8db432672SRichard Henderson * License as published by the Free Software Foundation; either 9db432672SRichard Henderson * version 2 of the License, or (at your option) any later version. 10db432672SRichard Henderson * 11db432672SRichard Henderson * This library is distributed in the hope that it will be useful, 12db432672SRichard Henderson * but WITHOUT ANY WARRANTY; without even the implied warranty of 13db432672SRichard Henderson * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14db432672SRichard Henderson * Lesser General Public License for more details. 15db432672SRichard Henderson * 16db432672SRichard Henderson * You should have received a copy of the GNU Lesser General Public 17db432672SRichard Henderson * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18db432672SRichard Henderson */ 19db432672SRichard Henderson 20db432672SRichard Henderson #include "qemu/osdep.h" 21db432672SRichard Henderson #include "qemu/host-utils.h" 22db432672SRichard Henderson #include "cpu.h" 23db432672SRichard Henderson #include "exec/helper-proto.h" 24db432672SRichard Henderson #include "tcg-gvec-desc.h" 25db432672SRichard Henderson 26db432672SRichard Henderson 27db432672SRichard Henderson /* Virtually all hosts support 16-byte vectors. Those that don't can emulate 28db432672SRichard Henderson * them via GCC's generic vector extension. This turns out to be simpler and 29db432672SRichard Henderson * more reliable than getting the compiler to autovectorize. 30db432672SRichard Henderson * 31db432672SRichard Henderson * In tcg-op-gvec.c, we asserted that both the size and alignment of the data 32db432672SRichard Henderson * are multiples of 16. 33db432672SRichard Henderson * 34db432672SRichard Henderson * When the compiler does not support all of the operations we require, the 35db432672SRichard Henderson * loops are written so that we can always fall back on the base types. 36db432672SRichard Henderson */ 37db432672SRichard Henderson #ifdef CONFIG_VECTOR16 38db432672SRichard Henderson typedef uint8_t vec8 __attribute__((vector_size(16))); 39db432672SRichard Henderson typedef uint16_t vec16 __attribute__((vector_size(16))); 40db432672SRichard Henderson typedef uint32_t vec32 __attribute__((vector_size(16))); 41db432672SRichard Henderson typedef uint64_t vec64 __attribute__((vector_size(16))); 42db432672SRichard Henderson 43db432672SRichard Henderson typedef int8_t svec8 __attribute__((vector_size(16))); 44db432672SRichard Henderson typedef int16_t svec16 __attribute__((vector_size(16))); 45db432672SRichard Henderson typedef int32_t svec32 __attribute__((vector_size(16))); 46db432672SRichard Henderson typedef int64_t svec64 __attribute__((vector_size(16))); 47db432672SRichard Henderson 48db432672SRichard Henderson #define DUP16(X) { X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X } 49db432672SRichard Henderson #define DUP8(X) { X, X, X, X, X, X, X, X } 50db432672SRichard Henderson #define DUP4(X) { X, X, X, X } 51db432672SRichard Henderson #define DUP2(X) { X, X } 52db432672SRichard Henderson #else 53db432672SRichard Henderson typedef uint8_t vec8; 54db432672SRichard Henderson typedef uint16_t vec16; 55db432672SRichard Henderson typedef uint32_t vec32; 56db432672SRichard Henderson typedef uint64_t vec64; 57db432672SRichard Henderson 58db432672SRichard Henderson typedef int8_t svec8; 59db432672SRichard Henderson typedef int16_t svec16; 60db432672SRichard Henderson typedef int32_t svec32; 61db432672SRichard Henderson typedef int64_t svec64; 62db432672SRichard Henderson 63db432672SRichard Henderson #define DUP16(X) X 64db432672SRichard Henderson #define DUP8(X) X 65db432672SRichard Henderson #define DUP4(X) X 66db432672SRichard Henderson #define DUP2(X) X 67db432672SRichard Henderson #endif /* CONFIG_VECTOR16 */ 68db432672SRichard Henderson 69db432672SRichard Henderson static inline void clear_high(void *d, intptr_t oprsz, uint32_t desc) 70db432672SRichard Henderson { 71db432672SRichard Henderson intptr_t maxsz = simd_maxsz(desc); 72db432672SRichard Henderson intptr_t i; 73db432672SRichard Henderson 74db432672SRichard Henderson if (unlikely(maxsz > oprsz)) { 75db432672SRichard Henderson for (i = oprsz; i < maxsz; i += sizeof(uint64_t)) { 76db432672SRichard Henderson *(uint64_t *)(d + i) = 0; 77db432672SRichard Henderson } 78db432672SRichard Henderson } 79db432672SRichard Henderson } 80db432672SRichard Henderson 81db432672SRichard Henderson void HELPER(gvec_add8)(void *d, void *a, void *b, uint32_t desc) 82db432672SRichard Henderson { 83db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 84db432672SRichard Henderson intptr_t i; 85db432672SRichard Henderson 86db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec8)) { 87db432672SRichard Henderson *(vec8 *)(d + i) = *(vec8 *)(a + i) + *(vec8 *)(b + i); 88db432672SRichard Henderson } 89db432672SRichard Henderson clear_high(d, oprsz, desc); 90db432672SRichard Henderson } 91db432672SRichard Henderson 92db432672SRichard Henderson void HELPER(gvec_add16)(void *d, void *a, void *b, uint32_t desc) 93db432672SRichard Henderson { 94db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 95db432672SRichard Henderson intptr_t i; 96db432672SRichard Henderson 97db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec16)) { 98db432672SRichard Henderson *(vec16 *)(d + i) = *(vec16 *)(a + i) + *(vec16 *)(b + i); 99db432672SRichard Henderson } 100db432672SRichard Henderson clear_high(d, oprsz, desc); 101db432672SRichard Henderson } 102db432672SRichard Henderson 103db432672SRichard Henderson void HELPER(gvec_add32)(void *d, void *a, void *b, uint32_t desc) 104db432672SRichard Henderson { 105db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 106db432672SRichard Henderson intptr_t i; 107db432672SRichard Henderson 108db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec32)) { 109db432672SRichard Henderson *(vec32 *)(d + i) = *(vec32 *)(a + i) + *(vec32 *)(b + i); 110db432672SRichard Henderson } 111db432672SRichard Henderson clear_high(d, oprsz, desc); 112db432672SRichard Henderson } 113db432672SRichard Henderson 114db432672SRichard Henderson void HELPER(gvec_add64)(void *d, void *a, void *b, uint32_t desc) 115db432672SRichard Henderson { 116db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 117db432672SRichard Henderson intptr_t i; 118db432672SRichard Henderson 119db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 120db432672SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) + *(vec64 *)(b + i); 121db432672SRichard Henderson } 122db432672SRichard Henderson clear_high(d, oprsz, desc); 123db432672SRichard Henderson } 124db432672SRichard Henderson 125db432672SRichard Henderson void HELPER(gvec_sub8)(void *d, void *a, void *b, uint32_t desc) 126db432672SRichard Henderson { 127db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 128db432672SRichard Henderson intptr_t i; 129db432672SRichard Henderson 130db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec8)) { 131db432672SRichard Henderson *(vec8 *)(d + i) = *(vec8 *)(a + i) - *(vec8 *)(b + i); 132db432672SRichard Henderson } 133db432672SRichard Henderson clear_high(d, oprsz, desc); 134db432672SRichard Henderson } 135db432672SRichard Henderson 136db432672SRichard Henderson void HELPER(gvec_sub16)(void *d, void *a, void *b, uint32_t desc) 137db432672SRichard Henderson { 138db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 139db432672SRichard Henderson intptr_t i; 140db432672SRichard Henderson 141db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec16)) { 142db432672SRichard Henderson *(vec16 *)(d + i) = *(vec16 *)(a + i) - *(vec16 *)(b + i); 143db432672SRichard Henderson } 144db432672SRichard Henderson clear_high(d, oprsz, desc); 145db432672SRichard Henderson } 146db432672SRichard Henderson 147db432672SRichard Henderson void HELPER(gvec_sub32)(void *d, void *a, void *b, uint32_t desc) 148db432672SRichard Henderson { 149db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 150db432672SRichard Henderson intptr_t i; 151db432672SRichard Henderson 152db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec32)) { 153db432672SRichard Henderson *(vec32 *)(d + i) = *(vec32 *)(a + i) - *(vec32 *)(b + i); 154db432672SRichard Henderson } 155db432672SRichard Henderson clear_high(d, oprsz, desc); 156db432672SRichard Henderson } 157db432672SRichard Henderson 158db432672SRichard Henderson void HELPER(gvec_sub64)(void *d, void *a, void *b, uint32_t desc) 159db432672SRichard Henderson { 160db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 161db432672SRichard Henderson intptr_t i; 162db432672SRichard Henderson 163db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 164db432672SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) - *(vec64 *)(b + i); 165db432672SRichard Henderson } 166db432672SRichard Henderson clear_high(d, oprsz, desc); 167db432672SRichard Henderson } 168db432672SRichard Henderson 169db432672SRichard Henderson void HELPER(gvec_neg8)(void *d, void *a, uint32_t desc) 170db432672SRichard Henderson { 171db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 172db432672SRichard Henderson intptr_t i; 173db432672SRichard Henderson 174db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec8)) { 175db432672SRichard Henderson *(vec8 *)(d + i) = -*(vec8 *)(a + i); 176db432672SRichard Henderson } 177db432672SRichard Henderson clear_high(d, oprsz, desc); 178db432672SRichard Henderson } 179db432672SRichard Henderson 180db432672SRichard Henderson void HELPER(gvec_neg16)(void *d, void *a, uint32_t desc) 181db432672SRichard Henderson { 182db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 183db432672SRichard Henderson intptr_t i; 184db432672SRichard Henderson 185db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec16)) { 186db432672SRichard Henderson *(vec16 *)(d + i) = -*(vec16 *)(a + i); 187db432672SRichard Henderson } 188db432672SRichard Henderson clear_high(d, oprsz, desc); 189db432672SRichard Henderson } 190db432672SRichard Henderson 191db432672SRichard Henderson void HELPER(gvec_neg32)(void *d, void *a, uint32_t desc) 192db432672SRichard Henderson { 193db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 194db432672SRichard Henderson intptr_t i; 195db432672SRichard Henderson 196db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec32)) { 197db432672SRichard Henderson *(vec32 *)(d + i) = -*(vec32 *)(a + i); 198db432672SRichard Henderson } 199db432672SRichard Henderson clear_high(d, oprsz, desc); 200db432672SRichard Henderson } 201db432672SRichard Henderson 202db432672SRichard Henderson void HELPER(gvec_neg64)(void *d, void *a, uint32_t desc) 203db432672SRichard Henderson { 204db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 205db432672SRichard Henderson intptr_t i; 206db432672SRichard Henderson 207db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 208db432672SRichard Henderson *(vec64 *)(d + i) = -*(vec64 *)(a + i); 209db432672SRichard Henderson } 210db432672SRichard Henderson clear_high(d, oprsz, desc); 211db432672SRichard Henderson } 212db432672SRichard Henderson 213db432672SRichard Henderson void HELPER(gvec_mov)(void *d, void *a, uint32_t desc) 214db432672SRichard Henderson { 215db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 216db432672SRichard Henderson 217db432672SRichard Henderson memcpy(d, a, oprsz); 218db432672SRichard Henderson clear_high(d, oprsz, desc); 219db432672SRichard Henderson } 220db432672SRichard Henderson 221db432672SRichard Henderson void HELPER(gvec_dup64)(void *d, uint32_t desc, uint64_t c) 222db432672SRichard Henderson { 223db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 224db432672SRichard Henderson intptr_t i; 225db432672SRichard Henderson 226db432672SRichard Henderson if (c == 0) { 227db432672SRichard Henderson oprsz = 0; 228db432672SRichard Henderson } else { 229db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 230db432672SRichard Henderson *(uint64_t *)(d + i) = c; 231db432672SRichard Henderson } 232db432672SRichard Henderson } 233db432672SRichard Henderson clear_high(d, oprsz, desc); 234db432672SRichard Henderson } 235db432672SRichard Henderson 236db432672SRichard Henderson void HELPER(gvec_dup32)(void *d, uint32_t desc, uint32_t c) 237db432672SRichard Henderson { 238db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 239db432672SRichard Henderson intptr_t i; 240db432672SRichard Henderson 241db432672SRichard Henderson if (c == 0) { 242db432672SRichard Henderson oprsz = 0; 243db432672SRichard Henderson } else { 244db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 245db432672SRichard Henderson *(uint32_t *)(d + i) = c; 246db432672SRichard Henderson } 247db432672SRichard Henderson } 248db432672SRichard Henderson clear_high(d, oprsz, desc); 249db432672SRichard Henderson } 250db432672SRichard Henderson 251db432672SRichard Henderson void HELPER(gvec_dup16)(void *d, uint32_t desc, uint32_t c) 252db432672SRichard Henderson { 253db432672SRichard Henderson HELPER(gvec_dup32)(d, desc, 0x00010001 * (c & 0xffff)); 254db432672SRichard Henderson } 255db432672SRichard Henderson 256db432672SRichard Henderson void HELPER(gvec_dup8)(void *d, uint32_t desc, uint32_t c) 257db432672SRichard Henderson { 258db432672SRichard Henderson HELPER(gvec_dup32)(d, desc, 0x01010101 * (c & 0xff)); 259db432672SRichard Henderson } 260db432672SRichard Henderson 261db432672SRichard Henderson void HELPER(gvec_not)(void *d, void *a, uint32_t desc) 262db432672SRichard Henderson { 263db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 264db432672SRichard Henderson intptr_t i; 265db432672SRichard Henderson 266db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 267db432672SRichard Henderson *(vec64 *)(d + i) = ~*(vec64 *)(a + i); 268db432672SRichard Henderson } 269db432672SRichard Henderson clear_high(d, oprsz, desc); 270db432672SRichard Henderson } 271db432672SRichard Henderson 272db432672SRichard Henderson void HELPER(gvec_and)(void *d, void *a, void *b, uint32_t desc) 273db432672SRichard Henderson { 274db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 275db432672SRichard Henderson intptr_t i; 276db432672SRichard Henderson 277db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 278db432672SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) & *(vec64 *)(b + i); 279db432672SRichard Henderson } 280db432672SRichard Henderson clear_high(d, oprsz, desc); 281db432672SRichard Henderson } 282db432672SRichard Henderson 283db432672SRichard Henderson void HELPER(gvec_or)(void *d, void *a, void *b, uint32_t desc) 284db432672SRichard Henderson { 285db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 286db432672SRichard Henderson intptr_t i; 287db432672SRichard Henderson 288db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 289db432672SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) | *(vec64 *)(b + i); 290db432672SRichard Henderson } 291db432672SRichard Henderson clear_high(d, oprsz, desc); 292db432672SRichard Henderson } 293db432672SRichard Henderson 294db432672SRichard Henderson void HELPER(gvec_xor)(void *d, void *a, void *b, uint32_t desc) 295db432672SRichard Henderson { 296db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 297db432672SRichard Henderson intptr_t i; 298db432672SRichard Henderson 299db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 300db432672SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) ^ *(vec64 *)(b + i); 301db432672SRichard Henderson } 302db432672SRichard Henderson clear_high(d, oprsz, desc); 303db432672SRichard Henderson } 304db432672SRichard Henderson 305db432672SRichard Henderson void HELPER(gvec_andc)(void *d, void *a, void *b, uint32_t desc) 306db432672SRichard Henderson { 307db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 308db432672SRichard Henderson intptr_t i; 309db432672SRichard Henderson 310db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 311db432672SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) &~ *(vec64 *)(b + i); 312db432672SRichard Henderson } 313db432672SRichard Henderson clear_high(d, oprsz, desc); 314db432672SRichard Henderson } 315db432672SRichard Henderson 316db432672SRichard Henderson void HELPER(gvec_orc)(void *d, void *a, void *b, uint32_t desc) 317db432672SRichard Henderson { 318db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 319db432672SRichard Henderson intptr_t i; 320db432672SRichard Henderson 321db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 322db432672SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) |~ *(vec64 *)(b + i); 323db432672SRichard Henderson } 324db432672SRichard Henderson clear_high(d, oprsz, desc); 325db432672SRichard Henderson } 326*d0ec9796SRichard Henderson 327*d0ec9796SRichard Henderson void HELPER(gvec_shl8i)(void *d, void *a, uint32_t desc) 328*d0ec9796SRichard Henderson { 329*d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 330*d0ec9796SRichard Henderson int shift = simd_data(desc); 331*d0ec9796SRichard Henderson intptr_t i; 332*d0ec9796SRichard Henderson 333*d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec8)) { 334*d0ec9796SRichard Henderson *(vec8 *)(d + i) = *(vec8 *)(a + i) << shift; 335*d0ec9796SRichard Henderson } 336*d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 337*d0ec9796SRichard Henderson } 338*d0ec9796SRichard Henderson 339*d0ec9796SRichard Henderson void HELPER(gvec_shl16i)(void *d, void *a, uint32_t desc) 340*d0ec9796SRichard Henderson { 341*d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 342*d0ec9796SRichard Henderson int shift = simd_data(desc); 343*d0ec9796SRichard Henderson intptr_t i; 344*d0ec9796SRichard Henderson 345*d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec16)) { 346*d0ec9796SRichard Henderson *(vec16 *)(d + i) = *(vec16 *)(a + i) << shift; 347*d0ec9796SRichard Henderson } 348*d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 349*d0ec9796SRichard Henderson } 350*d0ec9796SRichard Henderson 351*d0ec9796SRichard Henderson void HELPER(gvec_shl32i)(void *d, void *a, uint32_t desc) 352*d0ec9796SRichard Henderson { 353*d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 354*d0ec9796SRichard Henderson int shift = simd_data(desc); 355*d0ec9796SRichard Henderson intptr_t i; 356*d0ec9796SRichard Henderson 357*d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec32)) { 358*d0ec9796SRichard Henderson *(vec32 *)(d + i) = *(vec32 *)(a + i) << shift; 359*d0ec9796SRichard Henderson } 360*d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 361*d0ec9796SRichard Henderson } 362*d0ec9796SRichard Henderson 363*d0ec9796SRichard Henderson void HELPER(gvec_shl64i)(void *d, void *a, uint32_t desc) 364*d0ec9796SRichard Henderson { 365*d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 366*d0ec9796SRichard Henderson int shift = simd_data(desc); 367*d0ec9796SRichard Henderson intptr_t i; 368*d0ec9796SRichard Henderson 369*d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 370*d0ec9796SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) << shift; 371*d0ec9796SRichard Henderson } 372*d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 373*d0ec9796SRichard Henderson } 374*d0ec9796SRichard Henderson 375*d0ec9796SRichard Henderson void HELPER(gvec_shr8i)(void *d, void *a, uint32_t desc) 376*d0ec9796SRichard Henderson { 377*d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 378*d0ec9796SRichard Henderson int shift = simd_data(desc); 379*d0ec9796SRichard Henderson intptr_t i; 380*d0ec9796SRichard Henderson 381*d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec8)) { 382*d0ec9796SRichard Henderson *(vec8 *)(d + i) = *(vec8 *)(a + i) >> shift; 383*d0ec9796SRichard Henderson } 384*d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 385*d0ec9796SRichard Henderson } 386*d0ec9796SRichard Henderson 387*d0ec9796SRichard Henderson void HELPER(gvec_shr16i)(void *d, void *a, uint32_t desc) 388*d0ec9796SRichard Henderson { 389*d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 390*d0ec9796SRichard Henderson int shift = simd_data(desc); 391*d0ec9796SRichard Henderson intptr_t i; 392*d0ec9796SRichard Henderson 393*d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec16)) { 394*d0ec9796SRichard Henderson *(vec16 *)(d + i) = *(vec16 *)(a + i) >> shift; 395*d0ec9796SRichard Henderson } 396*d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 397*d0ec9796SRichard Henderson } 398*d0ec9796SRichard Henderson 399*d0ec9796SRichard Henderson void HELPER(gvec_shr32i)(void *d, void *a, uint32_t desc) 400*d0ec9796SRichard Henderson { 401*d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 402*d0ec9796SRichard Henderson int shift = simd_data(desc); 403*d0ec9796SRichard Henderson intptr_t i; 404*d0ec9796SRichard Henderson 405*d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec32)) { 406*d0ec9796SRichard Henderson *(vec32 *)(d + i) = *(vec32 *)(a + i) >> shift; 407*d0ec9796SRichard Henderson } 408*d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 409*d0ec9796SRichard Henderson } 410*d0ec9796SRichard Henderson 411*d0ec9796SRichard Henderson void HELPER(gvec_shr64i)(void *d, void *a, uint32_t desc) 412*d0ec9796SRichard Henderson { 413*d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 414*d0ec9796SRichard Henderson int shift = simd_data(desc); 415*d0ec9796SRichard Henderson intptr_t i; 416*d0ec9796SRichard Henderson 417*d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 418*d0ec9796SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) >> shift; 419*d0ec9796SRichard Henderson } 420*d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 421*d0ec9796SRichard Henderson } 422*d0ec9796SRichard Henderson 423*d0ec9796SRichard Henderson void HELPER(gvec_sar8i)(void *d, void *a, uint32_t desc) 424*d0ec9796SRichard Henderson { 425*d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 426*d0ec9796SRichard Henderson int shift = simd_data(desc); 427*d0ec9796SRichard Henderson intptr_t i; 428*d0ec9796SRichard Henderson 429*d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec8)) { 430*d0ec9796SRichard Henderson *(svec8 *)(d + i) = *(svec8 *)(a + i) >> shift; 431*d0ec9796SRichard Henderson } 432*d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 433*d0ec9796SRichard Henderson } 434*d0ec9796SRichard Henderson 435*d0ec9796SRichard Henderson void HELPER(gvec_sar16i)(void *d, void *a, uint32_t desc) 436*d0ec9796SRichard Henderson { 437*d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 438*d0ec9796SRichard Henderson int shift = simd_data(desc); 439*d0ec9796SRichard Henderson intptr_t i; 440*d0ec9796SRichard Henderson 441*d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec16)) { 442*d0ec9796SRichard Henderson *(svec16 *)(d + i) = *(svec16 *)(a + i) >> shift; 443*d0ec9796SRichard Henderson } 444*d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 445*d0ec9796SRichard Henderson } 446*d0ec9796SRichard Henderson 447*d0ec9796SRichard Henderson void HELPER(gvec_sar32i)(void *d, void *a, uint32_t desc) 448*d0ec9796SRichard Henderson { 449*d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 450*d0ec9796SRichard Henderson int shift = simd_data(desc); 451*d0ec9796SRichard Henderson intptr_t i; 452*d0ec9796SRichard Henderson 453*d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec32)) { 454*d0ec9796SRichard Henderson *(svec32 *)(d + i) = *(svec32 *)(a + i) >> shift; 455*d0ec9796SRichard Henderson } 456*d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 457*d0ec9796SRichard Henderson } 458*d0ec9796SRichard Henderson 459*d0ec9796SRichard Henderson void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc) 460*d0ec9796SRichard Henderson { 461*d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 462*d0ec9796SRichard Henderson int shift = simd_data(desc); 463*d0ec9796SRichard Henderson intptr_t i; 464*d0ec9796SRichard Henderson 465*d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 466*d0ec9796SRichard Henderson *(svec64 *)(d + i) = *(svec64 *)(a + i) >> shift; 467*d0ec9796SRichard Henderson } 468*d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 469*d0ec9796SRichard Henderson } 470