1db432672SRichard Henderson /* 2db432672SRichard Henderson * Generic vectorized operation runtime 3db432672SRichard Henderson * 4db432672SRichard Henderson * Copyright (c) 2018 Linaro 5db432672SRichard Henderson * 6db432672SRichard Henderson * This library is free software; you can redistribute it and/or 7db432672SRichard Henderson * modify it under the terms of the GNU Lesser General Public 8db432672SRichard Henderson * License as published by the Free Software Foundation; either 9db432672SRichard Henderson * version 2 of the License, or (at your option) any later version. 10db432672SRichard Henderson * 11db432672SRichard Henderson * This library is distributed in the hope that it will be useful, 12db432672SRichard Henderson * but WITHOUT ANY WARRANTY; without even the implied warranty of 13db432672SRichard Henderson * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14db432672SRichard Henderson * Lesser General Public License for more details. 15db432672SRichard Henderson * 16db432672SRichard Henderson * You should have received a copy of the GNU Lesser General Public 17db432672SRichard Henderson * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18db432672SRichard Henderson */ 19db432672SRichard Henderson 20db432672SRichard Henderson #include "qemu/osdep.h" 21db432672SRichard Henderson #include "qemu/host-utils.h" 22db432672SRichard Henderson #include "cpu.h" 23db432672SRichard Henderson #include "exec/helper-proto.h" 24db432672SRichard Henderson #include "tcg-gvec-desc.h" 25db432672SRichard Henderson 26db432672SRichard Henderson 27db432672SRichard Henderson /* Virtually all hosts support 16-byte vectors. Those that don't can emulate 28db432672SRichard Henderson * them via GCC's generic vector extension. This turns out to be simpler and 29db432672SRichard Henderson * more reliable than getting the compiler to autovectorize. 30db432672SRichard Henderson * 31db432672SRichard Henderson * In tcg-op-gvec.c, we asserted that both the size and alignment of the data 32db432672SRichard Henderson * are multiples of 16. 33db432672SRichard Henderson * 34db432672SRichard Henderson * When the compiler does not support all of the operations we require, the 35db432672SRichard Henderson * loops are written so that we can always fall back on the base types. 36db432672SRichard Henderson */ 37db432672SRichard Henderson #ifdef CONFIG_VECTOR16 38db432672SRichard Henderson typedef uint8_t vec8 __attribute__((vector_size(16))); 39db432672SRichard Henderson typedef uint16_t vec16 __attribute__((vector_size(16))); 40db432672SRichard Henderson typedef uint32_t vec32 __attribute__((vector_size(16))); 41db432672SRichard Henderson typedef uint64_t vec64 __attribute__((vector_size(16))); 42db432672SRichard Henderson 43db432672SRichard Henderson typedef int8_t svec8 __attribute__((vector_size(16))); 44db432672SRichard Henderson typedef int16_t svec16 __attribute__((vector_size(16))); 45db432672SRichard Henderson typedef int32_t svec32 __attribute__((vector_size(16))); 46db432672SRichard Henderson typedef int64_t svec64 __attribute__((vector_size(16))); 47db432672SRichard Henderson 48db432672SRichard Henderson #define DUP16(X) { X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X } 49db432672SRichard Henderson #define DUP8(X) { X, X, X, X, X, X, X, X } 50db432672SRichard Henderson #define DUP4(X) { X, X, X, X } 51db432672SRichard Henderson #define DUP2(X) { X, X } 52db432672SRichard Henderson #else 53db432672SRichard Henderson typedef uint8_t vec8; 54db432672SRichard Henderson typedef uint16_t vec16; 55db432672SRichard Henderson typedef uint32_t vec32; 56db432672SRichard Henderson typedef uint64_t vec64; 57db432672SRichard Henderson 58db432672SRichard Henderson typedef int8_t svec8; 59db432672SRichard Henderson typedef int16_t svec16; 60db432672SRichard Henderson typedef int32_t svec32; 61db432672SRichard Henderson typedef int64_t svec64; 62db432672SRichard Henderson 63db432672SRichard Henderson #define DUP16(X) X 64db432672SRichard Henderson #define DUP8(X) X 65db432672SRichard Henderson #define DUP4(X) X 66db432672SRichard Henderson #define DUP2(X) X 67db432672SRichard Henderson #endif /* CONFIG_VECTOR16 */ 68db432672SRichard Henderson 69db432672SRichard Henderson static inline void clear_high(void *d, intptr_t oprsz, uint32_t desc) 70db432672SRichard Henderson { 71db432672SRichard Henderson intptr_t maxsz = simd_maxsz(desc); 72db432672SRichard Henderson intptr_t i; 73db432672SRichard Henderson 74db432672SRichard Henderson if (unlikely(maxsz > oprsz)) { 75db432672SRichard Henderson for (i = oprsz; i < maxsz; i += sizeof(uint64_t)) { 76db432672SRichard Henderson *(uint64_t *)(d + i) = 0; 77db432672SRichard Henderson } 78db432672SRichard Henderson } 79db432672SRichard Henderson } 80db432672SRichard Henderson 81db432672SRichard Henderson void HELPER(gvec_add8)(void *d, void *a, void *b, uint32_t desc) 82db432672SRichard Henderson { 83db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 84db432672SRichard Henderson intptr_t i; 85db432672SRichard Henderson 86db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec8)) { 87db432672SRichard Henderson *(vec8 *)(d + i) = *(vec8 *)(a + i) + *(vec8 *)(b + i); 88db432672SRichard Henderson } 89db432672SRichard Henderson clear_high(d, oprsz, desc); 90db432672SRichard Henderson } 91db432672SRichard Henderson 92db432672SRichard Henderson void HELPER(gvec_add16)(void *d, void *a, void *b, uint32_t desc) 93db432672SRichard Henderson { 94db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 95db432672SRichard Henderson intptr_t i; 96db432672SRichard Henderson 97db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec16)) { 98db432672SRichard Henderson *(vec16 *)(d + i) = *(vec16 *)(a + i) + *(vec16 *)(b + i); 99db432672SRichard Henderson } 100db432672SRichard Henderson clear_high(d, oprsz, desc); 101db432672SRichard Henderson } 102db432672SRichard Henderson 103db432672SRichard Henderson void HELPER(gvec_add32)(void *d, void *a, void *b, uint32_t desc) 104db432672SRichard Henderson { 105db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 106db432672SRichard Henderson intptr_t i; 107db432672SRichard Henderson 108db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec32)) { 109db432672SRichard Henderson *(vec32 *)(d + i) = *(vec32 *)(a + i) + *(vec32 *)(b + i); 110db432672SRichard Henderson } 111db432672SRichard Henderson clear_high(d, oprsz, desc); 112db432672SRichard Henderson } 113db432672SRichard Henderson 114db432672SRichard Henderson void HELPER(gvec_add64)(void *d, void *a, void *b, uint32_t desc) 115db432672SRichard Henderson { 116db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 117db432672SRichard Henderson intptr_t i; 118db432672SRichard Henderson 119db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 120db432672SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) + *(vec64 *)(b + i); 121db432672SRichard Henderson } 122db432672SRichard Henderson clear_high(d, oprsz, desc); 123db432672SRichard Henderson } 124db432672SRichard Henderson 125db432672SRichard Henderson void HELPER(gvec_sub8)(void *d, void *a, void *b, uint32_t desc) 126db432672SRichard Henderson { 127db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 128db432672SRichard Henderson intptr_t i; 129db432672SRichard Henderson 130db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec8)) { 131db432672SRichard Henderson *(vec8 *)(d + i) = *(vec8 *)(a + i) - *(vec8 *)(b + i); 132db432672SRichard Henderson } 133db432672SRichard Henderson clear_high(d, oprsz, desc); 134db432672SRichard Henderson } 135db432672SRichard Henderson 136db432672SRichard Henderson void HELPER(gvec_sub16)(void *d, void *a, void *b, uint32_t desc) 137db432672SRichard Henderson { 138db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 139db432672SRichard Henderson intptr_t i; 140db432672SRichard Henderson 141db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec16)) { 142db432672SRichard Henderson *(vec16 *)(d + i) = *(vec16 *)(a + i) - *(vec16 *)(b + i); 143db432672SRichard Henderson } 144db432672SRichard Henderson clear_high(d, oprsz, desc); 145db432672SRichard Henderson } 146db432672SRichard Henderson 147db432672SRichard Henderson void HELPER(gvec_sub32)(void *d, void *a, void *b, uint32_t desc) 148db432672SRichard Henderson { 149db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 150db432672SRichard Henderson intptr_t i; 151db432672SRichard Henderson 152db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec32)) { 153db432672SRichard Henderson *(vec32 *)(d + i) = *(vec32 *)(a + i) - *(vec32 *)(b + i); 154db432672SRichard Henderson } 155db432672SRichard Henderson clear_high(d, oprsz, desc); 156db432672SRichard Henderson } 157db432672SRichard Henderson 158db432672SRichard Henderson void HELPER(gvec_sub64)(void *d, void *a, void *b, uint32_t desc) 159db432672SRichard Henderson { 160db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 161db432672SRichard Henderson intptr_t i; 162db432672SRichard Henderson 163db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 164db432672SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) - *(vec64 *)(b + i); 165db432672SRichard Henderson } 166db432672SRichard Henderson clear_high(d, oprsz, desc); 167db432672SRichard Henderson } 168db432672SRichard Henderson 169db432672SRichard Henderson void HELPER(gvec_neg8)(void *d, void *a, uint32_t desc) 170db432672SRichard Henderson { 171db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 172db432672SRichard Henderson intptr_t i; 173db432672SRichard Henderson 174db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec8)) { 175db432672SRichard Henderson *(vec8 *)(d + i) = -*(vec8 *)(a + i); 176db432672SRichard Henderson } 177db432672SRichard Henderson clear_high(d, oprsz, desc); 178db432672SRichard Henderson } 179db432672SRichard Henderson 180db432672SRichard Henderson void HELPER(gvec_neg16)(void *d, void *a, uint32_t desc) 181db432672SRichard Henderson { 182db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 183db432672SRichard Henderson intptr_t i; 184db432672SRichard Henderson 185db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec16)) { 186db432672SRichard Henderson *(vec16 *)(d + i) = -*(vec16 *)(a + i); 187db432672SRichard Henderson } 188db432672SRichard Henderson clear_high(d, oprsz, desc); 189db432672SRichard Henderson } 190db432672SRichard Henderson 191db432672SRichard Henderson void HELPER(gvec_neg32)(void *d, void *a, uint32_t desc) 192db432672SRichard Henderson { 193db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 194db432672SRichard Henderson intptr_t i; 195db432672SRichard Henderson 196db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec32)) { 197db432672SRichard Henderson *(vec32 *)(d + i) = -*(vec32 *)(a + i); 198db432672SRichard Henderson } 199db432672SRichard Henderson clear_high(d, oprsz, desc); 200db432672SRichard Henderson } 201db432672SRichard Henderson 202db432672SRichard Henderson void HELPER(gvec_neg64)(void *d, void *a, uint32_t desc) 203db432672SRichard Henderson { 204db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 205db432672SRichard Henderson intptr_t i; 206db432672SRichard Henderson 207db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 208db432672SRichard Henderson *(vec64 *)(d + i) = -*(vec64 *)(a + i); 209db432672SRichard Henderson } 210db432672SRichard Henderson clear_high(d, oprsz, desc); 211db432672SRichard Henderson } 212db432672SRichard Henderson 213db432672SRichard Henderson void HELPER(gvec_mov)(void *d, void *a, uint32_t desc) 214db432672SRichard Henderson { 215db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 216db432672SRichard Henderson 217db432672SRichard Henderson memcpy(d, a, oprsz); 218db432672SRichard Henderson clear_high(d, oprsz, desc); 219db432672SRichard Henderson } 220db432672SRichard Henderson 221db432672SRichard Henderson void HELPER(gvec_dup64)(void *d, uint32_t desc, uint64_t c) 222db432672SRichard Henderson { 223db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 224db432672SRichard Henderson intptr_t i; 225db432672SRichard Henderson 226db432672SRichard Henderson if (c == 0) { 227db432672SRichard Henderson oprsz = 0; 228db432672SRichard Henderson } else { 229db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 230db432672SRichard Henderson *(uint64_t *)(d + i) = c; 231db432672SRichard Henderson } 232db432672SRichard Henderson } 233db432672SRichard Henderson clear_high(d, oprsz, desc); 234db432672SRichard Henderson } 235db432672SRichard Henderson 236db432672SRichard Henderson void HELPER(gvec_dup32)(void *d, uint32_t desc, uint32_t c) 237db432672SRichard Henderson { 238db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 239db432672SRichard Henderson intptr_t i; 240db432672SRichard Henderson 241db432672SRichard Henderson if (c == 0) { 242db432672SRichard Henderson oprsz = 0; 243db432672SRichard Henderson } else { 244db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 245db432672SRichard Henderson *(uint32_t *)(d + i) = c; 246db432672SRichard Henderson } 247db432672SRichard Henderson } 248db432672SRichard Henderson clear_high(d, oprsz, desc); 249db432672SRichard Henderson } 250db432672SRichard Henderson 251db432672SRichard Henderson void HELPER(gvec_dup16)(void *d, uint32_t desc, uint32_t c) 252db432672SRichard Henderson { 253db432672SRichard Henderson HELPER(gvec_dup32)(d, desc, 0x00010001 * (c & 0xffff)); 254db432672SRichard Henderson } 255db432672SRichard Henderson 256db432672SRichard Henderson void HELPER(gvec_dup8)(void *d, uint32_t desc, uint32_t c) 257db432672SRichard Henderson { 258db432672SRichard Henderson HELPER(gvec_dup32)(d, desc, 0x01010101 * (c & 0xff)); 259db432672SRichard Henderson } 260db432672SRichard Henderson 261db432672SRichard Henderson void HELPER(gvec_not)(void *d, void *a, uint32_t desc) 262db432672SRichard Henderson { 263db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 264db432672SRichard Henderson intptr_t i; 265db432672SRichard Henderson 266db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 267db432672SRichard Henderson *(vec64 *)(d + i) = ~*(vec64 *)(a + i); 268db432672SRichard Henderson } 269db432672SRichard Henderson clear_high(d, oprsz, desc); 270db432672SRichard Henderson } 271db432672SRichard Henderson 272db432672SRichard Henderson void HELPER(gvec_and)(void *d, void *a, void *b, uint32_t desc) 273db432672SRichard Henderson { 274db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 275db432672SRichard Henderson intptr_t i; 276db432672SRichard Henderson 277db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 278db432672SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) & *(vec64 *)(b + i); 279db432672SRichard Henderson } 280db432672SRichard Henderson clear_high(d, oprsz, desc); 281db432672SRichard Henderson } 282db432672SRichard Henderson 283db432672SRichard Henderson void HELPER(gvec_or)(void *d, void *a, void *b, uint32_t desc) 284db432672SRichard Henderson { 285db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 286db432672SRichard Henderson intptr_t i; 287db432672SRichard Henderson 288db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 289db432672SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) | *(vec64 *)(b + i); 290db432672SRichard Henderson } 291db432672SRichard Henderson clear_high(d, oprsz, desc); 292db432672SRichard Henderson } 293db432672SRichard Henderson 294db432672SRichard Henderson void HELPER(gvec_xor)(void *d, void *a, void *b, uint32_t desc) 295db432672SRichard Henderson { 296db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 297db432672SRichard Henderson intptr_t i; 298db432672SRichard Henderson 299db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 300db432672SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) ^ *(vec64 *)(b + i); 301db432672SRichard Henderson } 302db432672SRichard Henderson clear_high(d, oprsz, desc); 303db432672SRichard Henderson } 304db432672SRichard Henderson 305db432672SRichard Henderson void HELPER(gvec_andc)(void *d, void *a, void *b, uint32_t desc) 306db432672SRichard Henderson { 307db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 308db432672SRichard Henderson intptr_t i; 309db432672SRichard Henderson 310db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 311db432672SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) &~ *(vec64 *)(b + i); 312db432672SRichard Henderson } 313db432672SRichard Henderson clear_high(d, oprsz, desc); 314db432672SRichard Henderson } 315db432672SRichard Henderson 316db432672SRichard Henderson void HELPER(gvec_orc)(void *d, void *a, void *b, uint32_t desc) 317db432672SRichard Henderson { 318db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 319db432672SRichard Henderson intptr_t i; 320db432672SRichard Henderson 321db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 322db432672SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) |~ *(vec64 *)(b + i); 323db432672SRichard Henderson } 324db432672SRichard Henderson clear_high(d, oprsz, desc); 325db432672SRichard Henderson } 326d0ec9796SRichard Henderson 327d0ec9796SRichard Henderson void HELPER(gvec_shl8i)(void *d, void *a, uint32_t desc) 328d0ec9796SRichard Henderson { 329d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 330d0ec9796SRichard Henderson int shift = simd_data(desc); 331d0ec9796SRichard Henderson intptr_t i; 332d0ec9796SRichard Henderson 333d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec8)) { 334d0ec9796SRichard Henderson *(vec8 *)(d + i) = *(vec8 *)(a + i) << shift; 335d0ec9796SRichard Henderson } 336d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 337d0ec9796SRichard Henderson } 338d0ec9796SRichard Henderson 339d0ec9796SRichard Henderson void HELPER(gvec_shl16i)(void *d, void *a, uint32_t desc) 340d0ec9796SRichard Henderson { 341d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 342d0ec9796SRichard Henderson int shift = simd_data(desc); 343d0ec9796SRichard Henderson intptr_t i; 344d0ec9796SRichard Henderson 345d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec16)) { 346d0ec9796SRichard Henderson *(vec16 *)(d + i) = *(vec16 *)(a + i) << shift; 347d0ec9796SRichard Henderson } 348d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 349d0ec9796SRichard Henderson } 350d0ec9796SRichard Henderson 351d0ec9796SRichard Henderson void HELPER(gvec_shl32i)(void *d, void *a, uint32_t desc) 352d0ec9796SRichard Henderson { 353d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 354d0ec9796SRichard Henderson int shift = simd_data(desc); 355d0ec9796SRichard Henderson intptr_t i; 356d0ec9796SRichard Henderson 357d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec32)) { 358d0ec9796SRichard Henderson *(vec32 *)(d + i) = *(vec32 *)(a + i) << shift; 359d0ec9796SRichard Henderson } 360d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 361d0ec9796SRichard Henderson } 362d0ec9796SRichard Henderson 363d0ec9796SRichard Henderson void HELPER(gvec_shl64i)(void *d, void *a, uint32_t desc) 364d0ec9796SRichard Henderson { 365d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 366d0ec9796SRichard Henderson int shift = simd_data(desc); 367d0ec9796SRichard Henderson intptr_t i; 368d0ec9796SRichard Henderson 369d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 370d0ec9796SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) << shift; 371d0ec9796SRichard Henderson } 372d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 373d0ec9796SRichard Henderson } 374d0ec9796SRichard Henderson 375d0ec9796SRichard Henderson void HELPER(gvec_shr8i)(void *d, void *a, uint32_t desc) 376d0ec9796SRichard Henderson { 377d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 378d0ec9796SRichard Henderson int shift = simd_data(desc); 379d0ec9796SRichard Henderson intptr_t i; 380d0ec9796SRichard Henderson 381d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec8)) { 382d0ec9796SRichard Henderson *(vec8 *)(d + i) = *(vec8 *)(a + i) >> shift; 383d0ec9796SRichard Henderson } 384d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 385d0ec9796SRichard Henderson } 386d0ec9796SRichard Henderson 387d0ec9796SRichard Henderson void HELPER(gvec_shr16i)(void *d, void *a, uint32_t desc) 388d0ec9796SRichard Henderson { 389d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 390d0ec9796SRichard Henderson int shift = simd_data(desc); 391d0ec9796SRichard Henderson intptr_t i; 392d0ec9796SRichard Henderson 393d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec16)) { 394d0ec9796SRichard Henderson *(vec16 *)(d + i) = *(vec16 *)(a + i) >> shift; 395d0ec9796SRichard Henderson } 396d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 397d0ec9796SRichard Henderson } 398d0ec9796SRichard Henderson 399d0ec9796SRichard Henderson void HELPER(gvec_shr32i)(void *d, void *a, uint32_t desc) 400d0ec9796SRichard Henderson { 401d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 402d0ec9796SRichard Henderson int shift = simd_data(desc); 403d0ec9796SRichard Henderson intptr_t i; 404d0ec9796SRichard Henderson 405d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec32)) { 406d0ec9796SRichard Henderson *(vec32 *)(d + i) = *(vec32 *)(a + i) >> shift; 407d0ec9796SRichard Henderson } 408d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 409d0ec9796SRichard Henderson } 410d0ec9796SRichard Henderson 411d0ec9796SRichard Henderson void HELPER(gvec_shr64i)(void *d, void *a, uint32_t desc) 412d0ec9796SRichard Henderson { 413d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 414d0ec9796SRichard Henderson int shift = simd_data(desc); 415d0ec9796SRichard Henderson intptr_t i; 416d0ec9796SRichard Henderson 417d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 418d0ec9796SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) >> shift; 419d0ec9796SRichard Henderson } 420d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 421d0ec9796SRichard Henderson } 422d0ec9796SRichard Henderson 423d0ec9796SRichard Henderson void HELPER(gvec_sar8i)(void *d, void *a, uint32_t desc) 424d0ec9796SRichard Henderson { 425d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 426d0ec9796SRichard Henderson int shift = simd_data(desc); 427d0ec9796SRichard Henderson intptr_t i; 428d0ec9796SRichard Henderson 429d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec8)) { 430d0ec9796SRichard Henderson *(svec8 *)(d + i) = *(svec8 *)(a + i) >> shift; 431d0ec9796SRichard Henderson } 432d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 433d0ec9796SRichard Henderson } 434d0ec9796SRichard Henderson 435d0ec9796SRichard Henderson void HELPER(gvec_sar16i)(void *d, void *a, uint32_t desc) 436d0ec9796SRichard Henderson { 437d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 438d0ec9796SRichard Henderson int shift = simd_data(desc); 439d0ec9796SRichard Henderson intptr_t i; 440d0ec9796SRichard Henderson 441d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec16)) { 442d0ec9796SRichard Henderson *(svec16 *)(d + i) = *(svec16 *)(a + i) >> shift; 443d0ec9796SRichard Henderson } 444d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 445d0ec9796SRichard Henderson } 446d0ec9796SRichard Henderson 447d0ec9796SRichard Henderson void HELPER(gvec_sar32i)(void *d, void *a, uint32_t desc) 448d0ec9796SRichard Henderson { 449d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 450d0ec9796SRichard Henderson int shift = simd_data(desc); 451d0ec9796SRichard Henderson intptr_t i; 452d0ec9796SRichard Henderson 453d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec32)) { 454d0ec9796SRichard Henderson *(svec32 *)(d + i) = *(svec32 *)(a + i) >> shift; 455d0ec9796SRichard Henderson } 456d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 457d0ec9796SRichard Henderson } 458d0ec9796SRichard Henderson 459d0ec9796SRichard Henderson void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc) 460d0ec9796SRichard Henderson { 461d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 462d0ec9796SRichard Henderson int shift = simd_data(desc); 463d0ec9796SRichard Henderson intptr_t i; 464d0ec9796SRichard Henderson 465d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 466d0ec9796SRichard Henderson *(svec64 *)(d + i) = *(svec64 *)(a + i) >> shift; 467d0ec9796SRichard Henderson } 468d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 469d0ec9796SRichard Henderson } 470*212be173SRichard Henderson 471*212be173SRichard Henderson /* If vectors are enabled, the compiler fills in -1 for true. 472*212be173SRichard Henderson Otherwise, we must take care of this by hand. */ 473*212be173SRichard Henderson #ifdef CONFIG_VECTOR16 474*212be173SRichard Henderson # define DO_CMP0(X) X 475*212be173SRichard Henderson #else 476*212be173SRichard Henderson # define DO_CMP0(X) -(X) 477*212be173SRichard Henderson #endif 478*212be173SRichard Henderson 479*212be173SRichard Henderson #define DO_CMP1(NAME, TYPE, OP) \ 480*212be173SRichard Henderson void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc) \ 481*212be173SRichard Henderson { \ 482*212be173SRichard Henderson intptr_t oprsz = simd_oprsz(desc); \ 483*212be173SRichard Henderson intptr_t i; \ 484*212be173SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { \ 485*212be173SRichard Henderson *(TYPE *)(d + i) = DO_CMP0(*(TYPE *)(a + i) OP *(TYPE *)(b + i)); \ 486*212be173SRichard Henderson } \ 487*212be173SRichard Henderson clear_high(d, oprsz, desc); \ 488*212be173SRichard Henderson } 489*212be173SRichard Henderson 490*212be173SRichard Henderson #define DO_CMP2(SZ) \ 491*212be173SRichard Henderson DO_CMP1(gvec_eq##SZ, vec##SZ, ==) \ 492*212be173SRichard Henderson DO_CMP1(gvec_ne##SZ, vec##SZ, !=) \ 493*212be173SRichard Henderson DO_CMP1(gvec_lt##SZ, svec##SZ, <) \ 494*212be173SRichard Henderson DO_CMP1(gvec_le##SZ, svec##SZ, <=) \ 495*212be173SRichard Henderson DO_CMP1(gvec_ltu##SZ, vec##SZ, <) \ 496*212be173SRichard Henderson DO_CMP1(gvec_leu##SZ, vec##SZ, <=) 497*212be173SRichard Henderson 498*212be173SRichard Henderson DO_CMP2(8) 499*212be173SRichard Henderson DO_CMP2(16) 500*212be173SRichard Henderson DO_CMP2(32) 501*212be173SRichard Henderson DO_CMP2(64) 502*212be173SRichard Henderson 503*212be173SRichard Henderson #undef DO_CMP0 504*212be173SRichard Henderson #undef DO_CMP1 505*212be173SRichard Henderson #undef DO_CMP2 506