1db432672SRichard Henderson /* 2db432672SRichard Henderson * Generic vectorized operation runtime 3db432672SRichard Henderson * 4db432672SRichard Henderson * Copyright (c) 2018 Linaro 5db432672SRichard Henderson * 6db432672SRichard Henderson * This library is free software; you can redistribute it and/or 7db432672SRichard Henderson * modify it under the terms of the GNU Lesser General Public 8db432672SRichard Henderson * License as published by the Free Software Foundation; either 9db432672SRichard Henderson * version 2 of the License, or (at your option) any later version. 10db432672SRichard Henderson * 11db432672SRichard Henderson * This library is distributed in the hope that it will be useful, 12db432672SRichard Henderson * but WITHOUT ANY WARRANTY; without even the implied warranty of 13db432672SRichard Henderson * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14db432672SRichard Henderson * Lesser General Public License for more details. 15db432672SRichard Henderson * 16db432672SRichard Henderson * You should have received a copy of the GNU Lesser General Public 17db432672SRichard Henderson * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18db432672SRichard Henderson */ 19db432672SRichard Henderson 20db432672SRichard Henderson #include "qemu/osdep.h" 21db432672SRichard Henderson #include "qemu/host-utils.h" 22db432672SRichard Henderson #include "cpu.h" 23db432672SRichard Henderson #include "exec/helper-proto.h" 24db432672SRichard Henderson #include "tcg-gvec-desc.h" 25db432672SRichard Henderson 26db432672SRichard Henderson 27db432672SRichard Henderson /* Virtually all hosts support 16-byte vectors. Those that don't can emulate 28db432672SRichard Henderson * them via GCC's generic vector extension. This turns out to be simpler and 29db432672SRichard Henderson * more reliable than getting the compiler to autovectorize. 30db432672SRichard Henderson * 31db432672SRichard Henderson * In tcg-op-gvec.c, we asserted that both the size and alignment of the data 32db432672SRichard Henderson * are multiples of 16. 33db432672SRichard Henderson * 34db432672SRichard Henderson * When the compiler does not support all of the operations we require, the 35db432672SRichard Henderson * loops are written so that we can always fall back on the base types. 36db432672SRichard Henderson */ 37db432672SRichard Henderson #ifdef CONFIG_VECTOR16 38db432672SRichard Henderson typedef uint8_t vec8 __attribute__((vector_size(16))); 39db432672SRichard Henderson typedef uint16_t vec16 __attribute__((vector_size(16))); 40db432672SRichard Henderson typedef uint32_t vec32 __attribute__((vector_size(16))); 41db432672SRichard Henderson typedef uint64_t vec64 __attribute__((vector_size(16))); 42db432672SRichard Henderson 43db432672SRichard Henderson typedef int8_t svec8 __attribute__((vector_size(16))); 44db432672SRichard Henderson typedef int16_t svec16 __attribute__((vector_size(16))); 45db432672SRichard Henderson typedef int32_t svec32 __attribute__((vector_size(16))); 46db432672SRichard Henderson typedef int64_t svec64 __attribute__((vector_size(16))); 47db432672SRichard Henderson 48db432672SRichard Henderson #define DUP16(X) { X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X } 49db432672SRichard Henderson #define DUP8(X) { X, X, X, X, X, X, X, X } 50db432672SRichard Henderson #define DUP4(X) { X, X, X, X } 51db432672SRichard Henderson #define DUP2(X) { X, X } 52db432672SRichard Henderson #else 53db432672SRichard Henderson typedef uint8_t vec8; 54db432672SRichard Henderson typedef uint16_t vec16; 55db432672SRichard Henderson typedef uint32_t vec32; 56db432672SRichard Henderson typedef uint64_t vec64; 57db432672SRichard Henderson 58db432672SRichard Henderson typedef int8_t svec8; 59db432672SRichard Henderson typedef int16_t svec16; 60db432672SRichard Henderson typedef int32_t svec32; 61db432672SRichard Henderson typedef int64_t svec64; 62db432672SRichard Henderson 63db432672SRichard Henderson #define DUP16(X) X 64db432672SRichard Henderson #define DUP8(X) X 65db432672SRichard Henderson #define DUP4(X) X 66db432672SRichard Henderson #define DUP2(X) X 67db432672SRichard Henderson #endif /* CONFIG_VECTOR16 */ 68db432672SRichard Henderson 69db432672SRichard Henderson static inline void clear_high(void *d, intptr_t oprsz, uint32_t desc) 70db432672SRichard Henderson { 71db432672SRichard Henderson intptr_t maxsz = simd_maxsz(desc); 72db432672SRichard Henderson intptr_t i; 73db432672SRichard Henderson 74db432672SRichard Henderson if (unlikely(maxsz > oprsz)) { 75db432672SRichard Henderson for (i = oprsz; i < maxsz; i += sizeof(uint64_t)) { 76db432672SRichard Henderson *(uint64_t *)(d + i) = 0; 77db432672SRichard Henderson } 78db432672SRichard Henderson } 79db432672SRichard Henderson } 80db432672SRichard Henderson 81db432672SRichard Henderson void HELPER(gvec_add8)(void *d, void *a, void *b, uint32_t desc) 82db432672SRichard Henderson { 83db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 84db432672SRichard Henderson intptr_t i; 85db432672SRichard Henderson 86db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec8)) { 87db432672SRichard Henderson *(vec8 *)(d + i) = *(vec8 *)(a + i) + *(vec8 *)(b + i); 88db432672SRichard Henderson } 89db432672SRichard Henderson clear_high(d, oprsz, desc); 90db432672SRichard Henderson } 91db432672SRichard Henderson 92db432672SRichard Henderson void HELPER(gvec_add16)(void *d, void *a, void *b, uint32_t desc) 93db432672SRichard Henderson { 94db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 95db432672SRichard Henderson intptr_t i; 96db432672SRichard Henderson 97db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec16)) { 98db432672SRichard Henderson *(vec16 *)(d + i) = *(vec16 *)(a + i) + *(vec16 *)(b + i); 99db432672SRichard Henderson } 100db432672SRichard Henderson clear_high(d, oprsz, desc); 101db432672SRichard Henderson } 102db432672SRichard Henderson 103db432672SRichard Henderson void HELPER(gvec_add32)(void *d, void *a, void *b, uint32_t desc) 104db432672SRichard Henderson { 105db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 106db432672SRichard Henderson intptr_t i; 107db432672SRichard Henderson 108db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec32)) { 109db432672SRichard Henderson *(vec32 *)(d + i) = *(vec32 *)(a + i) + *(vec32 *)(b + i); 110db432672SRichard Henderson } 111db432672SRichard Henderson clear_high(d, oprsz, desc); 112db432672SRichard Henderson } 113db432672SRichard Henderson 114db432672SRichard Henderson void HELPER(gvec_add64)(void *d, void *a, void *b, uint32_t desc) 115db432672SRichard Henderson { 116db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 117db432672SRichard Henderson intptr_t i; 118db432672SRichard Henderson 119db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 120db432672SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) + *(vec64 *)(b + i); 121db432672SRichard Henderson } 122db432672SRichard Henderson clear_high(d, oprsz, desc); 123db432672SRichard Henderson } 124db432672SRichard Henderson 125db432672SRichard Henderson void HELPER(gvec_sub8)(void *d, void *a, void *b, uint32_t desc) 126db432672SRichard Henderson { 127db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 128db432672SRichard Henderson intptr_t i; 129db432672SRichard Henderson 130db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec8)) { 131db432672SRichard Henderson *(vec8 *)(d + i) = *(vec8 *)(a + i) - *(vec8 *)(b + i); 132db432672SRichard Henderson } 133db432672SRichard Henderson clear_high(d, oprsz, desc); 134db432672SRichard Henderson } 135db432672SRichard Henderson 136db432672SRichard Henderson void HELPER(gvec_sub16)(void *d, void *a, void *b, uint32_t desc) 137db432672SRichard Henderson { 138db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 139db432672SRichard Henderson intptr_t i; 140db432672SRichard Henderson 141db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec16)) { 142db432672SRichard Henderson *(vec16 *)(d + i) = *(vec16 *)(a + i) - *(vec16 *)(b + i); 143db432672SRichard Henderson } 144db432672SRichard Henderson clear_high(d, oprsz, desc); 145db432672SRichard Henderson } 146db432672SRichard Henderson 147db432672SRichard Henderson void HELPER(gvec_sub32)(void *d, void *a, void *b, uint32_t desc) 148db432672SRichard Henderson { 149db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 150db432672SRichard Henderson intptr_t i; 151db432672SRichard Henderson 152db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec32)) { 153db432672SRichard Henderson *(vec32 *)(d + i) = *(vec32 *)(a + i) - *(vec32 *)(b + i); 154db432672SRichard Henderson } 155db432672SRichard Henderson clear_high(d, oprsz, desc); 156db432672SRichard Henderson } 157db432672SRichard Henderson 158db432672SRichard Henderson void HELPER(gvec_sub64)(void *d, void *a, void *b, uint32_t desc) 159db432672SRichard Henderson { 160db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 161db432672SRichard Henderson intptr_t i; 162db432672SRichard Henderson 163db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 164db432672SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) - *(vec64 *)(b + i); 165db432672SRichard Henderson } 166db432672SRichard Henderson clear_high(d, oprsz, desc); 167db432672SRichard Henderson } 168db432672SRichard Henderson 169*3774030aSRichard Henderson void HELPER(gvec_mul8)(void *d, void *a, void *b, uint32_t desc) 170*3774030aSRichard Henderson { 171*3774030aSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 172*3774030aSRichard Henderson intptr_t i; 173*3774030aSRichard Henderson 174*3774030aSRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec8)) { 175*3774030aSRichard Henderson *(vec8 *)(d + i) = *(vec8 *)(a + i) * *(vec8 *)(b + i); 176*3774030aSRichard Henderson } 177*3774030aSRichard Henderson clear_high(d, oprsz, desc); 178*3774030aSRichard Henderson } 179*3774030aSRichard Henderson 180*3774030aSRichard Henderson void HELPER(gvec_mul16)(void *d, void *a, void *b, uint32_t desc) 181*3774030aSRichard Henderson { 182*3774030aSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 183*3774030aSRichard Henderson intptr_t i; 184*3774030aSRichard Henderson 185*3774030aSRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec16)) { 186*3774030aSRichard Henderson *(vec16 *)(d + i) = *(vec16 *)(a + i) * *(vec16 *)(b + i); 187*3774030aSRichard Henderson } 188*3774030aSRichard Henderson clear_high(d, oprsz, desc); 189*3774030aSRichard Henderson } 190*3774030aSRichard Henderson 191*3774030aSRichard Henderson void HELPER(gvec_mul32)(void *d, void *a, void *b, uint32_t desc) 192*3774030aSRichard Henderson { 193*3774030aSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 194*3774030aSRichard Henderson intptr_t i; 195*3774030aSRichard Henderson 196*3774030aSRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec32)) { 197*3774030aSRichard Henderson *(vec32 *)(d + i) = *(vec32 *)(a + i) * *(vec32 *)(b + i); 198*3774030aSRichard Henderson } 199*3774030aSRichard Henderson clear_high(d, oprsz, desc); 200*3774030aSRichard Henderson } 201*3774030aSRichard Henderson 202*3774030aSRichard Henderson void HELPER(gvec_mul64)(void *d, void *a, void *b, uint32_t desc) 203*3774030aSRichard Henderson { 204*3774030aSRichard Henderson intptr_t oprsz = simd_oprsz(desc); 205*3774030aSRichard Henderson intptr_t i; 206*3774030aSRichard Henderson 207*3774030aSRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 208*3774030aSRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) * *(vec64 *)(b + i); 209*3774030aSRichard Henderson } 210*3774030aSRichard Henderson clear_high(d, oprsz, desc); 211*3774030aSRichard Henderson } 212*3774030aSRichard Henderson 213db432672SRichard Henderson void HELPER(gvec_neg8)(void *d, void *a, uint32_t desc) 214db432672SRichard Henderson { 215db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 216db432672SRichard Henderson intptr_t i; 217db432672SRichard Henderson 218db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec8)) { 219db432672SRichard Henderson *(vec8 *)(d + i) = -*(vec8 *)(a + i); 220db432672SRichard Henderson } 221db432672SRichard Henderson clear_high(d, oprsz, desc); 222db432672SRichard Henderson } 223db432672SRichard Henderson 224db432672SRichard Henderson void HELPER(gvec_neg16)(void *d, void *a, uint32_t desc) 225db432672SRichard Henderson { 226db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 227db432672SRichard Henderson intptr_t i; 228db432672SRichard Henderson 229db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec16)) { 230db432672SRichard Henderson *(vec16 *)(d + i) = -*(vec16 *)(a + i); 231db432672SRichard Henderson } 232db432672SRichard Henderson clear_high(d, oprsz, desc); 233db432672SRichard Henderson } 234db432672SRichard Henderson 235db432672SRichard Henderson void HELPER(gvec_neg32)(void *d, void *a, uint32_t desc) 236db432672SRichard Henderson { 237db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 238db432672SRichard Henderson intptr_t i; 239db432672SRichard Henderson 240db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec32)) { 241db432672SRichard Henderson *(vec32 *)(d + i) = -*(vec32 *)(a + i); 242db432672SRichard Henderson } 243db432672SRichard Henderson clear_high(d, oprsz, desc); 244db432672SRichard Henderson } 245db432672SRichard Henderson 246db432672SRichard Henderson void HELPER(gvec_neg64)(void *d, void *a, uint32_t desc) 247db432672SRichard Henderson { 248db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 249db432672SRichard Henderson intptr_t i; 250db432672SRichard Henderson 251db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 252db432672SRichard Henderson *(vec64 *)(d + i) = -*(vec64 *)(a + i); 253db432672SRichard Henderson } 254db432672SRichard Henderson clear_high(d, oprsz, desc); 255db432672SRichard Henderson } 256db432672SRichard Henderson 257db432672SRichard Henderson void HELPER(gvec_mov)(void *d, void *a, uint32_t desc) 258db432672SRichard Henderson { 259db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 260db432672SRichard Henderson 261db432672SRichard Henderson memcpy(d, a, oprsz); 262db432672SRichard Henderson clear_high(d, oprsz, desc); 263db432672SRichard Henderson } 264db432672SRichard Henderson 265db432672SRichard Henderson void HELPER(gvec_dup64)(void *d, uint32_t desc, uint64_t c) 266db432672SRichard Henderson { 267db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 268db432672SRichard Henderson intptr_t i; 269db432672SRichard Henderson 270db432672SRichard Henderson if (c == 0) { 271db432672SRichard Henderson oprsz = 0; 272db432672SRichard Henderson } else { 273db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 274db432672SRichard Henderson *(uint64_t *)(d + i) = c; 275db432672SRichard Henderson } 276db432672SRichard Henderson } 277db432672SRichard Henderson clear_high(d, oprsz, desc); 278db432672SRichard Henderson } 279db432672SRichard Henderson 280db432672SRichard Henderson void HELPER(gvec_dup32)(void *d, uint32_t desc, uint32_t c) 281db432672SRichard Henderson { 282db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 283db432672SRichard Henderson intptr_t i; 284db432672SRichard Henderson 285db432672SRichard Henderson if (c == 0) { 286db432672SRichard Henderson oprsz = 0; 287db432672SRichard Henderson } else { 288db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 289db432672SRichard Henderson *(uint32_t *)(d + i) = c; 290db432672SRichard Henderson } 291db432672SRichard Henderson } 292db432672SRichard Henderson clear_high(d, oprsz, desc); 293db432672SRichard Henderson } 294db432672SRichard Henderson 295db432672SRichard Henderson void HELPER(gvec_dup16)(void *d, uint32_t desc, uint32_t c) 296db432672SRichard Henderson { 297db432672SRichard Henderson HELPER(gvec_dup32)(d, desc, 0x00010001 * (c & 0xffff)); 298db432672SRichard Henderson } 299db432672SRichard Henderson 300db432672SRichard Henderson void HELPER(gvec_dup8)(void *d, uint32_t desc, uint32_t c) 301db432672SRichard Henderson { 302db432672SRichard Henderson HELPER(gvec_dup32)(d, desc, 0x01010101 * (c & 0xff)); 303db432672SRichard Henderson } 304db432672SRichard Henderson 305db432672SRichard Henderson void HELPER(gvec_not)(void *d, void *a, uint32_t desc) 306db432672SRichard Henderson { 307db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 308db432672SRichard Henderson intptr_t i; 309db432672SRichard Henderson 310db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 311db432672SRichard Henderson *(vec64 *)(d + i) = ~*(vec64 *)(a + i); 312db432672SRichard Henderson } 313db432672SRichard Henderson clear_high(d, oprsz, desc); 314db432672SRichard Henderson } 315db432672SRichard Henderson 316db432672SRichard Henderson void HELPER(gvec_and)(void *d, void *a, void *b, uint32_t desc) 317db432672SRichard Henderson { 318db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 319db432672SRichard Henderson intptr_t i; 320db432672SRichard Henderson 321db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 322db432672SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) & *(vec64 *)(b + i); 323db432672SRichard Henderson } 324db432672SRichard Henderson clear_high(d, oprsz, desc); 325db432672SRichard Henderson } 326db432672SRichard Henderson 327db432672SRichard Henderson void HELPER(gvec_or)(void *d, void *a, void *b, uint32_t desc) 328db432672SRichard Henderson { 329db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 330db432672SRichard Henderson intptr_t i; 331db432672SRichard Henderson 332db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 333db432672SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) | *(vec64 *)(b + i); 334db432672SRichard Henderson } 335db432672SRichard Henderson clear_high(d, oprsz, desc); 336db432672SRichard Henderson } 337db432672SRichard Henderson 338db432672SRichard Henderson void HELPER(gvec_xor)(void *d, void *a, void *b, uint32_t desc) 339db432672SRichard Henderson { 340db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 341db432672SRichard Henderson intptr_t i; 342db432672SRichard Henderson 343db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 344db432672SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) ^ *(vec64 *)(b + i); 345db432672SRichard Henderson } 346db432672SRichard Henderson clear_high(d, oprsz, desc); 347db432672SRichard Henderson } 348db432672SRichard Henderson 349db432672SRichard Henderson void HELPER(gvec_andc)(void *d, void *a, void *b, uint32_t desc) 350db432672SRichard Henderson { 351db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 352db432672SRichard Henderson intptr_t i; 353db432672SRichard Henderson 354db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 355db432672SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) &~ *(vec64 *)(b + i); 356db432672SRichard Henderson } 357db432672SRichard Henderson clear_high(d, oprsz, desc); 358db432672SRichard Henderson } 359db432672SRichard Henderson 360db432672SRichard Henderson void HELPER(gvec_orc)(void *d, void *a, void *b, uint32_t desc) 361db432672SRichard Henderson { 362db432672SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 363db432672SRichard Henderson intptr_t i; 364db432672SRichard Henderson 365db432672SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 366db432672SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) |~ *(vec64 *)(b + i); 367db432672SRichard Henderson } 368db432672SRichard Henderson clear_high(d, oprsz, desc); 369db432672SRichard Henderson } 370d0ec9796SRichard Henderson 371d0ec9796SRichard Henderson void HELPER(gvec_shl8i)(void *d, void *a, uint32_t desc) 372d0ec9796SRichard Henderson { 373d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 374d0ec9796SRichard Henderson int shift = simd_data(desc); 375d0ec9796SRichard Henderson intptr_t i; 376d0ec9796SRichard Henderson 377d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec8)) { 378d0ec9796SRichard Henderson *(vec8 *)(d + i) = *(vec8 *)(a + i) << shift; 379d0ec9796SRichard Henderson } 380d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 381d0ec9796SRichard Henderson } 382d0ec9796SRichard Henderson 383d0ec9796SRichard Henderson void HELPER(gvec_shl16i)(void *d, void *a, uint32_t desc) 384d0ec9796SRichard Henderson { 385d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 386d0ec9796SRichard Henderson int shift = simd_data(desc); 387d0ec9796SRichard Henderson intptr_t i; 388d0ec9796SRichard Henderson 389d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec16)) { 390d0ec9796SRichard Henderson *(vec16 *)(d + i) = *(vec16 *)(a + i) << shift; 391d0ec9796SRichard Henderson } 392d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 393d0ec9796SRichard Henderson } 394d0ec9796SRichard Henderson 395d0ec9796SRichard Henderson void HELPER(gvec_shl32i)(void *d, void *a, uint32_t desc) 396d0ec9796SRichard Henderson { 397d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 398d0ec9796SRichard Henderson int shift = simd_data(desc); 399d0ec9796SRichard Henderson intptr_t i; 400d0ec9796SRichard Henderson 401d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec32)) { 402d0ec9796SRichard Henderson *(vec32 *)(d + i) = *(vec32 *)(a + i) << shift; 403d0ec9796SRichard Henderson } 404d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 405d0ec9796SRichard Henderson } 406d0ec9796SRichard Henderson 407d0ec9796SRichard Henderson void HELPER(gvec_shl64i)(void *d, void *a, uint32_t desc) 408d0ec9796SRichard Henderson { 409d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 410d0ec9796SRichard Henderson int shift = simd_data(desc); 411d0ec9796SRichard Henderson intptr_t i; 412d0ec9796SRichard Henderson 413d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 414d0ec9796SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) << shift; 415d0ec9796SRichard Henderson } 416d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 417d0ec9796SRichard Henderson } 418d0ec9796SRichard Henderson 419d0ec9796SRichard Henderson void HELPER(gvec_shr8i)(void *d, void *a, uint32_t desc) 420d0ec9796SRichard Henderson { 421d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 422d0ec9796SRichard Henderson int shift = simd_data(desc); 423d0ec9796SRichard Henderson intptr_t i; 424d0ec9796SRichard Henderson 425d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec8)) { 426d0ec9796SRichard Henderson *(vec8 *)(d + i) = *(vec8 *)(a + i) >> shift; 427d0ec9796SRichard Henderson } 428d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 429d0ec9796SRichard Henderson } 430d0ec9796SRichard Henderson 431d0ec9796SRichard Henderson void HELPER(gvec_shr16i)(void *d, void *a, uint32_t desc) 432d0ec9796SRichard Henderson { 433d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 434d0ec9796SRichard Henderson int shift = simd_data(desc); 435d0ec9796SRichard Henderson intptr_t i; 436d0ec9796SRichard Henderson 437d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec16)) { 438d0ec9796SRichard Henderson *(vec16 *)(d + i) = *(vec16 *)(a + i) >> shift; 439d0ec9796SRichard Henderson } 440d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 441d0ec9796SRichard Henderson } 442d0ec9796SRichard Henderson 443d0ec9796SRichard Henderson void HELPER(gvec_shr32i)(void *d, void *a, uint32_t desc) 444d0ec9796SRichard Henderson { 445d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 446d0ec9796SRichard Henderson int shift = simd_data(desc); 447d0ec9796SRichard Henderson intptr_t i; 448d0ec9796SRichard Henderson 449d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec32)) { 450d0ec9796SRichard Henderson *(vec32 *)(d + i) = *(vec32 *)(a + i) >> shift; 451d0ec9796SRichard Henderson } 452d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 453d0ec9796SRichard Henderson } 454d0ec9796SRichard Henderson 455d0ec9796SRichard Henderson void HELPER(gvec_shr64i)(void *d, void *a, uint32_t desc) 456d0ec9796SRichard Henderson { 457d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 458d0ec9796SRichard Henderson int shift = simd_data(desc); 459d0ec9796SRichard Henderson intptr_t i; 460d0ec9796SRichard Henderson 461d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 462d0ec9796SRichard Henderson *(vec64 *)(d + i) = *(vec64 *)(a + i) >> shift; 463d0ec9796SRichard Henderson } 464d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 465d0ec9796SRichard Henderson } 466d0ec9796SRichard Henderson 467d0ec9796SRichard Henderson void HELPER(gvec_sar8i)(void *d, void *a, uint32_t desc) 468d0ec9796SRichard Henderson { 469d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 470d0ec9796SRichard Henderson int shift = simd_data(desc); 471d0ec9796SRichard Henderson intptr_t i; 472d0ec9796SRichard Henderson 473d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec8)) { 474d0ec9796SRichard Henderson *(svec8 *)(d + i) = *(svec8 *)(a + i) >> shift; 475d0ec9796SRichard Henderson } 476d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 477d0ec9796SRichard Henderson } 478d0ec9796SRichard Henderson 479d0ec9796SRichard Henderson void HELPER(gvec_sar16i)(void *d, void *a, uint32_t desc) 480d0ec9796SRichard Henderson { 481d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 482d0ec9796SRichard Henderson int shift = simd_data(desc); 483d0ec9796SRichard Henderson intptr_t i; 484d0ec9796SRichard Henderson 485d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec16)) { 486d0ec9796SRichard Henderson *(svec16 *)(d + i) = *(svec16 *)(a + i) >> shift; 487d0ec9796SRichard Henderson } 488d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 489d0ec9796SRichard Henderson } 490d0ec9796SRichard Henderson 491d0ec9796SRichard Henderson void HELPER(gvec_sar32i)(void *d, void *a, uint32_t desc) 492d0ec9796SRichard Henderson { 493d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 494d0ec9796SRichard Henderson int shift = simd_data(desc); 495d0ec9796SRichard Henderson intptr_t i; 496d0ec9796SRichard Henderson 497d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec32)) { 498d0ec9796SRichard Henderson *(svec32 *)(d + i) = *(svec32 *)(a + i) >> shift; 499d0ec9796SRichard Henderson } 500d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 501d0ec9796SRichard Henderson } 502d0ec9796SRichard Henderson 503d0ec9796SRichard Henderson void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc) 504d0ec9796SRichard Henderson { 505d0ec9796SRichard Henderson intptr_t oprsz = simd_oprsz(desc); 506d0ec9796SRichard Henderson int shift = simd_data(desc); 507d0ec9796SRichard Henderson intptr_t i; 508d0ec9796SRichard Henderson 509d0ec9796SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { 510d0ec9796SRichard Henderson *(svec64 *)(d + i) = *(svec64 *)(a + i) >> shift; 511d0ec9796SRichard Henderson } 512d0ec9796SRichard Henderson clear_high(d, oprsz, desc); 513d0ec9796SRichard Henderson } 514212be173SRichard Henderson 515212be173SRichard Henderson /* If vectors are enabled, the compiler fills in -1 for true. 516212be173SRichard Henderson Otherwise, we must take care of this by hand. */ 517212be173SRichard Henderson #ifdef CONFIG_VECTOR16 518212be173SRichard Henderson # define DO_CMP0(X) X 519212be173SRichard Henderson #else 520212be173SRichard Henderson # define DO_CMP0(X) -(X) 521212be173SRichard Henderson #endif 522212be173SRichard Henderson 523212be173SRichard Henderson #define DO_CMP1(NAME, TYPE, OP) \ 524212be173SRichard Henderson void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc) \ 525212be173SRichard Henderson { \ 526212be173SRichard Henderson intptr_t oprsz = simd_oprsz(desc); \ 527212be173SRichard Henderson intptr_t i; \ 528212be173SRichard Henderson for (i = 0; i < oprsz; i += sizeof(vec64)) { \ 529212be173SRichard Henderson *(TYPE *)(d + i) = DO_CMP0(*(TYPE *)(a + i) OP *(TYPE *)(b + i)); \ 530212be173SRichard Henderson } \ 531212be173SRichard Henderson clear_high(d, oprsz, desc); \ 532212be173SRichard Henderson } 533212be173SRichard Henderson 534212be173SRichard Henderson #define DO_CMP2(SZ) \ 535212be173SRichard Henderson DO_CMP1(gvec_eq##SZ, vec##SZ, ==) \ 536212be173SRichard Henderson DO_CMP1(gvec_ne##SZ, vec##SZ, !=) \ 537212be173SRichard Henderson DO_CMP1(gvec_lt##SZ, svec##SZ, <) \ 538212be173SRichard Henderson DO_CMP1(gvec_le##SZ, svec##SZ, <=) \ 539212be173SRichard Henderson DO_CMP1(gvec_ltu##SZ, vec##SZ, <) \ 540212be173SRichard Henderson DO_CMP1(gvec_leu##SZ, vec##SZ, <=) 541212be173SRichard Henderson 542212be173SRichard Henderson DO_CMP2(8) 543212be173SRichard Henderson DO_CMP2(16) 544212be173SRichard Henderson DO_CMP2(32) 545212be173SRichard Henderson DO_CMP2(64) 546212be173SRichard Henderson 547212be173SRichard Henderson #undef DO_CMP0 548212be173SRichard Henderson #undef DO_CMP1 549212be173SRichard Henderson #undef DO_CMP2 550