1 /* 2 * ARM AdvSIMD / SVE Vector Operations 3 * 4 * Copyright (c) 2018 Linaro 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 #include "cpu.h" 22 #include "exec/exec-all.h" 23 #include "exec/helper-proto.h" 24 #include "tcg/tcg-gvec-desc.h" 25 26 27 #define SET_QC() env->vfp.xregs[ARM_VFP_FPSCR] |= CPSR_Q 28 29 static void clear_tail(void *vd, uintptr_t opr_sz, uintptr_t max_sz) 30 { 31 uint64_t *d = vd + opr_sz; 32 uintptr_t i; 33 34 for (i = opr_sz; i < max_sz; i += 8) { 35 *d++ = 0; 36 } 37 } 38 39 /* Signed saturating rounding doubling multiply-accumulate high half, 16-bit */ 40 static uint16_t inl_qrdmlah_s16(CPUARMState *env, int16_t src1, 41 int16_t src2, int16_t src3) 42 { 43 /* Simplify: 44 * = ((a3 << 16) + ((e1 * e2) << 1) + (1 << 15)) >> 16 45 * = ((a3 << 15) + (e1 * e2) + (1 << 14)) >> 15 46 */ 47 int32_t ret = (int32_t)src1 * src2; 48 ret = ((int32_t)src3 << 15) + ret + (1 << 14); 49 ret >>= 15; 50 if (ret != (int16_t)ret) { 51 SET_QC(); 52 ret = (ret < 0 ? -0x8000 : 0x7fff); 53 } 54 return ret; 55 } 56 57 uint32_t HELPER(neon_qrdmlah_s16)(CPUARMState *env, uint32_t src1, 58 uint32_t src2, uint32_t src3) 59 { 60 uint16_t e1 = inl_qrdmlah_s16(env, src1, src2, src3); 61 uint16_t e2 = inl_qrdmlah_s16(env, src1 >> 16, src2 >> 16, src3 >> 16); 62 return deposit32(e1, 16, 16, e2); 63 } 64 65 void HELPER(gvec_qrdmlah_s16)(void *vd, void *vn, void *vm, 66 void *ve, uint32_t desc) 67 { 68 uintptr_t opr_sz = simd_oprsz(desc); 69 int16_t *d = vd; 70 int16_t *n = vn; 71 int16_t *m = vm; 72 CPUARMState *env = ve; 73 uintptr_t i; 74 75 for (i = 0; i < opr_sz / 2; ++i) { 76 d[i] = inl_qrdmlah_s16(env, n[i], m[i], d[i]); 77 } 78 clear_tail(d, opr_sz, simd_maxsz(desc)); 79 } 80 81 /* Signed saturating rounding doubling multiply-subtract high half, 16-bit */ 82 static uint16_t inl_qrdmlsh_s16(CPUARMState *env, int16_t src1, 83 int16_t src2, int16_t src3) 84 { 85 /* Similarly, using subtraction: 86 * = ((a3 << 16) - ((e1 * e2) << 1) + (1 << 15)) >> 16 87 * = ((a3 << 15) - (e1 * e2) + (1 << 14)) >> 15 88 */ 89 int32_t ret = (int32_t)src1 * src2; 90 ret = ((int32_t)src3 << 15) - ret + (1 << 14); 91 ret >>= 15; 92 if (ret != (int16_t)ret) { 93 SET_QC(); 94 ret = (ret < 0 ? -0x8000 : 0x7fff); 95 } 96 return ret; 97 } 98 99 uint32_t HELPER(neon_qrdmlsh_s16)(CPUARMState *env, uint32_t src1, 100 uint32_t src2, uint32_t src3) 101 { 102 uint16_t e1 = inl_qrdmlsh_s16(env, src1, src2, src3); 103 uint16_t e2 = inl_qrdmlsh_s16(env, src1 >> 16, src2 >> 16, src3 >> 16); 104 return deposit32(e1, 16, 16, e2); 105 } 106 107 void HELPER(gvec_qrdmlsh_s16)(void *vd, void *vn, void *vm, 108 void *ve, uint32_t desc) 109 { 110 uintptr_t opr_sz = simd_oprsz(desc); 111 int16_t *d = vd; 112 int16_t *n = vn; 113 int16_t *m = vm; 114 CPUARMState *env = ve; 115 uintptr_t i; 116 117 for (i = 0; i < opr_sz / 2; ++i) { 118 d[i] = inl_qrdmlsh_s16(env, n[i], m[i], d[i]); 119 } 120 clear_tail(d, opr_sz, simd_maxsz(desc)); 121 } 122 123 /* Signed saturating rounding doubling multiply-accumulate high half, 32-bit */ 124 uint32_t HELPER(neon_qrdmlah_s32)(CPUARMState *env, int32_t src1, 125 int32_t src2, int32_t src3) 126 { 127 /* Simplify similarly to int_qrdmlah_s16 above. */ 128 int64_t ret = (int64_t)src1 * src2; 129 ret = ((int64_t)src3 << 31) + ret + (1 << 30); 130 ret >>= 31; 131 if (ret != (int32_t)ret) { 132 SET_QC(); 133 ret = (ret < 0 ? INT32_MIN : INT32_MAX); 134 } 135 return ret; 136 } 137 138 void HELPER(gvec_qrdmlah_s32)(void *vd, void *vn, void *vm, 139 void *ve, uint32_t desc) 140 { 141 uintptr_t opr_sz = simd_oprsz(desc); 142 int32_t *d = vd; 143 int32_t *n = vn; 144 int32_t *m = vm; 145 CPUARMState *env = ve; 146 uintptr_t i; 147 148 for (i = 0; i < opr_sz / 4; ++i) { 149 d[i] = helper_neon_qrdmlah_s32(env, n[i], m[i], d[i]); 150 } 151 clear_tail(d, opr_sz, simd_maxsz(desc)); 152 } 153 154 /* Signed saturating rounding doubling multiply-subtract high half, 32-bit */ 155 uint32_t HELPER(neon_qrdmlsh_s32)(CPUARMState *env, int32_t src1, 156 int32_t src2, int32_t src3) 157 { 158 /* Simplify similarly to int_qrdmlsh_s16 above. */ 159 int64_t ret = (int64_t)src1 * src2; 160 ret = ((int64_t)src3 << 31) - ret + (1 << 30); 161 ret >>= 31; 162 if (ret != (int32_t)ret) { 163 SET_QC(); 164 ret = (ret < 0 ? INT32_MIN : INT32_MAX); 165 } 166 return ret; 167 } 168 169 void HELPER(gvec_qrdmlsh_s32)(void *vd, void *vn, void *vm, 170 void *ve, uint32_t desc) 171 { 172 uintptr_t opr_sz = simd_oprsz(desc); 173 int32_t *d = vd; 174 int32_t *n = vn; 175 int32_t *m = vm; 176 CPUARMState *env = ve; 177 uintptr_t i; 178 179 for (i = 0; i < opr_sz / 4; ++i) { 180 d[i] = helper_neon_qrdmlsh_s32(env, n[i], m[i], d[i]); 181 } 182 clear_tail(d, opr_sz, simd_maxsz(desc)); 183 } 184