xref: /qemu/target/arm/tcg/vec_helper.c (revision e7186d822955c351e4aac504380f82217c670321)
1 /*
2  * ARM AdvSIMD / SVE Vector Operations
3  *
4  * Copyright (c) 2018 Linaro
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "cpu.h"
22 #include "exec/exec-all.h"
23 #include "exec/helper-proto.h"
24 #include "tcg/tcg-gvec-desc.h"
25 
26 
/*
 * Set the CPSR_Q bit in the FPSCR slot of env->vfp.xregs[].
 * Requires a variable named 'env' to be in scope at the point of use.
 * The expansion is parenthesized so that the macro behaves as a single
 * primary expression wherever it appears (e.g. "SET_QC() & mask").
 */
#define SET_QC() (env->vfp.xregs[ARM_VFP_FPSCR] |= CPSR_Q)
28 
/*
 * Zero the part of the buffer at @vd that lies between byte offset
 * @opr_sz and byte offset @max_sz.
 *
 * The tail is cleared with whole 64-bit stores, one per 8 bytes of
 * offset, so the region is written in uint64_t units starting at
 * vd + opr_sz.  Nothing is written when @opr_sz >= @max_sz.
 */
static void clear_tail(void *vd, uintptr_t opr_sz, uintptr_t max_sz)
{
    uint64_t *tail = (uint64_t *)((char *)vd + opr_sz);
    uintptr_t off = opr_sz;

    while (off < max_sz) {
        *tail = 0;
        tail++;
        off += 8;
    }
}
38 
39 /* Signed saturating rounding doubling multiply-accumulate high half, 16-bit */
40 static uint16_t inl_qrdmlah_s16(CPUARMState *env, int16_t src1,
41                                 int16_t src2, int16_t src3)
42 {
43     /* Simplify:
44      * = ((a3 << 16) + ((e1 * e2) << 1) + (1 << 15)) >> 16
45      * = ((a3 << 15) + (e1 * e2) + (1 << 14)) >> 15
46      */
47     int32_t ret = (int32_t)src1 * src2;
48     ret = ((int32_t)src3 << 15) + ret + (1 << 14);
49     ret >>= 15;
50     if (ret != (int16_t)ret) {
51         SET_QC();
52         ret = (ret < 0 ? -0x8000 : 0x7fff);
53     }
54     return ret;
55 }
56 
57 uint32_t HELPER(neon_qrdmlah_s16)(CPUARMState *env, uint32_t src1,
58                                   uint32_t src2, uint32_t src3)
59 {
60     uint16_t e1 = inl_qrdmlah_s16(env, src1, src2, src3);
61     uint16_t e2 = inl_qrdmlah_s16(env, src1 >> 16, src2 >> 16, src3 >> 16);
62     return deposit32(e1, 16, 16, e2);
63 }
64 
65 void HELPER(gvec_qrdmlah_s16)(void *vd, void *vn, void *vm,
66                               void *ve, uint32_t desc)
67 {
68     uintptr_t opr_sz = simd_oprsz(desc);
69     int16_t *d = vd;
70     int16_t *n = vn;
71     int16_t *m = vm;
72     CPUARMState *env = ve;
73     uintptr_t i;
74 
75     for (i = 0; i < opr_sz / 2; ++i) {
76         d[i] = inl_qrdmlah_s16(env, n[i], m[i], d[i]);
77     }
78     clear_tail(d, opr_sz, simd_maxsz(desc));
79 }
80 
81 /* Signed saturating rounding doubling multiply-subtract high half, 16-bit */
82 static uint16_t inl_qrdmlsh_s16(CPUARMState *env, int16_t src1,
83                                 int16_t src2, int16_t src3)
84 {
85     /* Similarly, using subtraction:
86      * = ((a3 << 16) - ((e1 * e2) << 1) + (1 << 15)) >> 16
87      * = ((a3 << 15) - (e1 * e2) + (1 << 14)) >> 15
88      */
89     int32_t ret = (int32_t)src1 * src2;
90     ret = ((int32_t)src3 << 15) - ret + (1 << 14);
91     ret >>= 15;
92     if (ret != (int16_t)ret) {
93         SET_QC();
94         ret = (ret < 0 ? -0x8000 : 0x7fff);
95     }
96     return ret;
97 }
98 
99 uint32_t HELPER(neon_qrdmlsh_s16)(CPUARMState *env, uint32_t src1,
100                                   uint32_t src2, uint32_t src3)
101 {
102     uint16_t e1 = inl_qrdmlsh_s16(env, src1, src2, src3);
103     uint16_t e2 = inl_qrdmlsh_s16(env, src1 >> 16, src2 >> 16, src3 >> 16);
104     return deposit32(e1, 16, 16, e2);
105 }
106 
107 void HELPER(gvec_qrdmlsh_s16)(void *vd, void *vn, void *vm,
108                               void *ve, uint32_t desc)
109 {
110     uintptr_t opr_sz = simd_oprsz(desc);
111     int16_t *d = vd;
112     int16_t *n = vn;
113     int16_t *m = vm;
114     CPUARMState *env = ve;
115     uintptr_t i;
116 
117     for (i = 0; i < opr_sz / 2; ++i) {
118         d[i] = inl_qrdmlsh_s16(env, n[i], m[i], d[i]);
119     }
120     clear_tail(d, opr_sz, simd_maxsz(desc));
121 }
122 
123 /* Signed saturating rounding doubling multiply-accumulate high half, 32-bit */
124 uint32_t HELPER(neon_qrdmlah_s32)(CPUARMState *env, int32_t src1,
125                                   int32_t src2, int32_t src3)
126 {
127     /* Simplify similarly to int_qrdmlah_s16 above.  */
128     int64_t ret = (int64_t)src1 * src2;
129     ret = ((int64_t)src3 << 31) + ret + (1 << 30);
130     ret >>= 31;
131     if (ret != (int32_t)ret) {
132         SET_QC();
133         ret = (ret < 0 ? INT32_MIN : INT32_MAX);
134     }
135     return ret;
136 }
137 
138 void HELPER(gvec_qrdmlah_s32)(void *vd, void *vn, void *vm,
139                               void *ve, uint32_t desc)
140 {
141     uintptr_t opr_sz = simd_oprsz(desc);
142     int32_t *d = vd;
143     int32_t *n = vn;
144     int32_t *m = vm;
145     CPUARMState *env = ve;
146     uintptr_t i;
147 
148     for (i = 0; i < opr_sz / 4; ++i) {
149         d[i] = helper_neon_qrdmlah_s32(env, n[i], m[i], d[i]);
150     }
151     clear_tail(d, opr_sz, simd_maxsz(desc));
152 }
153 
154 /* Signed saturating rounding doubling multiply-subtract high half, 32-bit */
155 uint32_t HELPER(neon_qrdmlsh_s32)(CPUARMState *env, int32_t src1,
156                                   int32_t src2, int32_t src3)
157 {
158     /* Simplify similarly to int_qrdmlsh_s16 above.  */
159     int64_t ret = (int64_t)src1 * src2;
160     ret = ((int64_t)src3 << 31) - ret + (1 << 30);
161     ret >>= 31;
162     if (ret != (int32_t)ret) {
163         SET_QC();
164         ret = (ret < 0 ? INT32_MIN : INT32_MAX);
165     }
166     return ret;
167 }
168 
169 void HELPER(gvec_qrdmlsh_s32)(void *vd, void *vn, void *vm,
170                               void *ve, uint32_t desc)
171 {
172     uintptr_t opr_sz = simd_oprsz(desc);
173     int32_t *d = vd;
174     int32_t *n = vn;
175     int32_t *m = vm;
176     CPUARMState *env = ve;
177     uintptr_t i;
178 
179     for (i = 0; i < opr_sz / 4; ++i) {
180         d[i] = helper_neon_qrdmlsh_s32(env, n[i], m[i], d[i]);
181     }
182     clear_tail(d, opr_sz, simd_maxsz(desc));
183 }
184