xref: /qemu/target/mips/tcg/msa_helper.c (revision 84307cd6027c4602913177ff09aeefa4743b7234)
1 /*
2  * MIPS SIMD Architecture Module Instruction emulation helpers for QEMU.
3  *
4  * Copyright (c) 2014 Imagination Technologies
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "cpu.h"
22 #include "internal.h"
23 #include "tcg/tcg.h"
24 #include "accel/tcg/cpu-ldst.h"
25 #include "accel/tcg/probe.h"
26 #include "exec/helper-proto.h"
27 #include "exec/memop.h"
28 #include "exec/target_page.h"
29 #include "fpu/softfloat.h"
30 #include "fpu_helper.h"
31 
/* Data format min and max values */

/* Element width in bits for format code df: 1 << (df + 3), i.e. 8 for df 0. */
#define DF_BITS(df) (1 << ((df) + 3))

/*
 * Largest signed value of a DF_BITS(df)-bit element (DF_*) or of an
 * m-bit field (M_*): 2^(width - 1) - 1.
 */
#define DF_MAX_INT(df)  (int64_t)((1LL << (DF_BITS(df) - 1)) - 1)
#define M_MAX_INT(m)    (int64_t)((1LL << ((m)         - 1)) - 1)

/* Smallest signed value for the same widths: -2^(width - 1). */
#define DF_MIN_INT(df)  (int64_t)(-(1LL << (DF_BITS(df) - 1)))
#define M_MIN_INT(m)    (int64_t)(-(1LL << ((m)         - 1)))

/*
 * All-ones value of the given width, obtained by shifting -1ULL right by
 * (64 - width).  A width of 0 would make the shift count 64, so the width
 * has to be in 1..64.
 */
#define DF_MAX_UINT(df) (uint64_t)(-1ULL >> (64 - DF_BITS(df)))
#define M_MAX_UINT(m)   (uint64_t)(-1ULL >> (64 - (m)))

/* Keep only the low DF_BITS(df) bits of x (zero-extension of the element). */
#define UNSIGNED(x, df) ((x) & DF_MAX_UINT(df))
/*
 * Move the low DF_BITS(df) bits of x to the top of an int64_t and shift
 * them back down; this sign-extends the element provided the right shift
 * of a negative int64_t is arithmetic.
 * NOTE(review): x is not parenthesized under the cast, so for an operator
 * expression argument the cast applies to the first operand only.
 */
#define SIGNED(x, df)                                                   \
    ((((int64_t)x) << (64 - DF_BITS(df))) >> (64 - DF_BITS(df)))

/* Element-by-element access macros */
/* MSA_WRLEN (defined outside this section) divided by the element width. */
#define DF_ELEMENTS(df) (MSA_WRLEN / DF_BITS(df))
50 
51 
52 
53 /*
54  * Bit Count
55  * ---------
56  *
57  * +---------------+----------------------------------------------------------+
58  * | NLOC.B        | Vector Leading Ones Count (byte)                         |
59  * | NLOC.H        | Vector Leading Ones Count (halfword)                     |
60  * | NLOC.W        | Vector Leading Ones Count (word)                         |
61  * | NLOC.D        | Vector Leading Ones Count (doubleword)                   |
62  * | NLZC.B        | Vector Leading Zeros Count (byte)                        |
63  * | NLZC.H        | Vector Leading Zeros Count (halfword)                    |
64  * | NLZC.W        | Vector Leading Zeros Count (word)                        |
65  * | NLZC.D        | Vector Leading Zeros Count (doubleword)                  |
66  * | PCNT.B        | Vector Population Count (byte)                           |
67  * | PCNT.H        | Vector Population Count (halfword)                       |
68  * | PCNT.W        | Vector Population Count (word)                           |
69  * | PCNT.D        | Vector Population Count (doubleword)                     |
70  * +---------------+----------------------------------------------------------+
71  */
72 
/*
 * Count the leading zero bits of one element.
 *
 * df selects the element width (DF_BITS(df) bits); only the low
 * DF_BITS(df) bits of arg are examined.  Returns the number of zero bits
 * above the most significant set bit, or DF_BITS(df) when the element is 0.
 */
static inline int64_t msa_nlzc_df(uint32_t df, int64_t arg)
{
    uint64_t value = UNSIGNED(arg, df);
    int zeros = DF_BITS(df);
    int step;

    /*
     * Binary search for the top set bit: whenever the upper part (above
     * 'step' bits) is non-zero, discard the lower part and credit 'step'
     * bits as not being leading zeros.
     */
    for (step = DF_BITS(df) / 2; step != 0; step >>= 1) {
        uint64_t upper = value >> step;

        if (upper != 0) {
            zeros -= step;
            value = upper;
        }
    }

    /* value is now 0 or 1; a remaining 1 is the located top bit itself. */
    return zeros - value;
}
93 
/*
 * Count the leading one bits of one DF_BITS(df)-bit element.
 *
 * The leading ones of arg are exactly the leading zeros of its bitwise
 * complement, so the element is inverted, truncated to the element width
 * and handed to msa_nlzc_df().
 */
static inline int64_t msa_nloc_df(uint32_t df, int64_t arg)
{
    int64_t inverted = UNSIGNED(~arg, df);

    return msa_nlzc_df(df, inverted);
}
98 
/*
 * NLOC.B - Vector Leading Ones Count (byte).
 *
 * wd and ws index env->active_fpu.fpr[]; every byte element of wd receives
 * the leading-ones count of the same element of ws.
 */
void helper_msa_nloc_b(CPUMIPSState *env, uint32_t wd, uint32_t ws)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src = &env->active_fpu.fpr[ws].wr;
    int i;

    /* Byte lanes 0..15, each derived only from the same source lane. */
    for (i = 0; i < 16; i++) {
        dst->b[i] = msa_nloc_df(DF_BYTE, src->b[i]);
    }
}
121 
/*
 * NLOC.H - Vector Leading Ones Count (halfword).
 *
 * wd and ws index env->active_fpu.fpr[]; every halfword element of wd
 * receives the leading-ones count of the same element of ws.
 */
void helper_msa_nloc_h(CPUMIPSState *env, uint32_t wd, uint32_t ws)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src = &env->active_fpu.fpr[ws].wr;
    int i;

    /* Halfword lanes 0..7. */
    for (i = 0; i < 8; i++) {
        dst->h[i] = msa_nloc_df(DF_HALF, src->h[i]);
    }
}
136 
/*
 * NLOC.W - Vector Leading Ones Count (word).
 *
 * wd and ws index env->active_fpu.fpr[]; every word element of wd receives
 * the leading-ones count of the same element of ws.
 */
void helper_msa_nloc_w(CPUMIPSState *env, uint32_t wd, uint32_t ws)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src = &env->active_fpu.fpr[ws].wr;
    int i;

    /* Word lanes 0..3. */
    for (i = 0; i < 4; i++) {
        dst->w[i] = msa_nloc_df(DF_WORD, src->w[i]);
    }
}
147 
/*
 * NLOC.D - Vector Leading Ones Count (doubleword).
 *
 * wd and ws index env->active_fpu.fpr[]; both doubleword elements of wd
 * receive the leading-ones count of the same element of ws.
 */
void helper_msa_nloc_d(CPUMIPSState *env, uint32_t wd, uint32_t ws)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src = &env->active_fpu.fpr[ws].wr;
    int i;

    /* Doubleword lanes 0..1. */
    for (i = 0; i < 2; i++) {
        dst->d[i] = msa_nloc_df(DF_DOUBLE, src->d[i]);
    }
}
156 
/*
 * NLZC.B - Vector Leading Zeros Count (byte).
 *
 * wd and ws index env->active_fpu.fpr[]; every byte element of wd receives
 * the leading-zeros count of the same element of ws.
 */
void helper_msa_nlzc_b(CPUMIPSState *env, uint32_t wd, uint32_t ws)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src = &env->active_fpu.fpr[ws].wr;
    int i;

    /* Byte lanes 0..15, each derived only from the same source lane. */
    for (i = 0; i < 16; i++) {
        dst->b[i] = msa_nlzc_df(DF_BYTE, src->b[i]);
    }
}
179 
/*
 * NLZC.H - Vector Leading Zeros Count (halfword).
 *
 * wd and ws index env->active_fpu.fpr[]; every halfword element of wd
 * receives the leading-zeros count of the same element of ws.
 */
void helper_msa_nlzc_h(CPUMIPSState *env, uint32_t wd, uint32_t ws)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src = &env->active_fpu.fpr[ws].wr;
    int i;

    /* Halfword lanes 0..7. */
    for (i = 0; i < 8; i++) {
        dst->h[i] = msa_nlzc_df(DF_HALF, src->h[i]);
    }
}
194 
/*
 * NLZC.W - Vector Leading Zeros Count (word).
 *
 * wd and ws index env->active_fpu.fpr[]; every word element of wd receives
 * the leading-zeros count of the same element of ws.
 */
void helper_msa_nlzc_w(CPUMIPSState *env, uint32_t wd, uint32_t ws)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src = &env->active_fpu.fpr[ws].wr;
    int i;

    /* Word lanes 0..3. */
    for (i = 0; i < 4; i++) {
        dst->w[i] = msa_nlzc_df(DF_WORD, src->w[i]);
    }
}
205 
/*
 * NLZC.D - Vector Leading Zeros Count (doubleword).
 *
 * wd and ws index env->active_fpu.fpr[]; both doubleword elements of wd
 * receive the leading-zeros count of the same element of ws.
 */
void helper_msa_nlzc_d(CPUMIPSState *env, uint32_t wd, uint32_t ws)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src = &env->active_fpu.fpr[ws].wr;
    int i;

    /* Doubleword lanes 0..1. */
    for (i = 0; i < 2; i++) {
        dst->d[i] = msa_nlzc_df(DF_DOUBLE, src->d[i]);
    }
}
214 
/*
 * Population count of one element.
 *
 * Only the low DF_BITS(df) bits of arg are considered.  Returns the number
 * of bits set in that element.
 */
static inline int64_t msa_pcnt_df(uint32_t df, int64_t arg)
{
    /* Entry k selects the low half of every 2^(k + 1)-bit group. */
    static const uint64_t half_mask[6] = {
        0x5555555555555555ULL,
        0x3333333333333333ULL,
        0x0F0F0F0F0F0F0F0FULL,
        0x00FF00FF00FF00FFULL,
        0x0000FFFF0000FFFFULL,
        0x00000000FFFFFFFFULL,
    };
    uint64_t sum = UNSIGNED(arg, df);
    int k;

    /*
     * Parallel reduction: at step k each 2^(k + 1)-bit group is replaced by
     * the sum of the counts held in its two halves.
     */
    for (k = 0; k < 6; k++) {
        const uint64_t m = half_mask[k];

        sum = (sum & m) + ((sum >> (1 << k)) & m);
    }

    return sum;
}
230 
/*
 * PCNT.B - Vector Population Count (byte).
 *
 * wd and ws index env->active_fpu.fpr[]; every byte element of wd receives
 * the number of set bits in the same element of ws.
 */
void helper_msa_pcnt_b(CPUMIPSState *env, uint32_t wd, uint32_t ws)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src = &env->active_fpu.fpr[ws].wr;
    int i;

    /* Byte lanes 0..15, each derived only from the same source lane. */
    for (i = 0; i < 16; i++) {
        dst->b[i] = msa_pcnt_df(DF_BYTE, src->b[i]);
    }
}
253 
/*
 * PCNT.H - Vector Population Count (halfword).
 *
 * wd and ws index env->active_fpu.fpr[]; every halfword element of wd
 * receives the number of set bits in the same element of ws.
 */
void helper_msa_pcnt_h(CPUMIPSState *env, uint32_t wd, uint32_t ws)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src = &env->active_fpu.fpr[ws].wr;
    int i;

    /* Halfword lanes 0..7. */
    for (i = 0; i < 8; i++) {
        dst->h[i] = msa_pcnt_df(DF_HALF, src->h[i]);
    }
}
268 
/*
 * PCNT.W - Vector Population Count (word).
 *
 * wd and ws index env->active_fpu.fpr[]; every word element of wd receives
 * the number of set bits in the same element of ws.
 */
void helper_msa_pcnt_w(CPUMIPSState *env, uint32_t wd, uint32_t ws)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src = &env->active_fpu.fpr[ws].wr;
    int i;

    /* Word lanes 0..3. */
    for (i = 0; i < 4; i++) {
        dst->w[i] = msa_pcnt_df(DF_WORD, src->w[i]);
    }
}
279 
/*
 * PCNT.D - Vector Population Count (doubleword).
 *
 * wd and ws index env->active_fpu.fpr[]; both doubleword elements of wd
 * receive the number of set bits in the same element of ws.
 */
void helper_msa_pcnt_d(CPUMIPSState *env, uint32_t wd, uint32_t ws)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src = &env->active_fpu.fpr[ws].wr;
    int i;

    /* Doubleword lanes 0..1. */
    for (i = 0; i < 2; i++) {
        dst->d[i] = msa_pcnt_df(DF_DOUBLE, src->d[i]);
    }
}
288 
289 
290 /*
291  * Bit Move
292  * --------
293  *
294  * +---------------+----------------------------------------------------------+
295  * | BINSL.B       | Vector Bit Insert Left (byte)                            |
296  * | BINSL.H       | Vector Bit Insert Left (halfword)                        |
297  * | BINSL.W       | Vector Bit Insert Left (word)                            |
298  * | BINSL.D       | Vector Bit Insert Left (doubleword)                      |
299  * | BINSR.B       | Vector Bit Insert Right (byte)                           |
300  * | BINSR.H       | Vector Bit Insert Right (halfword)                       |
301  * | BINSR.W       | Vector Bit Insert Right (word)                           |
302  * | BINSR.D       | Vector Bit Insert Right (doubleword)                     |
303  * | BMNZ.V        | Vector Bit Move If Not Zero                              |
304  * | BMZ.V         | Vector Bit Move If Zero                                  |
305  * | BSEL.V        | Vector Bit Select                                        |
306  * +---------------+----------------------------------------------------------+
307  */
308 
/*
 * Data format bit position and unsigned values
 *
 * BIT_POSITION(x, df) reinterprets x as an unsigned 64-bit value and
 * reduces it modulo the element width, giving a bit index in the range
 * 0 .. DF_BITS(df) - 1.
 */
#define BIT_POSITION(x, df) ((uint64_t)(x) % DF_BITS(df))
311 
/*
 * Bit insert left for one DF_BITS(df)-bit element.
 *
 * The number of bits to insert is BIT_POSITION(arg2, df) + 1.  That many
 * most significant bits of the result come from arg1; the remaining low
 * bits come from dest.  When the count equals the element width the result
 * is simply arg1 truncated to the element width.
 */
static inline int64_t msa_binsl_df(uint32_t df,
                                   int64_t dest, int64_t arg1, int64_t arg2)
{
    const uint64_t src_bits = UNSIGNED(arg1, df);
    const uint64_t dst_bits = UNSIGNED(dest, df);
    const int32_t n_copy = BIT_POSITION(arg2, df) + 1;
    const int32_t n_keep = DF_BITS(df) - n_copy;
    uint64_t kept_low;
    uint64_t copied_high;

    /* Whole element replaced; also avoids the zero-width keep below. */
    if (n_copy == DF_BITS(df)) {
        return src_bits;
    }

    /* Shift the top n_copy bits of dest out of the element, then back. */
    kept_low = UNSIGNED(UNSIGNED(dst_bits << n_copy, df) >> n_copy, df);
    /* Drop the low n_keep bits of arg1, leaving its top n_copy bits. */
    copied_high = UNSIGNED(UNSIGNED(src_bits >> n_keep, df) << n_keep, df);

    return kept_low | copied_high;
}
326 
/*
 * BINSL.B - Vector Bit Insert Left (byte).
 *
 * wd, ws and wt index env->active_fpu.fpr[].  Each byte element of wd is
 * both the "dest" input and the output of msa_binsl_df(); ws supplies the
 * bits to insert and wt the per-element bit count operand.
 */
void helper_msa_binsl_b(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src = &env->active_fpu.fpr[ws].wr;
    wr_t *ctl = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Byte lanes 0..15; every lane uses only its own three inputs. */
    for (i = 0; i < 16; i++) {
        dst->b[i] = msa_binsl_df(DF_BYTE, dst->b[i], src->b[i], ctl->b[i]);
    }
}
351 
/*
 * BINSL.H - Vector Bit Insert Left (halfword).
 *
 * wd, ws and wt index env->active_fpu.fpr[].  Each halfword element of wd
 * is both the "dest" input and the output of msa_binsl_df(); ws supplies
 * the bits to insert and wt the per-element bit count operand.
 */
void helper_msa_binsl_h(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src = &env->active_fpu.fpr[ws].wr;
    wr_t *ctl = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Halfword lanes 0..7. */
    for (i = 0; i < 8; i++) {
        dst->h[i] = msa_binsl_df(DF_HALF, dst->h[i], src->h[i], ctl->h[i]);
    }
}
368 
/*
 * BINSL.W - Vector Bit Insert Left (word).
 *
 * wd, ws and wt index env->active_fpu.fpr[].  Each word element of wd is
 * both the "dest" input and the output of msa_binsl_df(); ws supplies the
 * bits to insert and wt the per-element bit count operand.
 */
void helper_msa_binsl_w(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src = &env->active_fpu.fpr[ws].wr;
    wr_t *ctl = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Word lanes 0..3. */
    for (i = 0; i < 4; i++) {
        dst->w[i] = msa_binsl_df(DF_WORD, dst->w[i], src->w[i], ctl->w[i]);
    }
}
381 
/*
 * BINSL.D - Vector Bit Insert Left (doubleword).
 *
 * wd, ws and wt index env->active_fpu.fpr[].  Each doubleword element of
 * wd is both the "dest" input and the output of msa_binsl_df(); ws
 * supplies the bits to insert and wt the per-element bit count operand.
 */
void helper_msa_binsl_d(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src = &env->active_fpu.fpr[ws].wr;
    wr_t *ctl = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Doubleword lanes 0..1. */
    for (i = 0; i < 2; i++) {
        dst->d[i] = msa_binsl_df(DF_DOUBLE, dst->d[i], src->d[i], ctl->d[i]);
    }
}
392 
/*
 * Bit insert right for one DF_BITS(df)-bit element.
 *
 * The number of bits to insert is BIT_POSITION(arg2, df) + 1.  That many
 * least significant bits of the result come from arg1; the remaining high
 * bits come from dest.  When the count equals the element width the result
 * is simply arg1 truncated to the element width.
 */
static inline int64_t msa_binsr_df(uint32_t df,
                                   int64_t dest, int64_t arg1, int64_t arg2)
{
    const uint64_t src_bits = UNSIGNED(arg1, df);
    const uint64_t dst_bits = UNSIGNED(dest, df);
    const int32_t n_copy = BIT_POSITION(arg2, df) + 1;
    const int32_t n_keep = DF_BITS(df) - n_copy;
    uint64_t kept_high;
    uint64_t copied_low;

    /* Whole element replaced; also avoids the zero-width keep below. */
    if (n_copy == DF_BITS(df)) {
        return src_bits;
    }

    /* Clear the low n_copy bits of dest, keeping its upper part. */
    kept_high = UNSIGNED(UNSIGNED(dst_bits >> n_copy, df) << n_copy, df);
    /* Push the top n_keep bits of arg1 out of the element, then back. */
    copied_low = UNSIGNED(UNSIGNED(src_bits << n_keep, df) >> n_keep, df);

    return kept_high | copied_low;
}
407 
/*
 * BINSR.B - Vector Bit Insert Right (byte).
 *
 * wd, ws and wt index env->active_fpu.fpr[].  Each byte element of wd is
 * both the "dest" input and the output of msa_binsr_df(); ws supplies the
 * bits to insert and wt the per-element bit count operand.
 */
void helper_msa_binsr_b(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src = &env->active_fpu.fpr[ws].wr;
    wr_t *ctl = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Byte lanes 0..15; every lane uses only its own three inputs. */
    for (i = 0; i < 16; i++) {
        dst->b[i] = msa_binsr_df(DF_BYTE, dst->b[i], src->b[i], ctl->b[i]);
    }
}
432 
/*
 * BINSR.H - Vector Bit Insert Right (halfword).
 *
 * wd, ws and wt index env->active_fpu.fpr[].  Each halfword element of wd
 * is both the "dest" input and the output of msa_binsr_df(); ws supplies
 * the bits to insert and wt the per-element bit count operand.
 */
void helper_msa_binsr_h(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src = &env->active_fpu.fpr[ws].wr;
    wr_t *ctl = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Halfword lanes 0..7. */
    for (i = 0; i < 8; i++) {
        dst->h[i] = msa_binsr_df(DF_HALF, dst->h[i], src->h[i], ctl->h[i]);
    }
}
449 
/*
 * BINSR.W - Vector Bit Insert Right (word).
 *
 * wd, ws and wt index env->active_fpu.fpr[].  Each word element of wd is
 * both the "dest" input and the output of msa_binsr_df(); ws supplies the
 * bits to insert and wt the per-element bit count operand.
 */
void helper_msa_binsr_w(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src = &env->active_fpu.fpr[ws].wr;
    wr_t *ctl = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Word lanes 0..3. */
    for (i = 0; i < 4; i++) {
        dst->w[i] = msa_binsr_df(DF_WORD, dst->w[i], src->w[i], ctl->w[i]);
    }
}
462 
/*
 * BINSR.D - Vector Bit Insert Right (doubleword).
 *
 * wd, ws and wt index env->active_fpu.fpr[].  Each doubleword element of
 * wd is both the "dest" input and the output of msa_binsr_df(); ws
 * supplies the bits to insert and wt the per-element bit count operand.
 */
void helper_msa_binsr_d(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src = &env->active_fpu.fpr[ws].wr;
    wr_t *ctl = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Doubleword lanes 0..1. */
    for (i = 0; i < 2; i++) {
        dst->d[i] = msa_binsr_df(DF_DOUBLE, dst->d[i], src->d[i], ctl->d[i]);
    }
}
473 
/*
 * BMNZ.V - Vector Bit Move If Not Zero.
 *
 * wd, ws and wt index env->active_fpu.fpr[].  For every bit position:
 * where the wt bit is 1 the ws bit is copied into wd, where it is 0 the
 * wd bit is left as it was.  The register is processed as two doubleword
 * elements.
 */
void helper_msa_bmnz_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src = &env->active_fpu.fpr[ws].wr;
    wr_t *sel = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 2; i++) {
        dst->d[i] = UNSIGNED(
            ((dst->d[i] & (~sel->d[i])) | (src->d[i] & sel->d[i])),
            DF_DOUBLE);
    }
}
485 
/*
 * BMZ.V - Vector Bit Move If Zero.
 *
 * wd, ws and wt index env->active_fpu.fpr[].  For every bit position:
 * where the wt bit is 0 the ws bit is copied into wd, where it is 1 the
 * wd bit is left as it was.  The register is processed as two doubleword
 * elements.
 */
void helper_msa_bmz_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src = &env->active_fpu.fpr[ws].wr;
    wr_t *sel = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 2; i++) {
        dst->d[i] = UNSIGNED(
            ((dst->d[i] & sel->d[i]) | (src->d[i] & (~sel->d[i]))),
            DF_DOUBLE);
    }
}
497 
/*
 * BSEL.V - Vector Bit Select.
 *
 * wd, ws and wt index env->active_fpu.fpr[].  The incoming value of wd is
 * the selector: for every bit position the result bit is taken from ws
 * where the wd bit is 0 and from wt where the wd bit is 1.  The result
 * overwrites wd.  The register is processed as two doubleword elements.
 */
void helper_msa_bsel_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *if_clear = &env->active_fpu.fpr[ws].wr;
    wr_t *if_set = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 2; i++) {
        dst->d[i] = UNSIGNED(
            (if_clear->d[i] & (~dst->d[i])) | (if_set->d[i] & dst->d[i]),
            DF_DOUBLE);
    }
}
509 
510 
511 /*
512  * Bit Set
513  * -------
514  *
515  * +---------------+----------------------------------------------------------+
516  * | BCLR.B        | Vector Bit Clear (byte)                                  |
517  * | BCLR.H        | Vector Bit Clear (halfword)                              |
518  * | BCLR.W        | Vector Bit Clear (word)                                  |
519  * | BCLR.D        | Vector Bit Clear (doubleword)                            |
520  * | BNEG.B        | Vector Bit Negate (byte)                                 |
521  * | BNEG.H        | Vector Bit Negate (halfword)                             |
522  * | BNEG.W        | Vector Bit Negate (word)                                 |
523  * | BNEG.D        | Vector Bit Negate (doubleword)                           |
524  * | BSET.B        | Vector Bit Set (byte)                                    |
525  * | BSET.H        | Vector Bit Set (halfword)                                |
526  * | BSET.W        | Vector Bit Set (word)                                    |
527  * | BSET.D        | Vector Bit Set (doubleword)                              |
528  * +---------------+----------------------------------------------------------+
529  */
530 
/*
 * Bit clear for one DF_BITS(df)-bit element.
 *
 * Clears the bit of arg1 whose index is BIT_POSITION(arg2, df) and returns
 * the result truncated to the element width.
 */
static inline int64_t msa_bclr_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    int32_t b_arg2 = BIT_POSITION(arg2, df);

    /*
     * The mask is built from an unsigned 1: for doubleword elements the
     * bit index can be 63, and shifting a signed 1LL into the sign bit is
     * undefined in ISO C.
     */
    return UNSIGNED(arg1 & ~(1ULL << b_arg2), df);
}
536 
/*
 * BCLR.B - Vector Bit Clear (byte).
 *
 * wd, ws and wt index env->active_fpu.fpr[].  Each byte element of wd is
 * the same element of ws with one bit cleared; the bit index is derived
 * from the same element of wt by msa_bclr_df().
 */
void helper_msa_bclr_b(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src = &env->active_fpu.fpr[ws].wr;
    wr_t *pos = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Byte lanes 0..15; every lane uses only its own two inputs. */
    for (i = 0; i < 16; i++) {
        dst->b[i] = msa_bclr_df(DF_BYTE, src->b[i], pos->b[i]);
    }
}
560 
/*
 * BCLR.H - Vector Bit Clear (halfword).
 *
 * wd, ws and wt index env->active_fpu.fpr[].  Each halfword element of wd
 * is the same element of ws with one bit cleared; the bit index is derived
 * from the same element of wt by msa_bclr_df().
 */
void helper_msa_bclr_h(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src = &env->active_fpu.fpr[ws].wr;
    wr_t *pos = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Halfword lanes 0..7. */
    for (i = 0; i < 8; i++) {
        dst->h[i] = msa_bclr_df(DF_HALF, src->h[i], pos->h[i]);
    }
}
576 
/*
 * BCLR.W - Vector Bit Clear (word).
 *
 * wd, ws and wt index env->active_fpu.fpr[].  Each word element of wd is
 * the same element of ws with one bit cleared; the bit index is derived
 * from the same element of wt by msa_bclr_df().
 */
void helper_msa_bclr_w(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src = &env->active_fpu.fpr[ws].wr;
    wr_t *pos = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Word lanes 0..3. */
    for (i = 0; i < 4; i++) {
        dst->w[i] = msa_bclr_df(DF_WORD, src->w[i], pos->w[i]);
    }
}
588 
/*
 * BCLR.D - Vector Bit Clear (doubleword).
 *
 * wd, ws and wt index env->active_fpu.fpr[].  Each doubleword element of
 * wd is the same element of ws with one bit cleared; the bit index is
 * derived from the same element of wt by msa_bclr_df().
 */
void helper_msa_bclr_d(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src = &env->active_fpu.fpr[ws].wr;
    wr_t *pos = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Doubleword lanes 0..1. */
    for (i = 0; i < 2; i++) {
        dst->d[i] = msa_bclr_df(DF_DOUBLE, src->d[i], pos->d[i]);
    }
}
598 
/*
 * Bit negate for one DF_BITS(df)-bit element.
 *
 * Inverts the bit of arg1 whose index is BIT_POSITION(arg2, df) and
 * returns the result truncated to the element width.
 */
static inline int64_t msa_bneg_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    int32_t b_arg2 = BIT_POSITION(arg2, df);

    /*
     * The mask is built from an unsigned 1: for doubleword elements the
     * bit index can be 63, and shifting a signed 1LL into the sign bit is
     * undefined in ISO C.
     */
    return UNSIGNED(arg1 ^ (1ULL << b_arg2), df);
}
604 
/*
 * BNEG.B - Vector Bit Negate (byte).
 *
 * wd, ws and wt index env->active_fpu.fpr[].  Each byte element of wd is
 * the same element of ws with one bit inverted; the bit index is derived
 * from the same element of wt by msa_bneg_df().
 */
void helper_msa_bneg_b(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src = &env->active_fpu.fpr[ws].wr;
    wr_t *pos = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Byte lanes 0..15; every lane uses only its own two inputs. */
    for (i = 0; i < 16; i++) {
        dst->b[i] = msa_bneg_df(DF_BYTE, src->b[i], pos->b[i]);
    }
}
628 
/*
 * BNEG.H - Vector Bit Negate (halfword).
 *
 * wd, ws and wt index env->active_fpu.fpr[].  Each halfword element of wd
 * is the same element of ws with one bit inverted; the bit index is
 * derived from the same element of wt by msa_bneg_df().
 */
void helper_msa_bneg_h(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src = &env->active_fpu.fpr[ws].wr;
    wr_t *pos = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Halfword lanes 0..7. */
    for (i = 0; i < 8; i++) {
        dst->h[i] = msa_bneg_df(DF_HALF, src->h[i], pos->h[i]);
    }
}
644 
/*
 * Word (DF_WORD) form of the msa_bneg_df()-based helper.
 *
 * For each of the 4 word lanes, the lane of vector register ws and the lane
 * of vector register wt are passed to msa_bneg_df() and the result is stored
 * in the same lane of vector register wd.
 *
 * env        - CPU state; the vector registers live in active_fpu.fpr[].wr
 * wd, ws, wt - indices of the destination and the two source registers
 */
void helper_msa_bneg_w(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Lanes are processed in ascending order, one read pair per write. */
    for (i = 0; i < 4; i++) {
        dst->w[i] = msa_bneg_df(DF_WORD, lhs->w[i], rhs->w[i]);
    }
}
656 
/*
 * Doubleword (DF_DOUBLE) form of the msa_bneg_df()-based helper.
 *
 * For each of the 2 doubleword lanes, the lane of vector register ws and the
 * lane of vector register wt are passed to msa_bneg_df() and the result is
 * stored in the same lane of vector register wd.
 *
 * env        - CPU state; the vector registers live in active_fpu.fpr[].wr
 * wd, ws, wt - indices of the destination and the two source registers
 */
void helper_msa_bneg_d(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Lanes are processed in ascending order, one read pair per write. */
    for (i = 0; i < 2; i++) {
        dst->d[i] = msa_bneg_df(DF_DOUBLE, lhs->d[i], rhs->d[i]);
    }
}
666 
/*
 * Per-element bit-set primitive.
 *
 * Returns arg1 with one additional bit set, truncated to the element width
 * selected by df via UNSIGNED().  The bit index is derived from arg2 by
 * BIT_POSITION(arg2, df).
 */
static inline int64_t msa_bset_df(uint32_t df, int64_t arg1,
        int64_t arg2)
{
    int32_t bit = BIT_POSITION(arg2, df);
    int64_t with_bit = arg1 | (1LL << bit);

    return UNSIGNED(with_bit, df);
}
673 
/*
 * Byte (DF_BYTE) form of the bit-set helper.
 *
 * For each of the 16 byte lanes:
 *     wd.b[i] = msa_bset_df(DF_BYTE, ws.b[i], wt.b[i])
 *
 * env        - CPU state; the vector registers live in active_fpu.fpr[].wr
 * wd, ws, wt - indices of the destination and the two source registers
 */
void helper_msa_bset_b(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Lanes are processed in ascending order, one read pair per write. */
    for (i = 0; i < 16; i++) {
        dst->b[i] = msa_bset_df(DF_BYTE, lhs->b[i], rhs->b[i]);
    }
}
697 
/*
 * Halfword (DF_HALF) form of the bit-set helper.
 *
 * For each of the 8 halfword lanes:
 *     wd.h[i] = msa_bset_df(DF_HALF, ws.h[i], wt.h[i])
 *
 * env        - CPU state; the vector registers live in active_fpu.fpr[].wr
 * wd, ws, wt - indices of the destination and the two source registers
 */
void helper_msa_bset_h(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Lanes are processed in ascending order, one read pair per write. */
    for (i = 0; i < 8; i++) {
        dst->h[i] = msa_bset_df(DF_HALF, lhs->h[i], rhs->h[i]);
    }
}
713 
/*
 * Word (DF_WORD) form of the bit-set helper.
 *
 * For each of the 4 word lanes:
 *     wd.w[i] = msa_bset_df(DF_WORD, ws.w[i], wt.w[i])
 *
 * env        - CPU state; the vector registers live in active_fpu.fpr[].wr
 * wd, ws, wt - indices of the destination and the two source registers
 */
void helper_msa_bset_w(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Lanes are processed in ascending order, one read pair per write. */
    for (i = 0; i < 4; i++) {
        dst->w[i] = msa_bset_df(DF_WORD, lhs->w[i], rhs->w[i]);
    }
}
725 
/*
 * Doubleword (DF_DOUBLE) form of the bit-set helper.
 *
 * For each of the 2 doubleword lanes:
 *     wd.d[i] = msa_bset_df(DF_DOUBLE, ws.d[i], wt.d[i])
 *
 * env        - CPU state; the vector registers live in active_fpu.fpr[].wr
 * wd, ws, wt - indices of the destination and the two source registers
 */
void helper_msa_bset_d(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Lanes are processed in ascending order, one read pair per write. */
    for (i = 0; i < 2; i++) {
        dst->d[i] = msa_bset_df(DF_DOUBLE, lhs->d[i], rhs->d[i]);
    }
}
735 
736 
737 /*
738  * Fixed Multiply
739  * --------------
740  *
741  * +---------------+----------------------------------------------------------+
742  * | MADD_Q.H      | Vector Fixed-Point Multiply and Add (halfword)           |
743  * | MADD_Q.W      | Vector Fixed-Point Multiply and Add (word)               |
744  * | MADDR_Q.H     | Vector Fixed-Point Multiply and Add Rounded (halfword)   |
745  * | MADDR_Q.W     | Vector Fixed-Point Multiply and Add Rounded (word)       |
746  * | MSUB_Q.H      | Vector Fixed-Point Multiply and Subtr. (halfword)        |
747  * | MSUB_Q.W      | Vector Fixed-Point Multiply and Subtr. (word)            |
748  * | MSUBR_Q.H     | Vector Fixed-Point Multiply and Subtr. Rounded (halfword)|
749  * | MSUBR_Q.W     | Vector Fixed-Point Multiply and Subtr. Rounded (word)    |
750  * | MUL_Q.H       | Vector Fixed-Point Multiply (halfword)                   |
751  * | MUL_Q.W       | Vector Fixed-Point Multiply (word)                       |
752  * | MULR_Q.H      | Vector Fixed-Point Multiply Rounded (halfword)           |
753  * | MULR_Q.W      | Vector Fixed-Point Multiply Rounded (word)               |
754  * +---------------+----------------------------------------------------------+
755  */
756 
757 /* TODO: insert Fixed Multiply group helpers here */
758 
759 
760 /*
761  * Float Max Min
762  * -------------
763  *
764  * +---------------+----------------------------------------------------------+
765  * | FMAX_A.W      | Vector Floating-Point Maximum (Absolute) (word)          |
766  * | FMAX_A.D      | Vector Floating-Point Maximum (Absolute) (doubleword)    |
767  * | FMAX.W        | Vector Floating-Point Maximum (word)                     |
768  * | FMAX.D        | Vector Floating-Point Maximum (doubleword)               |
769  * | FMIN_A.W      | Vector Floating-Point Minimum (Absolute) (word)          |
770  * | FMIN_A.D      | Vector Floating-Point Minimum (Absolute) (doubleword)    |
771  * | FMIN.W        | Vector Floating-Point Minimum (word)                     |
772  * | FMIN.D        | Vector Floating-Point Minimum (doubleword)               |
773  * +---------------+----------------------------------------------------------+
774  */
775 
776 /* TODO: insert Float Max Min group helpers here */
777 
778 
779 /*
780  * Int Add
781  * -------
782  *
783  * +---------------+----------------------------------------------------------+
784  * | ADD_A.B       | Vector Add Absolute Values (byte)                        |
785  * | ADD_A.H       | Vector Add Absolute Values (halfword)                    |
786  * | ADD_A.W       | Vector Add Absolute Values (word)                        |
787  * | ADD_A.D       | Vector Add Absolute Values (doubleword)                  |
788  * | ADDS_A.B      | Vector Signed Saturated Add (of Absolute) (byte)         |
789  * | ADDS_A.H      | Vector Signed Saturated Add (of Absolute) (halfword)     |
790  * | ADDS_A.W      | Vector Signed Saturated Add (of Absolute) (word)         |
791  * | ADDS_A.D      | Vector Signed Saturated Add (of Absolute) (doubleword)   |
792  * | ADDS_S.B      | Vector Signed Saturated Add (of Signed) (byte)           |
793  * | ADDS_S.H      | Vector Signed Saturated Add (of Signed) (halfword)       |
794  * | ADDS_S.W      | Vector Signed Saturated Add (of Signed) (word)           |
795  * | ADDS_S.D      | Vector Signed Saturated Add (of Signed) (doubleword)     |
796  * | ADDS_U.B      | Vector Unsigned Saturated Add (of Unsigned) (byte)       |
797  * | ADDS_U.H      | Vector Unsigned Saturated Add (of Unsigned) (halfword)   |
798  * | ADDS_U.W      | Vector Unsigned Saturated Add (of Unsigned) (word)       |
799  * | ADDS_U.D      | Vector Unsigned Saturated Add (of Unsigned) (doubleword) |
800  * | ADDV.B        | Vector Add (byte)                                        |
801  * | ADDV.H        | Vector Add (halfword)                                    |
802  * | ADDV.W        | Vector Add (word)                                        |
803  * | ADDV.D        | Vector Add (doubleword)                                  |
804  * | HADD_S.H      | Vector Signed Horizontal Add (halfword)                  |
805  * | HADD_S.W      | Vector Signed Horizontal Add (word)                      |
806  * | HADD_S.D      | Vector Signed Horizontal Add (doubleword)                |
807  * | HADD_U.H      | Vector Unsigned Horizontal Add (halfword)                |
808  * | HADD_U.W      | Vector Unsigned Horizontal Add (word)                    |
809  * | HADD_U.D      | Vector Unsigned Horizontal Add (doubleword)              |
810  * +---------------+----------------------------------------------------------+
811  */
812 
813 
/*
 * Per-element primitive for ADD_A: |arg1| + |arg2|.
 *
 * df is not used by the computation; it is kept so the function has the
 * same shape as the other msa_*_df() primitives.  The caller stores the
 * result into a lane of the matching width.
 *
 * The absolute values are formed in uint64_t: negating INT64_MIN as a
 * signed value would overflow, whereas unsigned negation is well defined
 * and yields 2^63.  The sum is likewise an unsigned (modular) addition.
 */
static inline int64_t msa_add_a_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    uint64_t abs_arg1 = arg1 >= 0 ? (uint64_t)arg1 : -(uint64_t)arg1;
    uint64_t abs_arg2 = arg2 >= 0 ? (uint64_t)arg2 : -(uint64_t)arg2;

    return (int64_t)(abs_arg1 + abs_arg2);
}
820 
/*
 * ADD_A.B - Vector Add Absolute Values (byte).
 *
 * For each of the 16 byte lanes:
 *     wd.b[i] = msa_add_a_df(DF_BYTE, ws.b[i], wt.b[i])
 *
 * env        - CPU state; the vector registers live in active_fpu.fpr[].wr
 * wd, ws, wt - indices of the destination and the two source registers
 */
void helper_msa_add_a_b(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Lanes are processed in ascending order, one read pair per write. */
    for (i = 0; i < 16; i++) {
        dst->b[i] = msa_add_a_df(DF_BYTE, lhs->b[i], rhs->b[i]);
    }
}
845 
/*
 * ADD_A.H - Vector Add Absolute Values (halfword).
 *
 * For each of the 8 halfword lanes:
 *     wd.h[i] = msa_add_a_df(DF_HALF, ws.h[i], wt.h[i])
 *
 * env        - CPU state; the vector registers live in active_fpu.fpr[].wr
 * wd, ws, wt - indices of the destination and the two source registers
 */
void helper_msa_add_a_h(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Lanes are processed in ascending order, one read pair per write. */
    for (i = 0; i < 8; i++) {
        dst->h[i] = msa_add_a_df(DF_HALF, lhs->h[i], rhs->h[i]);
    }
}
862 
/*
 * ADD_A.W - Vector Add Absolute Values (word).
 *
 * For each of the 4 word lanes:
 *     wd.w[i] = msa_add_a_df(DF_WORD, ws.w[i], wt.w[i])
 *
 * env        - CPU state; the vector registers live in active_fpu.fpr[].wr
 * wd, ws, wt - indices of the destination and the two source registers
 */
void helper_msa_add_a_w(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Lanes are processed in ascending order, one read pair per write. */
    for (i = 0; i < 4; i++) {
        dst->w[i] = msa_add_a_df(DF_WORD, lhs->w[i], rhs->w[i]);
    }
}
875 
/*
 * ADD_A.D - Vector Add Absolute Values (doubleword).
 *
 * For each of the 2 doubleword lanes:
 *     wd.d[i] = msa_add_a_df(DF_DOUBLE, ws.d[i], wt.d[i])
 *
 * env        - CPU state; the vector registers live in active_fpu.fpr[].wr
 * wd, ws, wt - indices of the destination and the two source registers
 */
void helper_msa_add_a_d(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Lanes are processed in ascending order, one read pair per write. */
    for (i = 0; i < 2; i++) {
        dst->d[i] = msa_add_a_df(DF_DOUBLE, lhs->d[i], rhs->d[i]);
    }
}
886 
887 
/*
 * Per-element primitive for ADDS_A: |arg1| + |arg2|, saturated at
 * DF_MAX_INT(df).
 *
 * The absolute values are formed in uint64_t: negating INT64_MIN as a
 * signed value would overflow, whereas unsigned negation is well defined
 * and yields 2^63, which the first check below then saturates.
 *
 * Returns DF_MAX_INT(df) when either magnitude already exceeds it or when
 * the sum would reach or exceed it; otherwise returns the exact sum.
 */
static inline int64_t msa_adds_a_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    uint64_t max_int = (uint64_t)DF_MAX_INT(df);
    uint64_t abs_arg1 = arg1 >= 0 ? (uint64_t)arg1 : -(uint64_t)arg1;
    uint64_t abs_arg2 = arg2 >= 0 ? (uint64_t)arg2 : -(uint64_t)arg2;

    /* A magnitude above the signed maximum (i.e. |min_int|) saturates. */
    if (abs_arg1 > max_int || abs_arg2 > max_int) {
        return (int64_t)max_int;
    }

    /* max_int - abs_arg2 cannot wrap here because abs_arg2 <= max_int. */
    if (abs_arg1 < max_int - abs_arg2) {
        return (int64_t)(abs_arg1 + abs_arg2);
    }
    return (int64_t)max_int;
}
899 
/*
 * ADDS_A.B - Vector Signed Saturated Add (of Absolute) (byte).
 *
 * For each of the 16 byte lanes:
 *     wd.b[i] = msa_adds_a_df(DF_BYTE, ws.b[i], wt.b[i])
 *
 * env        - CPU state; the vector registers live in active_fpu.fpr[].wr
 * wd, ws, wt - indices of the destination and the two source registers
 */
void helper_msa_adds_a_b(CPUMIPSState *env,
                         uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Lanes are processed in ascending order, one read pair per write. */
    for (i = 0; i < 16; i++) {
        dst->b[i] = msa_adds_a_df(DF_BYTE, lhs->b[i], rhs->b[i]);
    }
}
924 
/*
 * ADDS_A.H - Vector Signed Saturated Add (of Absolute) (halfword).
 *
 * For each of the 8 halfword lanes:
 *     wd.h[i] = msa_adds_a_df(DF_HALF, ws.h[i], wt.h[i])
 *
 * env        - CPU state; the vector registers live in active_fpu.fpr[].wr
 * wd, ws, wt - indices of the destination and the two source registers
 */
void helper_msa_adds_a_h(CPUMIPSState *env,
                         uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Lanes are processed in ascending order, one read pair per write. */
    for (i = 0; i < 8; i++) {
        dst->h[i] = msa_adds_a_df(DF_HALF, lhs->h[i], rhs->h[i]);
    }
}
941 
/*
 * ADDS_A.W - Vector Signed Saturated Add (of Absolute) (word).
 *
 * For each of the 4 word lanes:
 *     wd.w[i] = msa_adds_a_df(DF_WORD, ws.w[i], wt.w[i])
 *
 * env        - CPU state; the vector registers live in active_fpu.fpr[].wr
 * wd, ws, wt - indices of the destination and the two source registers
 */
void helper_msa_adds_a_w(CPUMIPSState *env,
                         uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Lanes are processed in ascending order, one read pair per write. */
    for (i = 0; i < 4; i++) {
        dst->w[i] = msa_adds_a_df(DF_WORD, lhs->w[i], rhs->w[i]);
    }
}
954 
/*
 * ADDS_A.D - Vector Signed Saturated Add (of Absolute) (doubleword).
 *
 * For each of the 2 doubleword lanes:
 *     wd.d[i] = msa_adds_a_df(DF_DOUBLE, ws.d[i], wt.d[i])
 *
 * env        - CPU state; the vector registers live in active_fpu.fpr[].wr
 * wd, ws, wt - indices of the destination and the two source registers
 */
void helper_msa_adds_a_d(CPUMIPSState *env,
                         uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Lanes are processed in ascending order, one read pair per write. */
    for (i = 0; i < 2; i++) {
        dst->d[i] = msa_adds_a_df(DF_DOUBLE, lhs->d[i], rhs->d[i]);
    }
}
965 
966 
/*
 * Per-element primitive for ADDS_S: arg1 + arg2 clamped to the range
 * [DF_MIN_INT(df), DF_MAX_INT(df)].
 *
 * The range check is done before the addition, using a subtraction that
 * stays in range for the sign of arg1 being tested, so the sum is only
 * formed once it is known to fit.
 */
static inline int64_t msa_adds_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const int64_t upper = DF_MAX_INT(df);
    const int64_t lower = DF_MIN_INT(df);

    if (arg1 >= 0) {
        /* Only positive overflow is possible when arg1 is non-negative. */
        if (arg2 < upper - arg1) {
            return arg1 + arg2;
        }
        return upper;
    }

    /* arg1 is negative: only negative overflow is possible. */
    if (lower - arg1 < arg2) {
        return arg1 + arg2;
    }
    return lower;
}
977 
/*
 * ADDS_S.B - Vector Signed Saturated Add (of Signed) (byte).
 *
 * For each of the 16 byte lanes:
 *     wd.b[i] = msa_adds_s_df(DF_BYTE, ws.b[i], wt.b[i])
 *
 * env        - CPU state; the vector registers live in active_fpu.fpr[].wr
 * wd, ws, wt - indices of the destination and the two source registers
 */
void helper_msa_adds_s_b(CPUMIPSState *env,
                         uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Lanes are processed in ascending order, one read pair per write. */
    for (i = 0; i < 16; i++) {
        dst->b[i] = msa_adds_s_df(DF_BYTE, lhs->b[i], rhs->b[i]);
    }
}
1002 
/*
 * ADDS_S.H - Vector Signed Saturated Add (of Signed) (halfword).
 *
 * For each of the 8 halfword lanes:
 *     wd.h[i] = msa_adds_s_df(DF_HALF, ws.h[i], wt.h[i])
 *
 * env        - CPU state; the vector registers live in active_fpu.fpr[].wr
 * wd, ws, wt - indices of the destination and the two source registers
 */
void helper_msa_adds_s_h(CPUMIPSState *env,
                         uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Lanes are processed in ascending order, one read pair per write. */
    for (i = 0; i < 8; i++) {
        dst->h[i] = msa_adds_s_df(DF_HALF, lhs->h[i], rhs->h[i]);
    }
}
1019 
/*
 * ADDS_S.W - Vector Signed Saturated Add (of Signed) (word).
 *
 * For each of the 4 word lanes:
 *     wd.w[i] = msa_adds_s_df(DF_WORD, ws.w[i], wt.w[i])
 *
 * env        - CPU state; the vector registers live in active_fpu.fpr[].wr
 * wd, ws, wt - indices of the destination and the two source registers
 */
void helper_msa_adds_s_w(CPUMIPSState *env,
                         uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Lanes are processed in ascending order, one read pair per write. */
    for (i = 0; i < 4; i++) {
        dst->w[i] = msa_adds_s_df(DF_WORD, lhs->w[i], rhs->w[i]);
    }
}
1032 
/*
 * ADDS_S.D - Vector Signed Saturated Add (of Signed) (doubleword).
 *
 * For each of the 2 doubleword lanes:
 *     wd.d[i] = msa_adds_s_df(DF_DOUBLE, ws.d[i], wt.d[i])
 *
 * env        - CPU state; the vector registers live in active_fpu.fpr[].wr
 * wd, ws, wt - indices of the destination and the two source registers
 */
void helper_msa_adds_s_d(CPUMIPSState *env,
                         uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Lanes are processed in ascending order, one read pair per write. */
    for (i = 0; i < 2; i++) {
        dst->d[i] = msa_adds_s_df(DF_DOUBLE, lhs->d[i], rhs->d[i]);
    }
}
1043 
1044 
/*
 * Per-element primitive for ADDS_U: unsigned sum of the two operands,
 * saturated at DF_MAX_UINT(df).
 *
 * Both operands are first masked to the element width with UNSIGNED().
 * The overflow test subtracts from the maximum instead of adding, so it
 * cannot wrap.
 */
static inline uint64_t msa_adds_u_df(uint32_t df, uint64_t arg1, uint64_t arg2)
{
    const uint64_t ceiling = DF_MAX_UINT(df);
    const uint64_t lhs = UNSIGNED(arg1, df);
    const uint64_t rhs = UNSIGNED(arg2, df);

    if (lhs < ceiling - rhs) {
        return lhs + rhs;
    }
    return ceiling;
}
1052 
/*
 * ADDS_U.B - Vector Unsigned Saturated Add (of Unsigned) (byte).
 *
 * For each of the 16 byte lanes:
 *     wd.b[i] = msa_adds_u_df(DF_BYTE, ws.b[i], wt.b[i])
 *
 * env        - CPU state; the vector registers live in active_fpu.fpr[].wr
 * wd, ws, wt - indices of the destination and the two source registers
 */
void helper_msa_adds_u_b(CPUMIPSState *env,
                         uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Lanes are processed in ascending order, one read pair per write. */
    for (i = 0; i < 16; i++) {
        dst->b[i] = msa_adds_u_df(DF_BYTE, lhs->b[i], rhs->b[i]);
    }
}
1077 
/*
 * ADDS_U.H - Vector Unsigned Saturated Add (of Unsigned) (halfword).
 *
 * For each of the 8 halfword lanes:
 *     wd.h[i] = msa_adds_u_df(DF_HALF, ws.h[i], wt.h[i])
 *
 * env        - CPU state; the vector registers live in active_fpu.fpr[].wr
 * wd, ws, wt - indices of the destination and the two source registers
 */
void helper_msa_adds_u_h(CPUMIPSState *env,
                         uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Lanes are processed in ascending order, one read pair per write. */
    for (i = 0; i < 8; i++) {
        dst->h[i] = msa_adds_u_df(DF_HALF, lhs->h[i], rhs->h[i]);
    }
}
1094 
/*
 * ADDS_U.W - Vector Unsigned Saturated Add (of Unsigned) (word).
 *
 * For each of the 4 word lanes:
 *     wd.w[i] = msa_adds_u_df(DF_WORD, ws.w[i], wt.w[i])
 *
 * env        - CPU state; the vector registers live in active_fpu.fpr[].wr
 * wd, ws, wt - indices of the destination and the two source registers
 */
void helper_msa_adds_u_w(CPUMIPSState *env,
                         uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Lanes are processed in ascending order, one read pair per write. */
    for (i = 0; i < 4; i++) {
        dst->w[i] = msa_adds_u_df(DF_WORD, lhs->w[i], rhs->w[i]);
    }
}
1107 
/*
 * ADDS_U.D - Vector Unsigned Saturated Add (of Unsigned) (doubleword).
 *
 * For each of the 2 doubleword lanes:
 *     wd.d[i] = msa_adds_u_df(DF_DOUBLE, ws.d[i], wt.d[i])
 *
 * env        - CPU state; the vector registers live in active_fpu.fpr[].wr
 * wd, ws, wt - indices of the destination and the two source registers
 */
void helper_msa_adds_u_d(CPUMIPSState *env,
                         uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Lanes are processed in ascending order, one read pair per write. */
    for (i = 0; i < 2; i++) {
        dst->d[i] = msa_adds_u_df(DF_DOUBLE, lhs->d[i], rhs->d[i]);
    }
}
1118 
1119 
/*
 * Per-element primitive for ADDV: plain (non-saturating) addition.
 *
 * df is not used by the computation; it is kept so the function has the
 * same shape as the other msa_*_df() primitives.  The caller stores the
 * result into a lane of the matching width.
 *
 * The addition is carried out in uint64_t so that it wraps modulo 2^64
 * instead of overflowing a signed type, which matters for doubleword lanes
 * where the operands span the full int64_t range.
 */
static inline int64_t msa_addv_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    return (int64_t)((uint64_t)arg1 + (uint64_t)arg2);
}
1124 
/*
 * ADDV.B - Vector Add (byte).
 *
 * For each of the 16 byte lanes:
 *     wd.b[i] = msa_addv_df(DF_BYTE, ws.b[i], wt.b[i])
 *
 * env        - CPU state; the vector registers live in active_fpu.fpr[].wr
 * wd, ws, wt - indices of the destination and the two source registers
 */
void helper_msa_addv_b(CPUMIPSState *env,
                       uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Lanes are processed in ascending order, one read pair per write. */
    for (i = 0; i < 16; i++) {
        dst->b[i] = msa_addv_df(DF_BYTE, lhs->b[i], rhs->b[i]);
    }
}
1149 
/*
 * ADDV.H - Vector Add (halfword).
 *
 * For each of the 8 halfword lanes:
 *     wd.h[i] = msa_addv_df(DF_HALF, ws.h[i], wt.h[i])
 *
 * env        - CPU state; the vector registers live in active_fpu.fpr[].wr
 * wd, ws, wt - indices of the destination and the two source registers
 */
void helper_msa_addv_h(CPUMIPSState *env,
                       uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Lanes are processed in ascending order, one read pair per write. */
    for (i = 0; i < 8; i++) {
        dst->h[i] = msa_addv_df(DF_HALF, lhs->h[i], rhs->h[i]);
    }
}
1166 
/*
 * ADDV.W - Vector Add (word).
 *
 * For each of the 4 word lanes:
 *     wd.w[i] = msa_addv_df(DF_WORD, ws.w[i], wt.w[i])
 *
 * env        - CPU state; the vector registers live in active_fpu.fpr[].wr
 * wd, ws, wt - indices of the destination and the two source registers
 */
void helper_msa_addv_w(CPUMIPSState *env,
                       uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Lanes are processed in ascending order, one read pair per write. */
    for (i = 0; i < 4; i++) {
        dst->w[i] = msa_addv_df(DF_WORD, lhs->w[i], rhs->w[i]);
    }
}
1179 
/*
 * ADDV.D - Vector Add (doubleword).
 *
 * For each of the 2 doubleword lanes:
 *     wd.d[i] = msa_addv_df(DF_DOUBLE, ws.d[i], wt.d[i])
 *
 * env        - CPU state; the vector registers live in active_fpu.fpr[].wr
 * wd, ws, wt - indices of the destination and the two source registers
 */
void helper_msa_addv_d(CPUMIPSState *env,
                       uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Lanes are processed in ascending order, one read pair per write. */
    for (i = 0; i < 2; i++) {
        dst->d[i] = msa_addv_df(DF_DOUBLE, lhs->d[i], rhs->d[i]);
    }
}
1190 
1191 
/*
 * Half-width extraction macros.
 *
 * Each treats a as a DF_BITS(df)-wide element and pulls out one of its two
 * halves (DF_BITS(df) / 2 bits each) using a shift-left / shift-right pair
 * on a 64-bit value.  "EVEN" selects the low half, "ODD" the high half.
 * The SIGNED_* forms shift an int64_t, the UNSIGNED_* forms a uint64_t.
 * NOTE(review): the SIGNED_* forms presumably rely on an arithmetic right
 * shift to sign-extend - confirm for the supported compilers.
 */

/* Low half of the element, via int64_t shifts. */
#define SIGNED_EVEN(a, df) \
        ((((int64_t)(a)) << (64 - DF_BITS(df) / 2)) >> (64 - DF_BITS(df) / 2))

/* Low half of the element, zero-extended. */
#define UNSIGNED_EVEN(a, df) \
        ((((uint64_t)(a)) << (64 - DF_BITS(df) / 2)) >> (64 - DF_BITS(df) / 2))

/* High half of the element, via int64_t shifts, moved down to bit 0. */
#define SIGNED_ODD(a, df) \
        ((((int64_t)(a)) << (64 - DF_BITS(df))) >> (64 - DF_BITS(df) / 2))

/* High half of the element, zero-extended, moved down to bit 0. */
#define UNSIGNED_ODD(a, df) \
        ((((uint64_t)(a)) << (64 - DF_BITS(df))) >> (64 - DF_BITS(df) / 2))
1203 
1204 
/*
 * Per-element primitive for HADD_S: adds the high half of arg1, taken with
 * SIGNED_ODD(), to the low half of arg2, taken with SIGNED_EVEN().  Each
 * half is DF_BITS(df) / 2 bits wide.
 */
static inline int64_t msa_hadd_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    int64_t odd_half = SIGNED_ODD(arg1, df);
    int64_t even_half = SIGNED_EVEN(arg2, df);

    return odd_half + even_half;
}
1209 
/*
 * HADD_S.H - Vector Signed Horizontal Add (halfword).
 *
 * For each of the 8 halfword lanes:
 *     wd.h[i] = msa_hadd_s_df(DF_HALF, ws.h[i], wt.h[i])
 *
 * env        - CPU state; the vector registers live in active_fpu.fpr[].wr
 * wd, ws, wt - indices of the destination and the two source registers
 */
void helper_msa_hadd_s_h(CPUMIPSState *env,
                         uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Lanes are processed in ascending order, one read pair per write. */
    for (i = 0; i < 8; i++) {
        dst->h[i] = msa_hadd_s_df(DF_HALF, lhs->h[i], rhs->h[i]);
    }
}
1226 
/*
 * HADD_S.W - Vector Signed Horizontal Add (word).
 *
 * For each of the 4 word lanes:
 *     wd.w[i] = msa_hadd_s_df(DF_WORD, ws.w[i], wt.w[i])
 *
 * env        - CPU state; the vector registers live in active_fpu.fpr[].wr
 * wd, ws, wt - indices of the destination and the two source registers
 */
void helper_msa_hadd_s_w(CPUMIPSState *env,
                         uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Lanes are processed in ascending order, one read pair per write. */
    for (i = 0; i < 4; i++) {
        dst->w[i] = msa_hadd_s_df(DF_WORD, lhs->w[i], rhs->w[i]);
    }
}
1239 
/*
 * HADD_S.D - Vector Signed Horizontal Add (doubleword).
 *
 * For each of the 2 doubleword lanes:
 *     wd.d[i] = msa_hadd_s_df(DF_DOUBLE, ws.d[i], wt.d[i])
 *
 * env        - CPU state; the vector registers live in active_fpu.fpr[].wr
 * wd, ws, wt - indices of the destination and the two source registers
 */
void helper_msa_hadd_s_d(CPUMIPSState *env,
                         uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Lanes are processed in ascending order, one read pair per write. */
    for (i = 0; i < 2; i++) {
        dst->d[i] = msa_hadd_s_df(DF_DOUBLE, lhs->d[i], rhs->d[i]);
    }
}
1250 
1251 
/*
 * Per-element primitive for HADD_U: adds the high half of arg1, taken with
 * UNSIGNED_ODD(), to the low half of arg2, taken with UNSIGNED_EVEN().
 * Each half is DF_BITS(df) / 2 bits wide and zero-extended; the unsigned
 * sum is returned through the int64_t return type.
 */
static inline int64_t msa_hadd_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    uint64_t odd_half = UNSIGNED_ODD(arg1, df);
    uint64_t even_half = UNSIGNED_EVEN(arg2, df);

    return odd_half + even_half;
}
1256 
/*
 * HADD_U.H - Vector Unsigned Horizontal Add (halfword).
 *
 * For each of the 8 halfword lanes:
 *     wd.h[i] = msa_hadd_u_df(DF_HALF, ws.h[i], wt.h[i])
 *
 * env        - CPU state; the vector registers live in active_fpu.fpr[].wr
 * wd, ws, wt - indices of the destination and the two source registers
 */
void helper_msa_hadd_u_h(CPUMIPSState *env,
                         uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Lanes are processed in ascending order, one read pair per write. */
    for (i = 0; i < 8; i++) {
        dst->h[i] = msa_hadd_u_df(DF_HALF, lhs->h[i], rhs->h[i]);
    }
}
1273 
/*
 * Word-format lane loop for msa_hadd_u_df(): each of the four word lanes
 * of registers ws and wt is combined pairwise and written to the same
 * lane of register wd.  wd, ws and wt index env->active_fpu.fpr[].
 */
void helper_msa_hadd_u_w(CPUMIPSState *env,
                         uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dest = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 4; i++) {
        dest->w[i] = msa_hadd_u_df(DF_WORD, src_s->w[i], src_t->w[i]);
    }
}
1286 
/*
 * Doubleword-format lane loop for msa_hadd_u_df(): both doubleword lanes
 * of registers ws and wt are combined pairwise and written to the same
 * lane of register wd.  wd, ws and wt index env->active_fpu.fpr[].
 */
void helper_msa_hadd_u_d(CPUMIPSState *env,
                         uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dest = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 2; i++) {
        dest->d[i] = msa_hadd_u_df(DF_DOUBLE, src_s->d[i], src_t->d[i]);
    }
}
1297 
1298 
1299 /*
1300  * Int Average
1301  * -----------
1302  *
1303  * +---------------+----------------------------------------------------------+
1304  * | AVE_S.B       | Vector Signed Average (byte)                             |
1305  * | AVE_S.H       | Vector Signed Average (halfword)                         |
1306  * | AVE_S.W       | Vector Signed Average (word)                             |
1307  * | AVE_S.D       | Vector Signed Average (doubleword)                       |
1308  * | AVE_U.B       | Vector Unsigned Average (byte)                           |
1309  * | AVE_U.H       | Vector Unsigned Average (halfword)                       |
1310  * | AVE_U.W       | Vector Unsigned Average (word)                           |
1311  * | AVE_U.D       | Vector Unsigned Average (doubleword)                     |
1312  * | AVER_S.B      | Vector Signed Average Rounded (byte)                     |
1313  * | AVER_S.H      | Vector Signed Average Rounded (halfword)                 |
1314  * | AVER_S.W      | Vector Signed Average Rounded (word)                     |
1315  * | AVER_S.D      | Vector Signed Average Rounded (doubleword)               |
1316  * | AVER_U.B      | Vector Unsigned Average Rounded (byte)                   |
1317  * | AVER_U.H      | Vector Unsigned Average Rounded (halfword)               |
1318  * | AVER_U.W      | Vector Unsigned Average Rounded (word)                   |
1319  * | AVER_U.D      | Vector Unsigned Average Rounded (doubleword)             |
1320  * +---------------+----------------------------------------------------------+
1321  */
1322 
/*
 * Per-element kernel of the signed average helpers.  Each operand is
 * halved on its own with a signed shift before adding, and the unit that
 * both shifts drop is added back only when both operands are odd.
 * The df argument is not used.
 */
static inline int64_t msa_ave_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const int64_t half1 = arg1 >> 1;    /* signed shift */
    const int64_t half2 = arg2 >> 1;    /* signed shift */
    const int64_t carry = arg1 & arg2 & 1;

    return half1 + half2 + carry;
}
1328 
/*
 * AVE_S.B: Vector Signed Average (byte).  Applies msa_ave_s_df() to each
 * of the sixteen byte lanes of registers ws and wt and stores the result
 * in the same lane of register wd.
 */
void helper_msa_ave_s_b(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dest = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 16; i++) {
        dest->b[i] = msa_ave_s_df(DF_BYTE, src_s->b[i], src_t->b[i]);
    }
}
1353 
/*
 * AVE_S.H: Vector Signed Average (halfword).  Applies msa_ave_s_df() to
 * each of the eight halfword lanes of registers ws and wt and stores the
 * result in the same lane of register wd.
 */
void helper_msa_ave_s_h(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dest = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 8; i++) {
        dest->h[i] = msa_ave_s_df(DF_HALF, src_s->h[i], src_t->h[i]);
    }
}
1370 
/*
 * AVE_S.W: Vector Signed Average (word).  Applies msa_ave_s_df() to each
 * of the four word lanes of registers ws and wt and stores the result in
 * the same lane of register wd.
 */
void helper_msa_ave_s_w(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dest = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 4; i++) {
        dest->w[i] = msa_ave_s_df(DF_WORD, src_s->w[i], src_t->w[i]);
    }
}
1383 
/*
 * AVE_S.D: Vector Signed Average (doubleword).  Applies msa_ave_s_df()
 * to both doubleword lanes of registers ws and wt and stores the result
 * in the same lane of register wd.
 */
void helper_msa_ave_s_d(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dest = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 2; i++) {
        dest->d[i] = msa_ave_s_df(DF_DOUBLE, src_s->d[i], src_t->d[i]);
    }
}
1394 
/*
 * Per-element kernel of the unsigned average helpers.  Both operands are
 * first masked to the element width with UNSIGNED(); each is then halved
 * with an unsigned shift, and the unit dropped by both shifts is added
 * back only when both masked operands are odd.
 */
static inline uint64_t msa_ave_u_df(uint32_t df, uint64_t arg1, uint64_t arg2)
{
    const uint64_t lhs = UNSIGNED(arg1, df);
    const uint64_t rhs = UNSIGNED(arg2, df);
    uint64_t avg;

    avg = lhs >> 1;             /* unsigned shift */
    avg += rhs >> 1;            /* unsigned shift */
    avg += lhs & rhs & 1;

    return avg;
}
1402 
/*
 * AVE_U.B: Vector Unsigned Average (byte).  Applies msa_ave_u_df() to
 * each of the sixteen byte lanes of registers ws and wt and stores the
 * result in the same lane of register wd.
 */
void helper_msa_ave_u_b(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dest = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 16; i++) {
        dest->b[i] = msa_ave_u_df(DF_BYTE, src_s->b[i], src_t->b[i]);
    }
}
1427 
/*
 * AVE_U.H: Vector Unsigned Average (halfword).  Applies msa_ave_u_df()
 * to each of the eight halfword lanes of registers ws and wt and stores
 * the result in the same lane of register wd.
 */
void helper_msa_ave_u_h(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dest = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 8; i++) {
        dest->h[i] = msa_ave_u_df(DF_HALF, src_s->h[i], src_t->h[i]);
    }
}
1444 
/*
 * AVE_U.W: Vector Unsigned Average (word).  Applies msa_ave_u_df() to
 * each of the four word lanes of registers ws and wt and stores the
 * result in the same lane of register wd.
 */
void helper_msa_ave_u_w(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dest = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 4; i++) {
        dest->w[i] = msa_ave_u_df(DF_WORD, src_s->w[i], src_t->w[i]);
    }
}
1457 
/*
 * AVE_U.D: Vector Unsigned Average (doubleword).  Applies msa_ave_u_df()
 * to both doubleword lanes of registers ws and wt and stores the result
 * in the same lane of register wd.
 */
void helper_msa_ave_u_d(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dest = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 2; i++) {
        dest->d[i] = msa_ave_u_df(DF_DOUBLE, src_s->d[i], src_t->d[i]);
    }
}
1468 
/*
 * Per-element kernel of the signed rounded average helpers.  Each
 * operand is halved on its own with a signed shift, and one is added
 * whenever at least one operand is odd.  The df argument is not used.
 */
static inline int64_t msa_aver_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const int64_t half1 = arg1 >> 1;    /* signed shift */
    const int64_t half2 = arg2 >> 1;    /* signed shift */
    const int64_t round = (arg1 | arg2) & 1;

    return half1 + half2 + round;
}
1474 
/*
 * AVER_S.B: Vector Signed Average Rounded (byte).  Applies
 * msa_aver_s_df() to each of the sixteen byte lanes of registers ws and
 * wt and stores the result in the same lane of register wd.
 */
void helper_msa_aver_s_b(CPUMIPSState *env,
                         uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dest = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 16; i++) {
        dest->b[i] = msa_aver_s_df(DF_BYTE, src_s->b[i], src_t->b[i]);
    }
}
1499 
/*
 * AVER_S.H: Vector Signed Average Rounded (halfword).  Applies
 * msa_aver_s_df() to each of the eight halfword lanes of registers ws
 * and wt and stores the result in the same lane of register wd.
 */
void helper_msa_aver_s_h(CPUMIPSState *env,
                         uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dest = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 8; i++) {
        dest->h[i] = msa_aver_s_df(DF_HALF, src_s->h[i], src_t->h[i]);
    }
}
1516 
/*
 * AVER_S.W: Vector Signed Average Rounded (word).  Applies
 * msa_aver_s_df() to each of the four word lanes of registers ws and wt
 * and stores the result in the same lane of register wd.
 */
void helper_msa_aver_s_w(CPUMIPSState *env,
                         uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dest = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 4; i++) {
        dest->w[i] = msa_aver_s_df(DF_WORD, src_s->w[i], src_t->w[i]);
    }
}
1529 
/*
 * AVER_S.D: Vector Signed Average Rounded (doubleword).  Applies
 * msa_aver_s_df() to both doubleword lanes of registers ws and wt and
 * stores the result in the same lane of register wd.
 */
void helper_msa_aver_s_d(CPUMIPSState *env,
                         uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dest = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 2; i++) {
        dest->d[i] = msa_aver_s_df(DF_DOUBLE, src_s->d[i], src_t->d[i]);
    }
}
1540 
/*
 * Per-element kernel of the unsigned rounded average helpers.  Both
 * operands are first masked to the element width with UNSIGNED(); each
 * is then halved with an unsigned shift, and one is added whenever at
 * least one masked operand is odd.
 */
static inline uint64_t msa_aver_u_df(uint32_t df, uint64_t arg1, uint64_t arg2)
{
    const uint64_t lhs = UNSIGNED(arg1, df);
    const uint64_t rhs = UNSIGNED(arg2, df);
    uint64_t avg;

    avg = lhs >> 1;             /* unsigned shift */
    avg += rhs >> 1;            /* unsigned shift */
    avg += (lhs | rhs) & 1;

    return avg;
}
1548 
/*
 * AVER_U.B: Vector Unsigned Average Rounded (byte).  Applies
 * msa_aver_u_df() to each of the sixteen byte lanes of registers ws and
 * wt and stores the result in the same lane of register wd.
 */
void helper_msa_aver_u_b(CPUMIPSState *env,
                         uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dest = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 16; i++) {
        dest->b[i] = msa_aver_u_df(DF_BYTE, src_s->b[i], src_t->b[i]);
    }
}
1573 
/*
 * AVER_U.H: Vector Unsigned Average Rounded (halfword).  Applies
 * msa_aver_u_df() to each of the eight halfword lanes of registers ws
 * and wt and stores the result in the same lane of register wd.
 */
void helper_msa_aver_u_h(CPUMIPSState *env,
                         uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dest = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 8; i++) {
        dest->h[i] = msa_aver_u_df(DF_HALF, src_s->h[i], src_t->h[i]);
    }
}
1590 
/*
 * AVER_U.W: Vector Unsigned Average Rounded (word).  Applies
 * msa_aver_u_df() to each of the four word lanes of registers ws and wt
 * and stores the result in the same lane of register wd.
 */
void helper_msa_aver_u_w(CPUMIPSState *env,
                         uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dest = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 4; i++) {
        dest->w[i] = msa_aver_u_df(DF_WORD, src_s->w[i], src_t->w[i]);
    }
}
1603 
/*
 * AVER_U.D: Vector Unsigned Average Rounded (doubleword).  Applies
 * msa_aver_u_df() to both doubleword lanes of registers ws and wt and
 * stores the result in the same lane of register wd.
 */
void helper_msa_aver_u_d(CPUMIPSState *env,
                         uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dest = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 2; i++) {
        dest->d[i] = msa_aver_u_df(DF_DOUBLE, src_s->d[i], src_t->d[i]);
    }
}
1614 
1615 
1616 /*
1617  * Int Compare
1618  * -----------
1619  *
1620  * +---------------+----------------------------------------------------------+
1621  * | CEQ.B         | Vector Compare Equal (byte)                              |
1622  * | CEQ.H         | Vector Compare Equal (halfword)                          |
1623  * | CEQ.W         | Vector Compare Equal (word)                              |
1624  * | CEQ.D         | Vector Compare Equal (doubleword)                        |
1625  * | CLE_S.B       | Vector Compare Signed Less Than or Equal (byte)          |
1626  * | CLE_S.H       | Vector Compare Signed Less Than or Equal (halfword)      |
1627  * | CLE_S.W       | Vector Compare Signed Less Than or Equal (word)          |
1628  * | CLE_S.D       | Vector Compare Signed Less Than or Equal (doubleword)    |
1629  * | CLE_U.B       | Vector Compare Unsigned Less Than or Equal (byte)        |
1630  * | CLE_U.H       | Vector Compare Unsigned Less Than or Equal (halfword)    |
1631  * | CLE_U.W       | Vector Compare Unsigned Less Than or Equal (word)        |
1632  * | CLE_U.D       | Vector Compare Unsigned Less Than or Equal (doubleword)  |
1633  * | CLT_S.B       | Vector Compare Signed Less Than (byte)                   |
1634  * | CLT_S.H       | Vector Compare Signed Less Than (halfword)               |
1635  * | CLT_S.W       | Vector Compare Signed Less Than (word)                   |
1636  * | CLT_S.D       | Vector Compare Signed Less Than (doubleword)             |
1637  * | CLT_U.B       | Vector Compare Unsigned Less Than (byte)                 |
1638  * | CLT_U.H       | Vector Compare Unsigned Less Than (halfword)             |
1639  * | CLT_U.W       | Vector Compare Unsigned Less Than (word)                 |
1640  * | CLT_U.D       | Vector Compare Unsigned Less Than (doubleword)           |
1641  * +---------------+----------------------------------------------------------+
1642  */
1643 
/*
 * Format-generic equality compare: returns -1 (all bits set) when the
 * operands are equal and 0 otherwise.  The df argument is not used.
 */
static inline int64_t msa_ceq_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    if (arg1 != arg2) {
        return 0;
    }
    return -1;
}
1648 
/*
 * Byte equality compare: returns -1 (all bits set) when the operands are
 * equal and 0 otherwise.
 */
static inline int8_t msa_ceq_b(int8_t arg1, int8_t arg2)
{
    if (arg1 != arg2) {
        return 0;
    }
    return -1;
}
1653 
/*
 * CEQ.B: Vector Compare Equal (byte).  Applies msa_ceq_b() to each of
 * the sixteen byte lanes of registers ws and wt and stores the result in
 * the same lane of register wd.
 */
void helper_msa_ceq_b(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dest = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 16; i++) {
        dest->b[i] = msa_ceq_b(src_s->b[i], src_t->b[i]);
    }
}
1677 
/*
 * Halfword equality compare: returns -1 (all bits set) when the operands
 * are equal and 0 otherwise.
 */
static inline int16_t msa_ceq_h(int16_t arg1, int16_t arg2)
{
    if (arg1 != arg2) {
        return 0;
    }
    return -1;
}
1682 
/*
 * CEQ.H: Vector Compare Equal (halfword).  Applies msa_ceq_h() to each
 * of the eight halfword lanes of registers ws and wt and stores the
 * result in the same lane of register wd.
 */
void helper_msa_ceq_h(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dest = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 8; i++) {
        dest->h[i] = msa_ceq_h(src_s->h[i], src_t->h[i]);
    }
}
1698 
/*
 * Word equality compare: returns -1 (all bits set) when the operands are
 * equal and 0 otherwise.
 */
static inline int32_t msa_ceq_w(int32_t arg1, int32_t arg2)
{
    if (arg1 != arg2) {
        return 0;
    }
    return -1;
}
1703 
/*
 * CEQ.W: Vector Compare Equal (word).  Applies msa_ceq_w() to each of
 * the four word lanes of registers ws and wt and stores the result in
 * the same lane of register wd.
 */
void helper_msa_ceq_w(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dest = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 4; i++) {
        dest->w[i] = msa_ceq_w(src_s->w[i], src_t->w[i]);
    }
}
1715 
/*
 * Doubleword equality compare: returns -1 (all bits set) when the
 * operands are equal and 0 otherwise.
 */
static inline int64_t msa_ceq_d(int64_t arg1, int64_t arg2)
{
    if (arg1 != arg2) {
        return 0;
    }
    return -1;
}
1720 
/*
 * CEQ.D: Vector Compare Equal (doubleword).  Applies msa_ceq_d() to both
 * doubleword lanes of registers ws and wt and stores the result in the
 * same lane of register wd.
 */
void helper_msa_ceq_d(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dest = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 2; i++) {
        dest->d[i] = msa_ceq_d(src_s->d[i], src_t->d[i]);
    }
}
1730 
/*
 * Signed less-than-or-equal compare: returns -1 (all bits set) when
 * arg1 <= arg2 and 0 otherwise.  The df argument is not used.
 */
static inline int64_t msa_cle_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    if (arg1 <= arg2) {
        return -1;
    }
    return 0;
}
1735 
/*
 * CLE_S.B: Vector Compare Signed Less Than or Equal (byte).  Applies
 * msa_cle_s_df() to each of the sixteen byte lanes of registers ws and
 * wt and stores the result in the same lane of register wd.
 */
void helper_msa_cle_s_b(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dest = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 16; i++) {
        dest->b[i] = msa_cle_s_df(DF_BYTE, src_s->b[i], src_t->b[i]);
    }
}
1760 
/*
 * CLE_S.H: Vector Compare Signed Less Than or Equal (halfword).  Applies
 * msa_cle_s_df() to each of the eight halfword lanes of registers ws and
 * wt and stores the result in the same lane of register wd.
 */
void helper_msa_cle_s_h(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dest = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 8; i++) {
        dest->h[i] = msa_cle_s_df(DF_HALF, src_s->h[i], src_t->h[i]);
    }
}
1777 
/*
 * CLE_S.W: Vector Compare Signed Less Than or Equal (word).  Applies
 * msa_cle_s_df() to each of the four word lanes of registers ws and wt
 * and stores the result in the same lane of register wd.
 */
void helper_msa_cle_s_w(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dest = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 4; i++) {
        dest->w[i] = msa_cle_s_df(DF_WORD, src_s->w[i], src_t->w[i]);
    }
}
1790 
/*
 * CLE_S.D: Vector Compare Signed Less Than or Equal (doubleword).
 * Applies msa_cle_s_df() to both doubleword lanes of registers ws and wt
 * and stores the result in the same lane of register wd.
 */
void helper_msa_cle_s_d(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dest = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 2; i++) {
        dest->d[i] = msa_cle_s_df(DF_DOUBLE, src_s->d[i], src_t->d[i]);
    }
}
1801 
/*
 * Unsigned less-than-or-equal compare: both operands are masked to the
 * element width with UNSIGNED() and compared as uint64_t.  Returns -1
 * (all bits set) when arg1 <= arg2 in that sense and 0 otherwise.
 */
static inline int64_t msa_cle_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const uint64_t lhs = UNSIGNED(arg1, df);
    const uint64_t rhs = UNSIGNED(arg2, df);

    if (lhs <= rhs) {
        return -1;
    }
    return 0;
}
1808 
/*
 * CLE_U.B: Vector Compare Unsigned Less Than or Equal (byte).  Applies
 * msa_cle_u_df() to each of the sixteen byte lanes of registers ws and
 * wt and stores the result in the same lane of register wd.
 */
void helper_msa_cle_u_b(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dest = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 16; i++) {
        dest->b[i] = msa_cle_u_df(DF_BYTE, src_s->b[i], src_t->b[i]);
    }
}
1833 
/*
 * CLE_U.H: Vector Compare Unsigned Less Than or Equal (halfword).
 * Applies msa_cle_u_df() to each of the eight halfword lanes of
 * registers ws and wt and stores the result in the same lane of
 * register wd.
 */
void helper_msa_cle_u_h(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dest = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 8; i++) {
        dest->h[i] = msa_cle_u_df(DF_HALF, src_s->h[i], src_t->h[i]);
    }
}
1850 
/*
 * CLE_U.W: Vector Compare Unsigned Less Than or Equal (word).  Applies
 * msa_cle_u_df() to each of the four word lanes of registers ws and wt
 * and stores the result in the same lane of register wd.
 */
void helper_msa_cle_u_w(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dest = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 4; i++) {
        dest->w[i] = msa_cle_u_df(DF_WORD, src_s->w[i], src_t->w[i]);
    }
}
1863 
/*
 * CLE_U.D: Vector Compare Unsigned Less Than or Equal (doubleword).
 * Applies msa_cle_u_df() to both doubleword lanes of registers ws and wt
 * and stores the result in the same lane of register wd.
 */
void helper_msa_cle_u_d(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dest = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 2; i++) {
        dest->d[i] = msa_cle_u_df(DF_DOUBLE, src_s->d[i], src_t->d[i]);
    }
}
1874 
/*
 * Signed "less than" on two values: returns -1 when arg1 < arg2 and 0
 * otherwise.  The df argument is accepted but not used.
 */
static inline int64_t msa_clt_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    if (arg1 < arg2) {
        return -1;
    }
    return 0;
}
1879 
/* Signed byte "less than": -1 when arg1 < arg2, otherwise 0. */
static inline int8_t msa_clt_s_b(int8_t arg1, int8_t arg2)
{
    if (arg1 < arg2) {
        return -1;
    }
    return 0;
}
1884 
/*
 * Signed less-than compare over the 16 byte lanes: each lane of register wd
 * receives msa_clt_s_b() of the matching lanes of registers ws and wt.
 * wd, ws and wt index env->active_fpu.fpr[].
 */
void helper_msa_clt_s_b(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 16; i++) {
        dst->b[i] = msa_clt_s_b(lhs->b[i], rhs->b[i]);
    }
}
1909 
/* Signed halfword "less than": -1 when arg1 < arg2, otherwise 0. */
static inline int16_t msa_clt_s_h(int16_t arg1, int16_t arg2)
{
    if (arg1 < arg2) {
        return -1;
    }
    return 0;
}
1914 
/*
 * Signed less-than compare over the eight halfword lanes: each lane of
 * register wd receives msa_clt_s_h() of the matching lanes of registers ws
 * and wt.  wd, ws and wt index env->active_fpu.fpr[].
 */
void helper_msa_clt_s_h(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 8; i++) {
        dst->h[i] = msa_clt_s_h(lhs->h[i], rhs->h[i]);
    }
}
1931 
/* Signed word "less than": -1 when arg1 < arg2, otherwise 0. */
static inline int32_t msa_clt_s_w(int32_t arg1, int32_t arg2)
{
    if (arg1 < arg2) {
        return -1;
    }
    return 0;
}
1936 
/*
 * Signed less-than compare over the four word lanes: each lane of register
 * wd receives msa_clt_s_w() of the matching lanes of registers ws and wt.
 * wd, ws and wt index env->active_fpu.fpr[].
 */
void helper_msa_clt_s_w(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 4; i++) {
        dst->w[i] = msa_clt_s_w(lhs->w[i], rhs->w[i]);
    }
}
1949 
/* Signed doubleword "less than": -1 when arg1 < arg2, otherwise 0. */
static inline int64_t msa_clt_s_d(int64_t arg1, int64_t arg2)
{
    if (arg1 < arg2) {
        return -1;
    }
    return 0;
}
1954 
/*
 * Signed less-than compare over both doubleword lanes: each lane of register
 * wd receives msa_clt_s_d() of the matching lanes of registers ws and wt.
 * wd, ws and wt index env->active_fpu.fpr[].
 */
void helper_msa_clt_s_d(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 2; i++) {
        dst->d[i] = msa_clt_s_d(lhs->d[i], rhs->d[i]);
    }
}
1965 
/*
 * Unsigned "less than" for data format df.  Both arguments are first masked
 * to the element width with UNSIGNED(); the result is -1 when the masked
 * arg1 is below the masked arg2, and 0 otherwise.
 */
static inline int64_t msa_clt_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const uint64_t lhs = UNSIGNED(arg1, df);
    const uint64_t rhs = UNSIGNED(arg2, df);

    if (lhs < rhs) {
        return -1;
    }
    return 0;
}
1972 
/*
 * Unsigned less-than compare over the 16 byte lanes: each lane of register
 * wd receives msa_clt_u_df(DF_BYTE, ...) of the matching lanes of registers
 * ws and wt.  wd, ws and wt index env->active_fpu.fpr[].
 */
void helper_msa_clt_u_b(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 16; i++) {
        dst->b[i] = msa_clt_u_df(DF_BYTE, lhs->b[i], rhs->b[i]);
    }
}
1997 
/*
 * Unsigned less-than compare over the eight halfword lanes: each lane of
 * register wd receives msa_clt_u_df(DF_HALF, ...) of the matching lanes of
 * registers ws and wt.  wd, ws and wt index env->active_fpu.fpr[].
 */
void helper_msa_clt_u_h(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 8; i++) {
        dst->h[i] = msa_clt_u_df(DF_HALF, lhs->h[i], rhs->h[i]);
    }
}
2014 
/*
 * Unsigned less-than compare over the four word lanes: each lane of register
 * wd receives msa_clt_u_df(DF_WORD, ...) of the matching lanes of registers
 * ws and wt.  wd, ws and wt index env->active_fpu.fpr[].
 */
void helper_msa_clt_u_w(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 4; i++) {
        dst->w[i] = msa_clt_u_df(DF_WORD, lhs->w[i], rhs->w[i]);
    }
}
2027 
/*
 * Unsigned less-than compare over both doubleword lanes: each lane of
 * register wd receives msa_clt_u_df(DF_DOUBLE, ...) of the matching lanes of
 * registers ws and wt.  wd, ws and wt index env->active_fpu.fpr[].
 */
void helper_msa_clt_u_d(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 2; i++) {
        dst->d[i] = msa_clt_u_df(DF_DOUBLE, lhs->d[i], rhs->d[i]);
    }
}
2038 
2039 
2040 /*
2041  * Int Divide
2042  * ----------
2043  *
2044  * +---------------+----------------------------------------------------------+
2045  * | DIV_S.B       | Vector Signed Divide (byte)                              |
2046  * | DIV_S.H       | Vector Signed Divide (halfword)                          |
2047  * | DIV_S.W       | Vector Signed Divide (word)                              |
2048  * | DIV_S.D       | Vector Signed Divide (doubleword)                        |
2049  * | DIV_U.B       | Vector Unsigned Divide (byte)                            |
2050  * | DIV_U.H       | Vector Unsigned Divide (halfword)                        |
2051  * | DIV_U.W       | Vector Unsigned Divide (word)                            |
2052  * | DIV_U.D       | Vector Unsigned Divide (doubleword)                      |
2053  * +---------------+----------------------------------------------------------+
2054  */
2055 
2056 
/*
 * Signed division of arg1 by arg2 for data format df, with the two cases
 * that cannot be handed to the host '/' operator handled explicitly:
 *   - divisor 0: returns -1 when arg1 >= 0, otherwise 1;
 *   - DF_MIN_INT(df) / -1: returns DF_MIN_INT(df).
 * Every other input returns the C quotient arg1 / arg2.
 */
static inline int64_t msa_div_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const int64_t min_int = DF_MIN_INT(df);

    if (arg2 == 0) {
        return arg1 >= 0 ? -1 : 1;
    }
    if (arg2 == -1 && arg1 == min_int) {
        return min_int;
    }
    return arg1 / arg2;
}
2065 
/*
 * Signed divide over the 16 byte lanes: each lane of register wd receives
 * msa_div_s_df(DF_BYTE, ...) of the matching lanes of registers ws
 * (dividend) and wt (divisor).  wd, ws and wt index env->active_fpu.fpr[].
 */
void helper_msa_div_s_b(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 16; i++) {
        dst->b[i] = msa_div_s_df(DF_BYTE, lhs->b[i], rhs->b[i]);
    }
}
2090 
/*
 * Signed divide over the eight halfword lanes: each lane of register wd
 * receives msa_div_s_df(DF_HALF, ...) of the matching lanes of registers ws
 * (dividend) and wt (divisor).  wd, ws and wt index env->active_fpu.fpr[].
 */
void helper_msa_div_s_h(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 8; i++) {
        dst->h[i] = msa_div_s_df(DF_HALF, lhs->h[i], rhs->h[i]);
    }
}
2107 
/*
 * Signed divide over the four word lanes: each lane of register wd receives
 * msa_div_s_df(DF_WORD, ...) of the matching lanes of registers ws
 * (dividend) and wt (divisor).  wd, ws and wt index env->active_fpu.fpr[].
 */
void helper_msa_div_s_w(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 4; i++) {
        dst->w[i] = msa_div_s_df(DF_WORD, lhs->w[i], rhs->w[i]);
    }
}
2120 
/*
 * Signed divide over both doubleword lanes: each lane of register wd
 * receives msa_div_s_df(DF_DOUBLE, ...) of the matching lanes of registers
 * ws (dividend) and wt (divisor).  wd, ws and wt index
 * env->active_fpu.fpr[].
 */
void helper_msa_div_s_d(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 2; i++) {
        dst->d[i] = msa_div_s_df(DF_DOUBLE, lhs->d[i], rhs->d[i]);
    }
}
2131 
/*
 * Unsigned division for data format df.  Both arguments are masked to the
 * element width with UNSIGNED() before dividing.  A zero divisor returns -1
 * instead of performing the division.
 *
 * The zero test is made on the masked divisor, i.e. on the exact value that
 * is used in the division, so an argument whose low DF_BITS(df) bits are
 * zero can never reach the host '/' operator, whatever its upper bits hold.
 */
static inline int64_t msa_div_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    uint64_t u_arg1 = UNSIGNED(arg1, df);
    uint64_t u_arg2 = UNSIGNED(arg2, df);

    return u_arg2 ? u_arg1 / u_arg2 : -1;
}
2138 
/*
 * Unsigned divide over the 16 byte lanes: each lane of register wd receives
 * msa_div_u_df(DF_BYTE, ...) of the matching lanes of registers ws
 * (dividend) and wt (divisor).  wd, ws and wt index env->active_fpu.fpr[].
 */
void helper_msa_div_u_b(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 16; i++) {
        dst->b[i] = msa_div_u_df(DF_BYTE, lhs->b[i], rhs->b[i]);
    }
}
2163 
/*
 * Unsigned divide over the eight halfword lanes: each lane of register wd
 * receives msa_div_u_df(DF_HALF, ...) of the matching lanes of registers ws
 * (dividend) and wt (divisor).  wd, ws and wt index env->active_fpu.fpr[].
 */
void helper_msa_div_u_h(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 8; i++) {
        dst->h[i] = msa_div_u_df(DF_HALF, lhs->h[i], rhs->h[i]);
    }
}
2180 
/*
 * Unsigned divide over the four word lanes: each lane of register wd
 * receives msa_div_u_df(DF_WORD, ...) of the matching lanes of registers ws
 * (dividend) and wt (divisor).  wd, ws and wt index env->active_fpu.fpr[].
 */
void helper_msa_div_u_w(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 4; i++) {
        dst->w[i] = msa_div_u_df(DF_WORD, lhs->w[i], rhs->w[i]);
    }
}
2193 
/*
 * Unsigned divide over both doubleword lanes: each lane of register wd
 * receives msa_div_u_df(DF_DOUBLE, ...) of the matching lanes of registers
 * ws (dividend) and wt (divisor).  wd, ws and wt index
 * env->active_fpu.fpr[].
 */
void helper_msa_div_u_d(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 2; i++) {
        dst->d[i] = msa_div_u_df(DF_DOUBLE, lhs->d[i], rhs->d[i]);
    }
}
2204 
2205 
2206 /*
2207  * Int Dot Product
2208  * ---------------
2209  *
2210  * +---------------+----------------------------------------------------------+
2211  * | DOTP_S.H      | Vector Signed Dot Product (halfword)                     |
2212  * | DOTP_S.W      | Vector Signed Dot Product (word)                         |
2213  * | DOTP_S.D      | Vector Signed Dot Product (doubleword)                   |
2214  * | DOTP_U.H      | Vector Unsigned Dot Product (halfword)                   |
2215  * | DOTP_U.W      | Vector Unsigned Dot Product (word)                       |
2216  * | DOTP_U.D      | Vector Unsigned Dot Product (doubleword)                 |
 * | DPADD_S.H     | Vector Signed Dot Product and Add (halfword)             |
 * | DPADD_S.W     | Vector Signed Dot Product and Add (word)                 |
 * | DPADD_S.D     | Vector Signed Dot Product and Add (doubleword)           |
 * | DPADD_U.H     | Vector Unsigned Dot Product and Add (halfword)           |
 * | DPADD_U.W     | Vector Unsigned Dot Product and Add (word)               |
 * | DPADD_U.D     | Vector Unsigned Dot Product and Add (doubleword)         |
 * | DPSUB_S.H     | Vector Signed Dot Product and Subtract (halfword)        |
 * | DPSUB_S.W     | Vector Signed Dot Product and Subtract (word)            |
 * | DPSUB_S.D     | Vector Signed Dot Product and Subtract (doubleword)      |
 * | DPSUB_U.H     | Vector Unsigned Dot Product and Subtract (halfword)      |
 * | DPSUB_U.W     | Vector Unsigned Dot Product and Subtract (word)          |
 * | DPSUB_U.D     | Vector Unsigned Dot Product and Subtract (doubleword)    |
2229  * +---------------+----------------------------------------------------------+
2230  */
2231 
/*
 * Assign SIGNED_EVEN(val, df) to 'even' and SIGNED_ODD(val, df) to 'odd'.
 * 'even' and 'odd' must be assignable; the do/while (0) wrapper lets the
 * macro be used as a single statement.
 */
#define SIGNED_EXTRACT(even, odd, val, df)  \
    do {                                    \
        even = SIGNED_EVEN(val, df);        \
        odd = SIGNED_ODD(val, df);          \
    } while (0)
2237 
/*
 * Assign UNSIGNED_EVEN(val, df) to 'even' and UNSIGNED_ODD(val, df) to
 * 'odd'.  'even' and 'odd' must be assignable; the do/while (0) wrapper lets
 * the macro be used as a single statement.
 */
#define UNSIGNED_EXTRACT(even, odd, val, df)    \
    do {                                        \
        even = UNSIGNED_EVEN(val, df);          \
        odd = UNSIGNED_ODD(val, df);            \
    } while (0)
2243 
2244 
/*
 * Signed dot product of one element pair.  Each argument is split with
 * SIGNED_EXTRACT() into an even and an odd part; the result is
 * even(arg1) * even(arg2) + odd(arg1) * odd(arg2).
 */
static inline int64_t msa_dotp_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    int64_t e1, o1;
    int64_t e2, o2;
    int64_t even_product, odd_product;

    SIGNED_EXTRACT(e1, o1, arg1, df);
    SIGNED_EXTRACT(e2, o2, arg2, df);
    even_product = e1 * e2;
    odd_product = o1 * o2;

    return even_product + odd_product;
}
2255 
/*
 * Signed dot product over the eight halfword lanes: each lane of register wd
 * receives msa_dotp_s_df(DF_HALF, ...) of the matching lanes of registers ws
 * and wt.  wd, ws and wt index env->active_fpu.fpr[].
 */
void helper_msa_dotp_s_h(CPUMIPSState *env,
                         uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 8; i++) {
        dst->h[i] = msa_dotp_s_df(DF_HALF, lhs->h[i], rhs->h[i]);
    }
}
2272 
/*
 * Signed dot product over the four word lanes: each lane of register wd
 * receives msa_dotp_s_df(DF_WORD, ...) of the matching lanes of registers ws
 * and wt.  wd, ws and wt index env->active_fpu.fpr[].
 */
void helper_msa_dotp_s_w(CPUMIPSState *env,
                         uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 4; i++) {
        dst->w[i] = msa_dotp_s_df(DF_WORD, lhs->w[i], rhs->w[i]);
    }
}
2285 
/*
 * Signed dot product over both doubleword lanes: each lane of register wd
 * receives msa_dotp_s_df(DF_DOUBLE, ...) of the matching lanes of registers
 * ws and wt.  wd, ws and wt index env->active_fpu.fpr[].
 */
void helper_msa_dotp_s_d(CPUMIPSState *env,
                         uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 2; i++) {
        dst->d[i] = msa_dotp_s_df(DF_DOUBLE, lhs->d[i], rhs->d[i]);
    }
}
2296 
2297 
/*
 * Unsigned dot product of one element pair.  Each argument is split with
 * UNSIGNED_EXTRACT() into an even and an odd part; the result is
 * even(arg1) * even(arg2) + odd(arg1) * odd(arg2), computed in int64_t.
 */
static inline int64_t msa_dotp_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    int64_t e1, o1;
    int64_t e2, o2;
    int64_t even_product, odd_product;

    UNSIGNED_EXTRACT(e1, o1, arg1, df);
    UNSIGNED_EXTRACT(e2, o2, arg2, df);
    even_product = e1 * e2;
    odd_product = o1 * o2;

    return even_product + odd_product;
}
2308 
/*
 * Unsigned dot product over the eight halfword lanes: each lane of register
 * wd receives msa_dotp_u_df(DF_HALF, ...) of the matching lanes of registers
 * ws and wt.  wd, ws and wt index env->active_fpu.fpr[].
 */
void helper_msa_dotp_u_h(CPUMIPSState *env,
                         uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 8; i++) {
        dst->h[i] = msa_dotp_u_df(DF_HALF, lhs->h[i], rhs->h[i]);
    }
}
2325 
/*
 * Unsigned dot product over the four word lanes: each lane of register wd
 * receives msa_dotp_u_df(DF_WORD, ...) of the matching lanes of registers ws
 * and wt.  wd, ws and wt index env->active_fpu.fpr[].
 */
void helper_msa_dotp_u_w(CPUMIPSState *env,
                         uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 4; i++) {
        dst->w[i] = msa_dotp_u_df(DF_WORD, lhs->w[i], rhs->w[i]);
    }
}
2338 
/*
 * Unsigned dot product over both doubleword lanes: each lane of register wd
 * receives msa_dotp_u_df(DF_DOUBLE, ...) of the matching lanes of registers
 * ws and wt.  wd, ws and wt index env->active_fpu.fpr[].
 */
void helper_msa_dotp_u_d(CPUMIPSState *env,
                         uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 2; i++) {
        dst->d[i] = msa_dotp_u_df(DF_DOUBLE, lhs->d[i], rhs->d[i]);
    }
}
2349 
2350 
/*
 * Signed dot product and accumulate for one element.  arg1 and arg2 are
 * split with SIGNED_EXTRACT() into even and odd parts; the function returns
 * dest + even(arg1) * even(arg2) + odd(arg1) * odd(arg2).
 */
static inline int64_t msa_dpadd_s_df(uint32_t df, int64_t dest, int64_t arg1,
                                     int64_t arg2)
{
    int64_t e1, o1;
    int64_t e2, o2;
    int64_t even_product, odd_product;

    SIGNED_EXTRACT(e1, o1, arg1, df);
    SIGNED_EXTRACT(e2, o2, arg2, df);
    even_product = e1 * e2;
    odd_product = o1 * o2;

    return dest + even_product + odd_product;
}
2362 
/*
 * Signed dot product and add over the eight halfword lanes: each lane of
 * register wd is replaced by msa_dpadd_s_df(DF_HALF, ...) of its current
 * value and the matching lanes of registers ws and wt.  wd, ws and wt index
 * env->active_fpu.fpr[].
 */
void helper_msa_dpadd_s_h(CPUMIPSState *env,
                          uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 8; i++) {
        dst->h[i] = msa_dpadd_s_df(DF_HALF, dst->h[i], lhs->h[i], rhs->h[i]);
    }
}
2379 
/*
 * Signed dot product and add over the four word lanes: each lane of register
 * wd is replaced by msa_dpadd_s_df(DF_WORD, ...) of its current value and
 * the matching lanes of registers ws and wt.  wd, ws and wt index
 * env->active_fpu.fpr[].
 */
void helper_msa_dpadd_s_w(CPUMIPSState *env,
                          uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 4; i++) {
        dst->w[i] = msa_dpadd_s_df(DF_WORD, dst->w[i], lhs->w[i], rhs->w[i]);
    }
}
2392 
/*
 * Signed dot product and add over both doubleword lanes: each lane of
 * register wd is replaced by msa_dpadd_s_df(DF_DOUBLE, ...) of its current
 * value and the matching lanes of registers ws and wt.  wd, ws and wt index
 * env->active_fpu.fpr[].
 */
void helper_msa_dpadd_s_d(CPUMIPSState *env,
                          uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 2; i++) {
        dst->d[i] = msa_dpadd_s_df(DF_DOUBLE, dst->d[i], lhs->d[i], rhs->d[i]);
    }
}
2403 
2404 
/*
 * Unsigned dot product and accumulate for one element.  arg1 and arg2 are
 * split with UNSIGNED_EXTRACT() into even and odd parts; the function
 * returns dest + even(arg1) * even(arg2) + odd(arg1) * odd(arg2).
 */
static inline int64_t msa_dpadd_u_df(uint32_t df, int64_t dest, int64_t arg1,
                                     int64_t arg2)
{
    int64_t e1, o1;
    int64_t e2, o2;
    int64_t even_product, odd_product;

    UNSIGNED_EXTRACT(e1, o1, arg1, df);
    UNSIGNED_EXTRACT(e2, o2, arg2, df);
    even_product = e1 * e2;
    odd_product = o1 * o2;

    return dest + even_product + odd_product;
}
2416 
/*
 * Unsigned dot product and add over the eight halfword lanes: each lane of
 * register wd is replaced by msa_dpadd_u_df(DF_HALF, ...) of its current
 * value and the matching lanes of registers ws and wt.  wd, ws and wt index
 * env->active_fpu.fpr[].
 */
void helper_msa_dpadd_u_h(CPUMIPSState *env,
                          uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 8; i++) {
        dst->h[i] = msa_dpadd_u_df(DF_HALF, dst->h[i], lhs->h[i], rhs->h[i]);
    }
}
2433 
/*
 * Unsigned dot product and add over the four word lanes: each lane of
 * register wd is replaced by msa_dpadd_u_df(DF_WORD, ...) of its current
 * value and the matching lanes of registers ws and wt.  wd, ws and wt index
 * env->active_fpu.fpr[].
 */
void helper_msa_dpadd_u_w(CPUMIPSState *env,
                          uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 4; i++) {
        dst->w[i] = msa_dpadd_u_df(DF_WORD, dst->w[i], lhs->w[i], rhs->w[i]);
    }
}
2446 
/*
 * Unsigned dot product and add over both doubleword lanes: each lane of
 * register wd is replaced by msa_dpadd_u_df(DF_DOUBLE, ...) of its current
 * value and the matching lanes of registers ws and wt.  wd, ws and wt index
 * env->active_fpu.fpr[].
 */
void helper_msa_dpadd_u_d(CPUMIPSState *env,
                          uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 2; i++) {
        dst->d[i] = msa_dpadd_u_df(DF_DOUBLE, dst->d[i], lhs->d[i], rhs->d[i]);
    }
}
2457 
2458 
/*
 * Signed dot product and subtract for one element.  arg1 and arg2 are split
 * with SIGNED_EXTRACT() into even and odd parts; the function returns
 * dest - (even(arg1) * even(arg2) + odd(arg1) * odd(arg2)).
 */
static inline int64_t msa_dpsub_s_df(uint32_t df, int64_t dest, int64_t arg1,
                                     int64_t arg2)
{
    int64_t e1, o1;
    int64_t e2, o2;
    int64_t even_product, odd_product;

    SIGNED_EXTRACT(e1, o1, arg1, df);
    SIGNED_EXTRACT(e2, o2, arg2, df);
    even_product = e1 * e2;
    odd_product = o1 * o2;

    return dest - (even_product + odd_product);
}
2470 
/*
 * Signed dot product and subtract over the eight halfword lanes: each lane
 * of register wd is replaced by msa_dpsub_s_df(DF_HALF, ...) of its current
 * value and the matching lanes of registers ws and wt.  wd, ws and wt index
 * env->active_fpu.fpr[].
 */
void helper_msa_dpsub_s_h(CPUMIPSState *env,
                          uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 8; i++) {
        dst->h[i] = msa_dpsub_s_df(DF_HALF, dst->h[i], lhs->h[i], rhs->h[i]);
    }
}
2487 
/*
 * Signed dot product and subtract over the four word lanes: each lane of
 * register wd is replaced by msa_dpsub_s_df(DF_WORD, ...) of its current
 * value and the matching lanes of registers ws and wt.  wd, ws and wt index
 * env->active_fpu.fpr[].
 */
void helper_msa_dpsub_s_w(CPUMIPSState *env,
                          uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 4; i++) {
        dst->w[i] = msa_dpsub_s_df(DF_WORD, dst->w[i], lhs->w[i], rhs->w[i]);
    }
}
2500 
/*
 * Signed dot product and subtract over both doubleword lanes: each lane of
 * register wd is replaced by msa_dpsub_s_df(DF_DOUBLE, ...) of its current
 * value and the matching lanes of registers ws and wt.  wd, ws and wt index
 * env->active_fpu.fpr[].
 */
void helper_msa_dpsub_s_d(CPUMIPSState *env,
                          uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 2; i++) {
        dst->d[i] = msa_dpsub_s_df(DF_DOUBLE, dst->d[i], lhs->d[i], rhs->d[i]);
    }
}
2511 
2512 
/*
 * Per-lane unsigned dot-product-subtract: returns dest minus the sum of the
 * even-part product and the odd-part product of arg1 and arg2.
 *
 * The even/odd parts are produced by UNSIGNED_EXTRACT, which is defined
 * elsewhere in this file; presumably it zero-extends the low and high halves
 * of a df-sized element -- TODO confirm against the macro definition.
 */
static inline int64_t msa_dpsub_u_df(uint32_t df, int64_t dest, int64_t arg1,
                                     int64_t arg2)
{
    int64_t even_arg1, odd_arg1;
    int64_t even_arg2, odd_arg2;
    int64_t dot;

    UNSIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df);
    UNSIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df);

    dot = (even_arg1 * even_arg2) + (odd_arg1 * odd_arg2);
    return dest - dot;
}
2524 
/*
 * Halfword variant: for each of the 8 halfword lanes, replace wd[i] with
 * msa_dpsub_u_df(DF_HALF, wd[i], ws[i], wt[i]).
 * wd, ws and wt are indices into env->active_fpu.fpr[].
 */
void helper_msa_dpsub_u_h(CPUMIPSState *env,
                          uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *vd = &env->active_fpu.fpr[wd].wr;
    wr_t *vs = &env->active_fpu.fpr[ws].wr;
    wr_t *vt = &env->active_fpu.fpr[wt].wr;

    /* Each lane only touches its own index, so register aliasing is safe. */
    for (int i = 0; i < 8; i++) {
        vd->h[i] = msa_dpsub_u_df(DF_HALF, vd->h[i], vs->h[i], vt->h[i]);
    }
}
2541 
/*
 * Word variant: for each of the 4 word lanes, replace wd[i] with
 * msa_dpsub_u_df(DF_WORD, wd[i], ws[i], wt[i]).
 * wd, ws and wt are indices into env->active_fpu.fpr[].
 */
void helper_msa_dpsub_u_w(CPUMIPSState *env,
                          uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *vd = &env->active_fpu.fpr[wd].wr;
    wr_t *vs = &env->active_fpu.fpr[ws].wr;
    wr_t *vt = &env->active_fpu.fpr[wt].wr;

    /* Each lane only touches its own index, so register aliasing is safe. */
    for (int i = 0; i < 4; i++) {
        vd->w[i] = msa_dpsub_u_df(DF_WORD, vd->w[i], vs->w[i], vt->w[i]);
    }
}
2554 
/*
 * Doubleword variant: for each of the 2 doubleword lanes, replace wd[i] with
 * msa_dpsub_u_df(DF_DOUBLE, wd[i], ws[i], wt[i]).
 * wd, ws and wt are indices into env->active_fpu.fpr[].
 */
void helper_msa_dpsub_u_d(CPUMIPSState *env,
                          uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *vd = &env->active_fpu.fpr[wd].wr;
    wr_t *vs = &env->active_fpu.fpr[ws].wr;
    wr_t *vt = &env->active_fpu.fpr[wt].wr;

    /* Each lane only touches its own index, so register aliasing is safe. */
    for (int i = 0; i < 2; i++) {
        vd->d[i] = msa_dpsub_u_df(DF_DOUBLE, vd->d[i],
                                  vs->d[i], vt->d[i]);
    }
}
2565 
2566 
2567 /*
2568  * Int Max Min
2569  * -----------
2570  *
2571  * +---------------+----------------------------------------------------------+
2572  * | MAX_A.B       | Vector Maximum Based on Absolute Value (byte)            |
2573  * | MAX_A.H       | Vector Maximum Based on Absolute Value (halfword)        |
2574  * | MAX_A.W       | Vector Maximum Based on Absolute Value (word)            |
2575  * | MAX_A.D       | Vector Maximum Based on Absolute Value (doubleword)      |
2576  * | MAX_S.B       | Vector Signed Maximum (byte)                             |
2577  * | MAX_S.H       | Vector Signed Maximum (halfword)                         |
2578  * | MAX_S.W       | Vector Signed Maximum (word)                             |
2579  * | MAX_S.D       | Vector Signed Maximum (doubleword)                       |
2580  * | MAX_U.B       | Vector Unsigned Maximum (byte)                           |
2581  * | MAX_U.H       | Vector Unsigned Maximum (halfword)                       |
2582  * | MAX_U.W       | Vector Unsigned Maximum (word)                           |
2583  * | MAX_U.D       | Vector Unsigned Maximum (doubleword)                     |
2584  * | MIN_A.B       | Vector Minimum Based on Absolute Value (byte)            |
2585  * | MIN_A.H       | Vector Minimum Based on Absolute Value (halfword)        |
2586  * | MIN_A.W       | Vector Minimum Based on Absolute Value (word)            |
2587  * | MIN_A.D       | Vector Minimum Based on Absolute Value (doubleword)      |
2588  * | MIN_S.B       | Vector Signed Minimum (byte)                             |
2589  * | MIN_S.H       | Vector Signed Minimum (halfword)                         |
2590  * | MIN_S.W       | Vector Signed Minimum (word)                             |
2591  * | MIN_S.D       | Vector Signed Minimum (doubleword)                       |
2592  * | MIN_U.B       | Vector Unsigned Minimum (byte)                           |
2593  * | MIN_U.H       | Vector Unsigned Minimum (halfword)                       |
2594  * | MIN_U.W       | Vector Unsigned Minimum (word)                           |
2595  * | MIN_U.D       | Vector Unsigned Minimum (doubleword)                     |
2596  * +---------------+----------------------------------------------------------+
2597  */
2598 
/*
 * Per-lane "maximum based on absolute value": returns whichever of arg1/arg2
 * has the strictly larger magnitude; on a tie arg2 is returned.
 *
 * The returned value is the original (signed) operand, not its magnitude.
 * df is unused; it is kept for signature parity with the other *_df helpers.
 */
static inline int64_t msa_max_a_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    /*
     * Negate in unsigned arithmetic: -INT64_MIN overflows as a signed
     * operation (undefined behaviour), whereas the unsigned negation yields
     * the correct magnitude 2^63.
     */
    uint64_t abs_arg1 = arg1 >= 0 ? (uint64_t)arg1 : -(uint64_t)arg1;
    uint64_t abs_arg2 = arg2 >= 0 ? (uint64_t)arg2 : -(uint64_t)arg2;
    return abs_arg1 > abs_arg2 ? arg1 : arg2;
}
2605 
/*
 * MAX_A.B: Vector Maximum Based on Absolute Value (byte).
 * Writes msa_max_a_df(DF_BYTE, ws[i], wt[i]) to wd[i] for each of the 16
 * byte lanes. wd, ws and wt are indices into env->active_fpu.fpr[].
 */
void helper_msa_max_a_b(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *vd = &env->active_fpu.fpr[wd].wr;
    wr_t *vs = &env->active_fpu.fpr[ws].wr;
    wr_t *vt = &env->active_fpu.fpr[wt].wr;

    /* Each lane only touches its own index, so register aliasing is safe. */
    for (int i = 0; i < 16; i++) {
        vd->b[i] = msa_max_a_df(DF_BYTE, vs->b[i], vt->b[i]);
    }
}
2630 
/*
 * MAX_A.H: Vector Maximum Based on Absolute Value (halfword).
 * Writes msa_max_a_df(DF_HALF, ws[i], wt[i]) to wd[i] for each of the 8
 * halfword lanes. wd, ws and wt are indices into env->active_fpu.fpr[].
 */
void helper_msa_max_a_h(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *vd = &env->active_fpu.fpr[wd].wr;
    wr_t *vs = &env->active_fpu.fpr[ws].wr;
    wr_t *vt = &env->active_fpu.fpr[wt].wr;

    /* Each lane only touches its own index, so register aliasing is safe. */
    for (int i = 0; i < 8; i++) {
        vd->h[i] = msa_max_a_df(DF_HALF, vs->h[i], vt->h[i]);
    }
}
2647 
/*
 * MAX_A.W: Vector Maximum Based on Absolute Value (word).
 * Writes msa_max_a_df(DF_WORD, ws[i], wt[i]) to wd[i] for each of the 4
 * word lanes. wd, ws and wt are indices into env->active_fpu.fpr[].
 */
void helper_msa_max_a_w(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *vd = &env->active_fpu.fpr[wd].wr;
    wr_t *vs = &env->active_fpu.fpr[ws].wr;
    wr_t *vt = &env->active_fpu.fpr[wt].wr;

    /* Each lane only touches its own index, so register aliasing is safe. */
    for (int i = 0; i < 4; i++) {
        vd->w[i] = msa_max_a_df(DF_WORD, vs->w[i], vt->w[i]);
    }
}
2660 
/*
 * MAX_A.D: Vector Maximum Based on Absolute Value (doubleword).
 * Writes msa_max_a_df(DF_DOUBLE, ws[i], wt[i]) to wd[i] for each of the 2
 * doubleword lanes. wd, ws and wt are indices into env->active_fpu.fpr[].
 */
void helper_msa_max_a_d(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *vd = &env->active_fpu.fpr[wd].wr;
    wr_t *vs = &env->active_fpu.fpr[ws].wr;
    wr_t *vt = &env->active_fpu.fpr[wt].wr;

    /* Each lane only touches its own index, so register aliasing is safe. */
    for (int i = 0; i < 2; i++) {
        vd->d[i] = msa_max_a_df(DF_DOUBLE, vs->d[i], vt->d[i]);
    }
}
2671 
2672 
/*
 * Per-lane signed maximum: returns arg1 when it is strictly greater than
 * arg2, otherwise arg2. df is unused.
 */
static inline int64_t msa_max_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    if (arg1 > arg2) {
        return arg1;
    }
    return arg2;
}
2677 
/*
 * MAX_S.B: Vector Signed Maximum (byte).
 * Writes msa_max_s_df(DF_BYTE, ws[i], wt[i]) to wd[i] for each of the 16
 * byte lanes. wd, ws and wt are indices into env->active_fpu.fpr[].
 */
void helper_msa_max_s_b(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *vd = &env->active_fpu.fpr[wd].wr;
    wr_t *vs = &env->active_fpu.fpr[ws].wr;
    wr_t *vt = &env->active_fpu.fpr[wt].wr;

    /* Each lane only touches its own index, so register aliasing is safe. */
    for (int i = 0; i < 16; i++) {
        vd->b[i] = msa_max_s_df(DF_BYTE, vs->b[i], vt->b[i]);
    }
}
2702 
/*
 * MAX_S.H: Vector Signed Maximum (halfword).
 * Writes msa_max_s_df(DF_HALF, ws[i], wt[i]) to wd[i] for each of the 8
 * halfword lanes. wd, ws and wt are indices into env->active_fpu.fpr[].
 */
void helper_msa_max_s_h(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *vd = &env->active_fpu.fpr[wd].wr;
    wr_t *vs = &env->active_fpu.fpr[ws].wr;
    wr_t *vt = &env->active_fpu.fpr[wt].wr;

    /* Each lane only touches its own index, so register aliasing is safe. */
    for (int i = 0; i < 8; i++) {
        vd->h[i] = msa_max_s_df(DF_HALF, vs->h[i], vt->h[i]);
    }
}
2719 
/*
 * MAX_S.W: Vector Signed Maximum (word).
 * Writes msa_max_s_df(DF_WORD, ws[i], wt[i]) to wd[i] for each of the 4
 * word lanes. wd, ws and wt are indices into env->active_fpu.fpr[].
 */
void helper_msa_max_s_w(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *vd = &env->active_fpu.fpr[wd].wr;
    wr_t *vs = &env->active_fpu.fpr[ws].wr;
    wr_t *vt = &env->active_fpu.fpr[wt].wr;

    /* Each lane only touches its own index, so register aliasing is safe. */
    for (int i = 0; i < 4; i++) {
        vd->w[i] = msa_max_s_df(DF_WORD, vs->w[i], vt->w[i]);
    }
}
2732 
/*
 * MAX_S.D: Vector Signed Maximum (doubleword).
 * Writes msa_max_s_df(DF_DOUBLE, ws[i], wt[i]) to wd[i] for each of the 2
 * doubleword lanes. wd, ws and wt are indices into env->active_fpu.fpr[].
 */
void helper_msa_max_s_d(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *vd = &env->active_fpu.fpr[wd].wr;
    wr_t *vs = &env->active_fpu.fpr[ws].wr;
    wr_t *vt = &env->active_fpu.fpr[wt].wr;

    /* Each lane only touches its own index, so register aliasing is safe. */
    for (int i = 0; i < 2; i++) {
        vd->d[i] = msa_max_s_df(DF_DOUBLE, vs->d[i], vt->d[i]);
    }
}
2743 
2744 
/*
 * Per-lane unsigned maximum. Both operands are masked to the element width
 * with UNSIGNED() for the comparison only; the value returned is the
 * original, unmasked argument (arg2 on a tie).
 */
static inline int64_t msa_max_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const uint64_t lhs = UNSIGNED(arg1, df);
    const uint64_t rhs = UNSIGNED(arg2, df);

    if (lhs > rhs) {
        return arg1;
    }
    return arg2;
}
2751 
/*
 * MAX_U.B: Vector Unsigned Maximum (byte).
 * Writes msa_max_u_df(DF_BYTE, ws[i], wt[i]) to wd[i] for each of the 16
 * byte lanes. wd, ws and wt are indices into env->active_fpu.fpr[].
 */
void helper_msa_max_u_b(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *vd = &env->active_fpu.fpr[wd].wr;
    wr_t *vs = &env->active_fpu.fpr[ws].wr;
    wr_t *vt = &env->active_fpu.fpr[wt].wr;

    /* Each lane only touches its own index, so register aliasing is safe. */
    for (int i = 0; i < 16; i++) {
        vd->b[i] = msa_max_u_df(DF_BYTE, vs->b[i], vt->b[i]);
    }
}
2776 
/*
 * MAX_U.H: Vector Unsigned Maximum (halfword).
 * Writes msa_max_u_df(DF_HALF, ws[i], wt[i]) to wd[i] for each of the 8
 * halfword lanes. wd, ws and wt are indices into env->active_fpu.fpr[].
 */
void helper_msa_max_u_h(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *vd = &env->active_fpu.fpr[wd].wr;
    wr_t *vs = &env->active_fpu.fpr[ws].wr;
    wr_t *vt = &env->active_fpu.fpr[wt].wr;

    /* Each lane only touches its own index, so register aliasing is safe. */
    for (int i = 0; i < 8; i++) {
        vd->h[i] = msa_max_u_df(DF_HALF, vs->h[i], vt->h[i]);
    }
}
2793 
/*
 * MAX_U.W: Vector Unsigned Maximum (word).
 * Writes msa_max_u_df(DF_WORD, ws[i], wt[i]) to wd[i] for each of the 4
 * word lanes. wd, ws and wt are indices into env->active_fpu.fpr[].
 */
void helper_msa_max_u_w(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *vd = &env->active_fpu.fpr[wd].wr;
    wr_t *vs = &env->active_fpu.fpr[ws].wr;
    wr_t *vt = &env->active_fpu.fpr[wt].wr;

    /* Each lane only touches its own index, so register aliasing is safe. */
    for (int i = 0; i < 4; i++) {
        vd->w[i] = msa_max_u_df(DF_WORD, vs->w[i], vt->w[i]);
    }
}
2806 
/*
 * MAX_U.D: Vector Unsigned Maximum (doubleword).
 * Writes msa_max_u_df(DF_DOUBLE, ws[i], wt[i]) to wd[i] for each of the 2
 * doubleword lanes. wd, ws and wt are indices into env->active_fpu.fpr[].
 */
void helper_msa_max_u_d(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *vd = &env->active_fpu.fpr[wd].wr;
    wr_t *vs = &env->active_fpu.fpr[ws].wr;
    wr_t *vt = &env->active_fpu.fpr[wt].wr;

    /* Each lane only touches its own index, so register aliasing is safe. */
    for (int i = 0; i < 2; i++) {
        vd->d[i] = msa_max_u_df(DF_DOUBLE, vs->d[i], vt->d[i]);
    }
}
2817 
2818 
/*
 * Per-lane "minimum based on absolute value": returns whichever of arg1/arg2
 * has the strictly smaller magnitude; on a tie arg2 is returned.
 *
 * The returned value is the original (signed) operand, not its magnitude.
 * df is unused; it is kept for signature parity with the other *_df helpers.
 */
static inline int64_t msa_min_a_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    /*
     * Negate in unsigned arithmetic: -INT64_MIN overflows as a signed
     * operation (undefined behaviour), whereas the unsigned negation yields
     * the correct magnitude 2^63.
     */
    uint64_t abs_arg1 = arg1 >= 0 ? (uint64_t)arg1 : -(uint64_t)arg1;
    uint64_t abs_arg2 = arg2 >= 0 ? (uint64_t)arg2 : -(uint64_t)arg2;
    return abs_arg1 < abs_arg2 ? arg1 : arg2;
}
2825 
/*
 * MIN_A.B: Vector Minimum Based on Absolute Value (byte).
 * Writes msa_min_a_df(DF_BYTE, ws[i], wt[i]) to wd[i] for each of the 16
 * byte lanes. wd, ws and wt are indices into env->active_fpu.fpr[].
 */
void helper_msa_min_a_b(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *vd = &env->active_fpu.fpr[wd].wr;
    wr_t *vs = &env->active_fpu.fpr[ws].wr;
    wr_t *vt = &env->active_fpu.fpr[wt].wr;

    /* Each lane only touches its own index, so register aliasing is safe. */
    for (int i = 0; i < 16; i++) {
        vd->b[i] = msa_min_a_df(DF_BYTE, vs->b[i], vt->b[i]);
    }
}
2850 
/*
 * MIN_A.H: Vector Minimum Based on Absolute Value (halfword).
 * Writes msa_min_a_df(DF_HALF, ws[i], wt[i]) to wd[i] for each of the 8
 * halfword lanes. wd, ws and wt are indices into env->active_fpu.fpr[].
 */
void helper_msa_min_a_h(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *vd = &env->active_fpu.fpr[wd].wr;
    wr_t *vs = &env->active_fpu.fpr[ws].wr;
    wr_t *vt = &env->active_fpu.fpr[wt].wr;

    /* Each lane only touches its own index, so register aliasing is safe. */
    for (int i = 0; i < 8; i++) {
        vd->h[i] = msa_min_a_df(DF_HALF, vs->h[i], vt->h[i]);
    }
}
2867 
/*
 * MIN_A.W: Vector Minimum Based on Absolute Value (word).
 * Writes msa_min_a_df(DF_WORD, ws[i], wt[i]) to wd[i] for each of the 4
 * word lanes. wd, ws and wt are indices into env->active_fpu.fpr[].
 */
void helper_msa_min_a_w(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *vd = &env->active_fpu.fpr[wd].wr;
    wr_t *vs = &env->active_fpu.fpr[ws].wr;
    wr_t *vt = &env->active_fpu.fpr[wt].wr;

    /* Each lane only touches its own index, so register aliasing is safe. */
    for (int i = 0; i < 4; i++) {
        vd->w[i] = msa_min_a_df(DF_WORD, vs->w[i], vt->w[i]);
    }
}
2880 
/*
 * MIN_A.D: Vector Minimum Based on Absolute Value (doubleword).
 * Writes msa_min_a_df(DF_DOUBLE, ws[i], wt[i]) to wd[i] for each of the 2
 * doubleword lanes. wd, ws and wt are indices into env->active_fpu.fpr[].
 */
void helper_msa_min_a_d(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *vd = &env->active_fpu.fpr[wd].wr;
    wr_t *vs = &env->active_fpu.fpr[ws].wr;
    wr_t *vt = &env->active_fpu.fpr[wt].wr;

    /* Each lane only touches its own index, so register aliasing is safe. */
    for (int i = 0; i < 2; i++) {
        vd->d[i] = msa_min_a_df(DF_DOUBLE, vs->d[i], vt->d[i]);
    }
}
2891 
2892 
/*
 * Per-lane signed minimum: returns arg1 when it is strictly less than
 * arg2, otherwise arg2. df is unused.
 */
static inline int64_t msa_min_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    if (arg1 < arg2) {
        return arg1;
    }
    return arg2;
}
2897 
/*
 * MIN_S.B: Vector Signed Minimum (byte).
 * Writes msa_min_s_df(DF_BYTE, ws[i], wt[i]) to wd[i] for each of the 16
 * byte lanes. wd, ws and wt are indices into env->active_fpu.fpr[].
 */
void helper_msa_min_s_b(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *vd = &env->active_fpu.fpr[wd].wr;
    wr_t *vs = &env->active_fpu.fpr[ws].wr;
    wr_t *vt = &env->active_fpu.fpr[wt].wr;

    /* Each lane only touches its own index, so register aliasing is safe. */
    for (int i = 0; i < 16; i++) {
        vd->b[i] = msa_min_s_df(DF_BYTE, vs->b[i], vt->b[i]);
    }
}
2922 
/*
 * MIN_S.H: Vector Signed Minimum (halfword).
 * Writes msa_min_s_df(DF_HALF, ws[i], wt[i]) to wd[i] for each of the 8
 * halfword lanes. wd, ws and wt are indices into env->active_fpu.fpr[].
 */
void helper_msa_min_s_h(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *vd = &env->active_fpu.fpr[wd].wr;
    wr_t *vs = &env->active_fpu.fpr[ws].wr;
    wr_t *vt = &env->active_fpu.fpr[wt].wr;

    /* Each lane only touches its own index, so register aliasing is safe. */
    for (int i = 0; i < 8; i++) {
        vd->h[i] = msa_min_s_df(DF_HALF, vs->h[i], vt->h[i]);
    }
}
2939 
/*
 * MIN_S.W: Vector Signed Minimum (word).
 * Writes msa_min_s_df(DF_WORD, ws[i], wt[i]) to wd[i] for each of the 4
 * word lanes. wd, ws and wt are indices into env->active_fpu.fpr[].
 */
void helper_msa_min_s_w(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *vd = &env->active_fpu.fpr[wd].wr;
    wr_t *vs = &env->active_fpu.fpr[ws].wr;
    wr_t *vt = &env->active_fpu.fpr[wt].wr;

    /* Each lane only touches its own index, so register aliasing is safe. */
    for (int i = 0; i < 4; i++) {
        vd->w[i] = msa_min_s_df(DF_WORD, vs->w[i], vt->w[i]);
    }
}
2952 
/*
 * MIN_S.D: Vector Signed Minimum (doubleword).
 * Writes msa_min_s_df(DF_DOUBLE, ws[i], wt[i]) to wd[i] for each of the 2
 * doubleword lanes. wd, ws and wt are indices into env->active_fpu.fpr[].
 */
void helper_msa_min_s_d(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *vd = &env->active_fpu.fpr[wd].wr;
    wr_t *vs = &env->active_fpu.fpr[ws].wr;
    wr_t *vt = &env->active_fpu.fpr[wt].wr;

    /* Each lane only touches its own index, so register aliasing is safe. */
    for (int i = 0; i < 2; i++) {
        vd->d[i] = msa_min_s_df(DF_DOUBLE, vs->d[i], vt->d[i]);
    }
}
2963 
2964 
/*
 * Per-lane unsigned minimum. Both operands are masked to the element width
 * with UNSIGNED() for the comparison only; the value returned is the
 * original, unmasked argument (arg2 on a tie).
 */
static inline int64_t msa_min_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const uint64_t lhs = UNSIGNED(arg1, df);
    const uint64_t rhs = UNSIGNED(arg2, df);

    if (lhs < rhs) {
        return arg1;
    }
    return arg2;
}
2971 
/*
 * MIN_U.B: Vector Unsigned Minimum (byte).
 * Writes msa_min_u_df(DF_BYTE, ws[i], wt[i]) to wd[i] for each of the 16
 * byte lanes. wd, ws and wt are indices into env->active_fpu.fpr[].
 */
void helper_msa_min_u_b(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *vd = &env->active_fpu.fpr[wd].wr;
    wr_t *vs = &env->active_fpu.fpr[ws].wr;
    wr_t *vt = &env->active_fpu.fpr[wt].wr;

    /* Each lane only touches its own index, so register aliasing is safe. */
    for (int i = 0; i < 16; i++) {
        vd->b[i] = msa_min_u_df(DF_BYTE, vs->b[i], vt->b[i]);
    }
}
2996 
/*
 * MIN_U.H: Vector Unsigned Minimum (halfword).
 * Writes msa_min_u_df(DF_HALF, ws[i], wt[i]) to wd[i] for each of the 8
 * halfword lanes. wd, ws and wt are indices into env->active_fpu.fpr[].
 */
void helper_msa_min_u_h(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *vd = &env->active_fpu.fpr[wd].wr;
    wr_t *vs = &env->active_fpu.fpr[ws].wr;
    wr_t *vt = &env->active_fpu.fpr[wt].wr;

    /* Each lane only touches its own index, so register aliasing is safe. */
    for (int i = 0; i < 8; i++) {
        vd->h[i] = msa_min_u_df(DF_HALF, vs->h[i], vt->h[i]);
    }
}
3013 
/*
 * MIN_U.W: Vector Unsigned Minimum (word).
 * Writes msa_min_u_df(DF_WORD, ws[i], wt[i]) to wd[i] for each of the 4
 * word lanes. wd, ws and wt are indices into env->active_fpu.fpr[].
 */
void helper_msa_min_u_w(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *vd = &env->active_fpu.fpr[wd].wr;
    wr_t *vs = &env->active_fpu.fpr[ws].wr;
    wr_t *vt = &env->active_fpu.fpr[wt].wr;

    /* Each lane only touches its own index, so register aliasing is safe. */
    for (int i = 0; i < 4; i++) {
        vd->w[i] = msa_min_u_df(DF_WORD, vs->w[i], vt->w[i]);
    }
}
3026 
/*
 * MIN_U.D: Vector Unsigned Minimum (doubleword).
 * Writes msa_min_u_df(DF_DOUBLE, ws[i], wt[i]) to wd[i] for each of the 2
 * doubleword lanes. wd, ws and wt are indices into env->active_fpu.fpr[].
 */
void helper_msa_min_u_d(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *vd = &env->active_fpu.fpr[wd].wr;
    wr_t *vs = &env->active_fpu.fpr[ws].wr;
    wr_t *vt = &env->active_fpu.fpr[wt].wr;

    /* Each lane only touches its own index, so register aliasing is safe. */
    for (int i = 0; i < 2; i++) {
        vd->d[i] = msa_min_u_df(DF_DOUBLE, vs->d[i], vt->d[i]);
    }
}
3037 
3038 
3039 /*
3040  * Int Modulo
3041  * ----------
3042  *
3043  * +---------------+----------------------------------------------------------+
3044  * | MOD_S.B       | Vector Signed Modulo (byte)                              |
3045  * | MOD_S.H       | Vector Signed Modulo (halfword)                          |
3046  * | MOD_S.W       | Vector Signed Modulo (word)                              |
3047  * | MOD_S.D       | Vector Signed Modulo (doubleword)                        |
3048  * | MOD_U.B       | Vector Unsigned Modulo (byte)                            |
3049  * | MOD_U.H       | Vector Unsigned Modulo (halfword)                        |
3050  * | MOD_U.W       | Vector Unsigned Modulo (word)                            |
3051  * | MOD_U.D       | Vector Unsigned Modulo (doubleword)                      |
3052  * +---------------+----------------------------------------------------------+
3053  */
3054 
/*
 * Per-lane signed modulo.
 *
 * Returns:
 *   - arg1            when arg2 is zero (no division is performed);
 *   - 0               when arg1 is DF_MIN_INT(df) and arg2 is -1, so the
 *                     overflowing division is never evaluated;
 *   - arg1 % arg2     otherwise.
 */
static inline int64_t msa_mod_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    if (arg2 == 0) {
        return arg1;
    }
    if (arg2 == -1 && arg1 == DF_MIN_INT(df)) {
        return 0;
    }
    return arg1 % arg2;
}
3062 
/*
 * MOD_S.B: Vector Signed Modulo (byte).
 * Writes msa_mod_s_df(DF_BYTE, ws[i], wt[i]) to wd[i] for each of the 16
 * byte lanes. wd, ws and wt are indices into env->active_fpu.fpr[].
 */
void helper_msa_mod_s_b(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *vd = &env->active_fpu.fpr[wd].wr;
    wr_t *vs = &env->active_fpu.fpr[ws].wr;
    wr_t *vt = &env->active_fpu.fpr[wt].wr;

    /* Each lane only touches its own index, so register aliasing is safe. */
    for (int i = 0; i < 16; i++) {
        vd->b[i] = msa_mod_s_df(DF_BYTE, vs->b[i], vt->b[i]);
    }
}
3087 
/*
 * MOD_S.H: Vector Signed Modulo (halfword).
 * Writes msa_mod_s_df(DF_HALF, ws[i], wt[i]) to wd[i] for each of the 8
 * halfword lanes. wd, ws and wt are indices into env->active_fpu.fpr[].
 */
void helper_msa_mod_s_h(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *vd = &env->active_fpu.fpr[wd].wr;
    wr_t *vs = &env->active_fpu.fpr[ws].wr;
    wr_t *vt = &env->active_fpu.fpr[wt].wr;

    /* Each lane only touches its own index, so register aliasing is safe. */
    for (int i = 0; i < 8; i++) {
        vd->h[i] = msa_mod_s_df(DF_HALF, vs->h[i], vt->h[i]);
    }
}
3104 
/*
 * MOD_S.W: Vector Signed Modulo (word).
 * Writes msa_mod_s_df(DF_WORD, ws[i], wt[i]) to wd[i] for each of the 4
 * word lanes. wd, ws and wt are indices into env->active_fpu.fpr[].
 */
void helper_msa_mod_s_w(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *vd = &env->active_fpu.fpr[wd].wr;
    wr_t *vs = &env->active_fpu.fpr[ws].wr;
    wr_t *vt = &env->active_fpu.fpr[wt].wr;

    /* Each lane only touches its own index, so register aliasing is safe. */
    for (int i = 0; i < 4; i++) {
        vd->w[i] = msa_mod_s_df(DF_WORD, vs->w[i], vt->w[i]);
    }
}
3117 
/*
 * Apply msa_mod_s_df() to each doubleword lane: wd.d[i] is computed from
 * ws.d[i] and wt.d[i] for lanes 0..1.
 */
void helper_msa_mod_s_d(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    const wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    const wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Lanes are independent, so wd may name the same register as ws/wt. */
    for (i = 0; i < 2; i++) {
        dst->d[i] = msa_mod_s_df(DF_DOUBLE, lhs->d[i], rhs->d[i]);
    }
}
3128 
/*
 * Per-element unsigned remainder.
 *
 * Both operands are first reduced with UNSIGNED() for data format @df.
 * Returns arg1 % arg2 on the reduced values; when the reduced divisor is
 * zero the reduced dividend is returned unchanged (no division is done).
 */
static inline int64_t msa_mod_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const uint64_t dividend = UNSIGNED(arg1, df);
    const uint64_t divisor = UNSIGNED(arg2, df);

    if (divisor == 0) {
        return dividend;
    }
    return dividend % divisor;
}
3135 
/*
 * Apply msa_mod_u_df() to each byte lane: wd.b[i] is computed from
 * ws.b[i] and wt.b[i] for lanes 0..15.
 */
void helper_msa_mod_u_b(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    const wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    const wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Lanes are independent, so wd may name the same register as ws/wt. */
    for (i = 0; i < 16; i++) {
        dst->b[i] = msa_mod_u_df(DF_BYTE, lhs->b[i], rhs->b[i]);
    }
}
3160 
/*
 * Apply msa_mod_u_df() to each halfword lane: wd.h[i] is computed from
 * ws.h[i] and wt.h[i] for lanes 0..7.
 */
void helper_msa_mod_u_h(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    const wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    const wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Lanes are independent, so wd may name the same register as ws/wt. */
    for (i = 0; i < 8; i++) {
        dst->h[i] = msa_mod_u_df(DF_HALF, lhs->h[i], rhs->h[i]);
    }
}
3177 
/*
 * Apply msa_mod_u_df() to each word lane: wd.w[i] is computed from
 * ws.w[i] and wt.w[i] for lanes 0..3.
 */
void helper_msa_mod_u_w(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    const wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    const wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Lanes are independent, so wd may name the same register as ws/wt. */
    for (i = 0; i < 4; i++) {
        dst->w[i] = msa_mod_u_df(DF_WORD, lhs->w[i], rhs->w[i]);
    }
}
3190 
/*
 * Apply msa_mod_u_df() to each doubleword lane: wd.d[i] is computed from
 * ws.d[i] and wt.d[i] for lanes 0..1.
 */
void helper_msa_mod_u_d(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    const wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    const wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Lanes are independent, so wd may name the same register as ws/wt. */
    for (i = 0; i < 2; i++) {
        dst->d[i] = msa_mod_u_df(DF_DOUBLE, lhs->d[i], rhs->d[i]);
    }
}
3201 
3202 
3203 /*
3204  * Int Multiply
3205  * ------------
3206  *
3207  * +---------------+----------------------------------------------------------+
3208  * | MADDV.B       | Vector Multiply and Add (byte)                           |
3209  * | MADDV.H       | Vector Multiply and Add (halfword)                       |
3210  * | MADDV.W       | Vector Multiply and Add (word)                           |
3211  * | MADDV.D       | Vector Multiply and Add (doubleword)                     |
3212  * | MSUBV.B       | Vector Multiply and Subtract (byte)                      |
3213  * | MSUBV.H       | Vector Multiply and Subtract (halfword)                  |
3214  * | MSUBV.W       | Vector Multiply and Subtract (word)                      |
3215  * | MSUBV.D       | Vector Multiply and Subtract (doubleword)                |
3216  * | MULV.B        | Vector Multiply (byte)                                   |
3217  * | MULV.H        | Vector Multiply (halfword)                               |
3218  * | MULV.W        | Vector Multiply (word)                                   |
3219  * | MULV.D        | Vector Multiply (doubleword)                             |
3220  * +---------------+----------------------------------------------------------+
3221  */
3222 
/*
 * Per-element kernel for MADDV: returns dest + arg1 * arg2.
 *
 * The multiply and add are carried out in uint64_t so that the result
 * wraps modulo 2^64 instead of depending on signed overflow, which ISO C
 * leaves undefined.  The low-order bits are the same as those of the
 * signed computation, so narrower destination lanes are unaffected.
 *
 * @df is not used by the computation.
 */
static inline int64_t msa_maddv_df(uint32_t df, int64_t dest, int64_t arg1,
                                   int64_t arg2)
{
    uint64_t product = (uint64_t)arg1 * (uint64_t)arg2;

    return (int64_t)((uint64_t)dest + product);
}
3228 
/*
 * MADDV.B: Vector Multiply and Add (byte).
 *
 * For byte lanes 0..15, wd.b[i] = msa_maddv_df(wd.b[i], ws.b[i], wt.b[i]).
 */
void helper_msa_maddv_b(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *acc = &env->active_fpu.fpr[wd].wr;
    const wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    const wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Each lane reads its own accumulator before it is overwritten. */
    for (i = 0; i < 16; i++) {
        acc->b[i] = msa_maddv_df(DF_BYTE, acc->b[i], lhs->b[i], rhs->b[i]);
    }
}
3253 
/*
 * MADDV.H: Vector Multiply and Add (halfword).
 *
 * For halfword lanes 0..7,
 * wd.h[i] = msa_maddv_df(wd.h[i], ws.h[i], wt.h[i]).
 */
void helper_msa_maddv_h(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *acc = &env->active_fpu.fpr[wd].wr;
    const wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    const wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Each lane reads its own accumulator before it is overwritten. */
    for (i = 0; i < 8; i++) {
        acc->h[i] = msa_maddv_df(DF_HALF, acc->h[i], lhs->h[i], rhs->h[i]);
    }
}
3270 
/*
 * MADDV.W: Vector Multiply and Add (word).
 *
 * For word lanes 0..3,
 * wd.w[i] = msa_maddv_df(wd.w[i], ws.w[i], wt.w[i]).
 */
void helper_msa_maddv_w(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *acc = &env->active_fpu.fpr[wd].wr;
    const wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    const wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Each lane reads its own accumulator before it is overwritten. */
    for (i = 0; i < 4; i++) {
        acc->w[i] = msa_maddv_df(DF_WORD, acc->w[i], lhs->w[i], rhs->w[i]);
    }
}
3283 
/*
 * MADDV.D: Vector Multiply and Add (doubleword).
 *
 * For doubleword lanes 0..1,
 * wd.d[i] = msa_maddv_df(wd.d[i], ws.d[i], wt.d[i]).
 */
void helper_msa_maddv_d(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *acc = &env->active_fpu.fpr[wd].wr;
    const wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    const wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Each lane reads its own accumulator before it is overwritten. */
    for (i = 0; i < 2; i++) {
        acc->d[i] = msa_maddv_df(DF_DOUBLE, acc->d[i], lhs->d[i], rhs->d[i]);
    }
}
3294 
/*
 * Per-element kernel for MSUBV: returns dest - arg1 * arg2.
 *
 * The multiply and subtract are carried out in uint64_t so that the
 * result wraps modulo 2^64 instead of depending on signed overflow, which
 * ISO C leaves undefined.  The low-order bits are the same as those of
 * the signed computation, so narrower destination lanes are unaffected.
 *
 * @df is not used by the computation.
 */
static inline int64_t msa_msubv_df(uint32_t df, int64_t dest, int64_t arg1,
                                   int64_t arg2)
{
    uint64_t product = (uint64_t)arg1 * (uint64_t)arg2;

    return (int64_t)((uint64_t)dest - product);
}
3300 
/*
 * MSUBV.B: Vector Multiply and Subtract (byte).
 *
 * For byte lanes 0..15, wd.b[i] = msa_msubv_df(wd.b[i], ws.b[i], wt.b[i]).
 */
void helper_msa_msubv_b(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *acc = &env->active_fpu.fpr[wd].wr;
    const wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    const wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Each lane reads its own accumulator before it is overwritten. */
    for (i = 0; i < 16; i++) {
        acc->b[i] = msa_msubv_df(DF_BYTE, acc->b[i], lhs->b[i], rhs->b[i]);
    }
}
3325 
/*
 * MSUBV.H: Vector Multiply and Subtract (halfword).
 *
 * For halfword lanes 0..7,
 * wd.h[i] = msa_msubv_df(wd.h[i], ws.h[i], wt.h[i]).
 */
void helper_msa_msubv_h(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *acc = &env->active_fpu.fpr[wd].wr;
    const wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    const wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Each lane reads its own accumulator before it is overwritten. */
    for (i = 0; i < 8; i++) {
        acc->h[i] = msa_msubv_df(DF_HALF, acc->h[i], lhs->h[i], rhs->h[i]);
    }
}
3342 
/*
 * MSUBV.W: Vector Multiply and Subtract (word).
 *
 * For word lanes 0..3,
 * wd.w[i] = msa_msubv_df(wd.w[i], ws.w[i], wt.w[i]).
 */
void helper_msa_msubv_w(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *acc = &env->active_fpu.fpr[wd].wr;
    const wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    const wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Each lane reads its own accumulator before it is overwritten. */
    for (i = 0; i < 4; i++) {
        acc->w[i] = msa_msubv_df(DF_WORD, acc->w[i], lhs->w[i], rhs->w[i]);
    }
}
3355 
/*
 * MSUBV.D: Vector Multiply and Subtract (doubleword).
 *
 * For doubleword lanes 0..1,
 * wd.d[i] = msa_msubv_df(wd.d[i], ws.d[i], wt.d[i]).
 */
void helper_msa_msubv_d(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *acc = &env->active_fpu.fpr[wd].wr;
    const wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    const wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Each lane reads its own accumulator before it is overwritten. */
    for (i = 0; i < 2; i++) {
        acc->d[i] = msa_msubv_df(DF_DOUBLE, acc->d[i], lhs->d[i], rhs->d[i]);
    }
}
3366 
3367 
/*
 * Per-element kernel for MULV: returns arg1 * arg2.
 *
 * The product is formed in uint64_t so that it wraps modulo 2^64 instead
 * of depending on signed overflow, which ISO C leaves undefined.  The
 * low-order bits are the same as those of the signed product, so narrower
 * destination lanes are unaffected.
 *
 * @df is not used by the computation.
 */
static inline int64_t msa_mulv_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    return (int64_t)((uint64_t)arg1 * (uint64_t)arg2);
}
3372 
/*
 * MULV.B: Vector Multiply (byte).
 *
 * For byte lanes 0..15, wd.b[i] = msa_mulv_df(ws.b[i], wt.b[i]).
 */
void helper_msa_mulv_b(CPUMIPSState *env,
                       uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    const wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    const wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Lanes are independent, so wd may name the same register as ws/wt. */
    for (i = 0; i < 16; i++) {
        dst->b[i] = msa_mulv_df(DF_BYTE, lhs->b[i], rhs->b[i]);
    }
}
3397 
/*
 * MULV.H: Vector Multiply (halfword).
 *
 * For halfword lanes 0..7, wd.h[i] = msa_mulv_df(ws.h[i], wt.h[i]).
 */
void helper_msa_mulv_h(CPUMIPSState *env,
                       uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    const wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    const wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Lanes are independent, so wd may name the same register as ws/wt. */
    for (i = 0; i < 8; i++) {
        dst->h[i] = msa_mulv_df(DF_HALF, lhs->h[i], rhs->h[i]);
    }
}
3414 
/*
 * MULV.W: Vector Multiply (word).
 *
 * For word lanes 0..3, wd.w[i] = msa_mulv_df(ws.w[i], wt.w[i]).
 */
void helper_msa_mulv_w(CPUMIPSState *env,
                       uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    const wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    const wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Lanes are independent, so wd may name the same register as ws/wt. */
    for (i = 0; i < 4; i++) {
        dst->w[i] = msa_mulv_df(DF_WORD, lhs->w[i], rhs->w[i]);
    }
}
3427 
/*
 * MULV.D: Vector Multiply (doubleword).
 *
 * For doubleword lanes 0..1, wd.d[i] = msa_mulv_df(ws.d[i], wt.d[i]).
 */
void helper_msa_mulv_d(CPUMIPSState *env,
                       uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    const wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    const wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Lanes are independent, so wd may name the same register as ws/wt. */
    for (i = 0; i < 2; i++) {
        dst->d[i] = msa_mulv_df(DF_DOUBLE, lhs->d[i], rhs->d[i]);
    }
}
3438 
3439 
3440 /*
3441  * Int Subtract
3442  * ------------
3443  *
3444  * +---------------+----------------------------------------------------------+
3445  * | ASUB_S.B      | Vector Absolute Values of Signed Subtract (byte)         |
3446  * | ASUB_S.H      | Vector Absolute Values of Signed Subtract (halfword)     |
3447  * | ASUB_S.W      | Vector Absolute Values of Signed Subtract (word)         |
3448  * | ASUB_S.D      | Vector Absolute Values of Signed Subtract (doubleword)   |
3449  * | ASUB_U.B      | Vector Absolute Values of Unsigned Subtract (byte)       |
3450  * | ASUB_U.H      | Vector Absolute Values of Unsigned Subtract (halfword)   |
3451  * | ASUB_U.W      | Vector Absolute Values of Unsigned Subtract (word)       |
3452  * | ASUB_U.D      | Vector Absolute Values of Unsigned Subtract (doubleword) |
3453  * | HSUB_S.H      | Vector Signed Horizontal Subtract (halfword)             |
3454  * | HSUB_S.W      | Vector Signed Horizontal Subtract (word)                 |
3455  * | HSUB_S.D      | Vector Signed Horizontal Subtract (doubleword)           |
3456  * | HSUB_U.H      | Vector Unsigned Horizontal Subtract (halfword)           |
3457  * | HSUB_U.W      | Vector Unsigned Horizontal Subtract (word)               |
3458  * | HSUB_U.D      | Vector Unsigned Horizontal Subtract (doubleword)         |
3459  * | SUBS_S.B      | Vector Signed Saturated Subtract (of Signed) (byte)      |
3460  * | SUBS_S.H      | Vector Signed Saturated Subtract (of Signed) (halfword)  |
3461  * | SUBS_S.W      | Vector Signed Saturated Subtract (of Signed) (word)      |
3462  * | SUBS_S.D      | Vector Signed Saturated Subtract (of Signed) (doubleword)|
3463  * | SUBS_U.B      | Vector Unsigned Saturated Subtract (of Uns.) (byte)      |
3464  * | SUBS_U.H      | Vector Unsigned Saturated Subtract (of Uns.) (halfword)  |
3465  * | SUBS_U.W      | Vector Unsigned Saturated Subtract (of Uns.) (word)      |
3466  * | SUBS_U.D      | Vector Unsigned Saturated Subtract (of Uns.) (doubleword)|
3467  * | SUBSUS_U.B    | Vector Uns. Sat. Subtract (of S. from Uns.) (byte)       |
3468  * | SUBSUS_U.H    | Vector Uns. Sat. Subtract (of S. from Uns.) (halfword)   |
3469  * | SUBSUS_U.W    | Vector Uns. Sat. Subtract (of S. from Uns.) (word)       |
3470  * | SUBSUS_U.D    | Vector Uns. Sat. Subtract (of S. from Uns.) (doubleword) |
3471  * | SUBSUU_S.B    | Vector Signed Saturated Subtract (of Uns.) (byte)        |
3472  * | SUBSUU_S.H    | Vector Signed Saturated Subtract (of Uns.) (halfword)    |
3473  * | SUBSUU_S.W    | Vector Signed Saturated Subtract (of Uns.) (word)        |
3474  * | SUBSUU_S.D    | Vector Signed Saturated Subtract (of Uns.) (doubleword)  |
3475  * | SUBV.B        | Vector Subtract (byte)                                   |
3476  * | SUBV.H        | Vector Subtract (halfword)                               |
3477  * | SUBV.W        | Vector Subtract (word)                                   |
3478  * | SUBV.D        | Vector Subtract (doubleword)                             |
3479  * +---------------+----------------------------------------------------------+
3480  */
3481 
3482 
/*
 * Per-element kernel for ASUB_S: absolute value of the signed difference
 * arg1 - arg2.
 *
 * The operands are compared as signed values to decide which one is
 * larger, but the subtraction itself is done in uint64_t.  Subtracting
 * the int64_t values directly can overflow (e.g. INT64_MAX - INT64_MIN),
 * which ISO C leaves undefined; the unsigned form wraps modulo 2^64 and
 * yields the same low-order bits.
 *
 * @df is not used by the computation.
 */
static inline int64_t msa_asub_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    uint64_t u_arg1 = (uint64_t)arg1;
    uint64_t u_arg2 = (uint64_t)arg2;

    /* signed compare, unsigned subtract */
    return (arg1 < arg2) ? u_arg2 - u_arg1 : u_arg1 - u_arg2;
}
3489 
/*
 * ASUB_S.B: Vector Absolute Values of Signed Subtract (byte).
 *
 * For byte lanes 0..15, wd.b[i] = msa_asub_s_df(ws.b[i], wt.b[i]).
 */
void helper_msa_asub_s_b(CPUMIPSState *env,
                         uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    const wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    const wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Lanes are independent, so wd may name the same register as ws/wt. */
    for (i = 0; i < 16; i++) {
        dst->b[i] = msa_asub_s_df(DF_BYTE, lhs->b[i], rhs->b[i]);
    }
}
3514 
/*
 * ASUB_S.H: Vector Absolute Values of Signed Subtract (halfword).
 *
 * For halfword lanes 0..7, wd.h[i] = msa_asub_s_df(ws.h[i], wt.h[i]).
 */
void helper_msa_asub_s_h(CPUMIPSState *env,
                         uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    const wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    const wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Lanes are independent, so wd may name the same register as ws/wt. */
    for (i = 0; i < 8; i++) {
        dst->h[i] = msa_asub_s_df(DF_HALF, lhs->h[i], rhs->h[i]);
    }
}
3531 
/*
 * ASUB_S.W: Vector Absolute Values of Signed Subtract (word).
 *
 * For word lanes 0..3, wd.w[i] = msa_asub_s_df(ws.w[i], wt.w[i]).
 */
void helper_msa_asub_s_w(CPUMIPSState *env,
                         uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    const wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    const wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Lanes are independent, so wd may name the same register as ws/wt. */
    for (i = 0; i < 4; i++) {
        dst->w[i] = msa_asub_s_df(DF_WORD, lhs->w[i], rhs->w[i]);
    }
}
3544 
/*
 * ASUB_S.D: Vector Absolute Values of Signed Subtract (doubleword).
 *
 * For doubleword lanes 0..1, wd.d[i] = msa_asub_s_df(ws.d[i], wt.d[i]).
 */
void helper_msa_asub_s_d(CPUMIPSState *env,
                         uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    const wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    const wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Lanes are independent, so wd may name the same register as ws/wt. */
    for (i = 0; i < 2; i++) {
        dst->d[i] = msa_asub_s_df(DF_DOUBLE, lhs->d[i], rhs->d[i]);
    }
}
3555 
3556 
/*
 * Per-element kernel for ASUB_U: absolute difference of two unsigned
 * elements.
 *
 * Both operands are reduced with UNSIGNED() for data format @df, then the
 * smaller reduced value is subtracted from the larger one.
 */
static inline uint64_t msa_asub_u_df(uint32_t df, uint64_t arg1, uint64_t arg2)
{
    const uint64_t lhs = UNSIGNED(arg1, df);
    const uint64_t rhs = UNSIGNED(arg2, df);

    /* unsigned compare decides the subtraction order */
    if (lhs < rhs) {
        return rhs - lhs;
    }
    return lhs - rhs;
}
3565 
/*
 * ASUB_U.B: Vector Absolute Values of Unsigned Subtract (byte).
 *
 * For byte lanes 0..15, wd.b[i] = msa_asub_u_df(ws.b[i], wt.b[i]).
 */
void helper_msa_asub_u_b(CPUMIPSState *env,
                         uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    const wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    const wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Lanes are independent, so wd may name the same register as ws/wt. */
    for (i = 0; i < 16; i++) {
        dst->b[i] = msa_asub_u_df(DF_BYTE, lhs->b[i], rhs->b[i]);
    }
}
3590 
/*
 * ASUB_U.H: Vector Absolute Values of Unsigned Subtract (halfword).
 *
 * For halfword lanes 0..7, wd.h[i] = msa_asub_u_df(ws.h[i], wt.h[i]).
 */
void helper_msa_asub_u_h(CPUMIPSState *env,
                         uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    const wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    const wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Lanes are independent, so wd may name the same register as ws/wt. */
    for (i = 0; i < 8; i++) {
        dst->h[i] = msa_asub_u_df(DF_HALF, lhs->h[i], rhs->h[i]);
    }
}
3607 
/*
 * ASUB_U.W: Vector Absolute Values of Unsigned Subtract (word).
 *
 * For word lanes 0..3, wd.w[i] = msa_asub_u_df(ws.w[i], wt.w[i]).
 */
void helper_msa_asub_u_w(CPUMIPSState *env,
                         uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    const wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    const wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Lanes are independent, so wd may name the same register as ws/wt. */
    for (i = 0; i < 4; i++) {
        dst->w[i] = msa_asub_u_df(DF_WORD, lhs->w[i], rhs->w[i]);
    }
}
3620 
/*
 * ASUB_U.D: Vector Absolute Values of Unsigned Subtract (doubleword).
 *
 * For doubleword lanes 0..1, wd.d[i] = msa_asub_u_df(ws.d[i], wt.d[i]).
 */
void helper_msa_asub_u_d(CPUMIPSState *env,
                         uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    const wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    const wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Lanes are independent, so wd may name the same register as ws/wt. */
    for (i = 0; i < 2; i++) {
        dst->d[i] = msa_asub_u_df(DF_DOUBLE, lhs->d[i], rhs->d[i]);
    }
}
3631 
3632 
/*
 * Per-element kernel for HSUB_S: SIGNED_ODD(arg1) - SIGNED_EVEN(arg2) for
 * data format @df.
 *
 * NOTE(review): SIGNED_ODD/SIGNED_EVEN are defined elsewhere in this
 * file; presumably they sign-extend the odd and even half of the
 * element and produce int64_t values - confirm against their definitions.
 */
static inline int64_t msa_hsub_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    int64_t minuend = SIGNED_ODD(arg1, df);
    int64_t subtrahend = SIGNED_EVEN(arg2, df);

    return minuend - subtrahend;
}
3637 
/*
 * HSUB_S.H: Vector Signed Horizontal Subtract (halfword).
 *
 * For halfword lanes 0..7, wd.h[i] = msa_hsub_s_df(ws.h[i], wt.h[i]).
 */
void helper_msa_hsub_s_h(CPUMIPSState *env,
                         uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    const wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    const wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Lanes are independent, so wd may name the same register as ws/wt. */
    for (i = 0; i < 8; i++) {
        dst->h[i] = msa_hsub_s_df(DF_HALF, lhs->h[i], rhs->h[i]);
    }
}
3654 
/*
 * HSUB_S.W: Vector Signed Horizontal Subtract (word).
 *
 * For word lanes 0..3, wd.w[i] = msa_hsub_s_df(ws.w[i], wt.w[i]).
 */
void helper_msa_hsub_s_w(CPUMIPSState *env,
                         uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    const wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    const wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Lanes are independent, so wd may name the same register as ws/wt. */
    for (i = 0; i < 4; i++) {
        dst->w[i] = msa_hsub_s_df(DF_WORD, lhs->w[i], rhs->w[i]);
    }
}
3667 
/*
 * HSUB_S.D: Vector Signed Horizontal Subtract (doubleword).
 *
 * For doubleword lanes 0..1, wd.d[i] = msa_hsub_s_df(ws.d[i], wt.d[i]).
 */
void helper_msa_hsub_s_d(CPUMIPSState *env,
                         uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    const wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    const wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Lanes are independent, so wd may name the same register as ws/wt. */
    for (i = 0; i < 2; i++) {
        dst->d[i] = msa_hsub_s_df(DF_DOUBLE, lhs->d[i], rhs->d[i]);
    }
}
3678 
3679 
/*
 * Per-element kernel for HSUB_U: UNSIGNED_ODD(arg1) - UNSIGNED_EVEN(arg2)
 * for data format @df.
 *
 * NOTE(review): UNSIGNED_ODD/UNSIGNED_EVEN are defined elsewhere in this
 * file; presumably they zero-extend the odd and even half of the
 * element and produce uint64_t values - confirm against their definitions.
 */
static inline int64_t msa_hsub_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    uint64_t minuend = UNSIGNED_ODD(arg1, df);
    uint64_t subtrahend = UNSIGNED_EVEN(arg2, df);

    return minuend - subtrahend;
}
3684 
/*
 * HSUB_U.H: Vector Unsigned Horizontal Subtract (halfword).
 *
 * For halfword lanes 0..7, wd.h[i] = msa_hsub_u_df(ws.h[i], wt.h[i]).
 */
void helper_msa_hsub_u_h(CPUMIPSState *env,
                         uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    const wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    const wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    /* Lanes are independent, so wd may name the same register as ws/wt. */
    for (i = 0; i < 8; i++) {
        dst->h[i] = msa_hsub_u_df(DF_HALF, lhs->h[i], rhs->h[i]);
    }
}
3701 
/*
 * For each of the four word elements, store into register wd the result of
 * msa_hsub_u_df(DF_WORD, ...) applied to the same-numbered elements of
 * registers ws and wt.
 */
void helper_msa_hsub_u_w(CPUMIPSState *env,
                         uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;

    /* Each lane depends only on the same lane of the two sources. */
    for (int i = 0; i < 4; i++) {
        dst->w[i] = msa_hsub_u_df(DF_WORD, src_s->w[i], src_t->w[i]);
    }
}
3714 
/*
 * For each of the two doubleword elements, store into register wd the result
 * of msa_hsub_u_df(DF_DOUBLE, ...) applied to the same-numbered elements of
 * registers ws and wt.
 */
void helper_msa_hsub_u_d(CPUMIPSState *env,
                         uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;

    /* Each lane depends only on the same lane of the two sources. */
    for (int i = 0; i < 2; i++) {
        dst->d[i] = msa_hsub_u_df(DF_DOUBLE, src_s->d[i], src_t->d[i]);
    }
}
3725 
3726 
/*
 * Signed saturating subtraction for data format df.
 *
 * Returns arg1 - arg2 when that difference lies strictly inside the bound
 * being approached, otherwise the bound itself: DF_MIN_INT(df) when arg2 is
 * positive, DF_MAX_INT(df) when arg2 is zero or negative.  The range test is
 * phrased on "bound + arg2" so that it is evaluated before the subtraction.
 */
static inline int64_t msa_subs_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const int64_t upper = DF_MAX_INT(df);
    const int64_t lower = DF_MIN_INT(df);

    if (arg2 > 0) {
        /* Subtracting a positive value can only run into the lower bound. */
        if (lower + arg2 < arg1) {
            return arg1 - arg2;
        }
        return lower;
    }

    /* Subtracting zero or a negative value can only hit the upper bound. */
    if (arg1 < upper + arg2) {
        return arg1 - arg2;
    }
    return upper;
}
3737 
/*
 * For each of the sixteen byte elements, store into register wd the result
 * of msa_subs_s_df(DF_BYTE, ...) applied to the same-numbered elements of
 * registers ws and wt.
 */
void helper_msa_subs_s_b(CPUMIPSState *env,
                         uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;

    /* Each lane depends only on the same lane of the two sources. */
    for (int i = 0; i < 16; i++) {
        dst->b[i] = msa_subs_s_df(DF_BYTE, src_s->b[i], src_t->b[i]);
    }
}
3762 
/*
 * For each of the eight halfword elements, store into register wd the result
 * of msa_subs_s_df(DF_HALF, ...) applied to the same-numbered elements of
 * registers ws and wt.
 */
void helper_msa_subs_s_h(CPUMIPSState *env,
                         uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;

    /* Each lane depends only on the same lane of the two sources. */
    for (int i = 0; i < 8; i++) {
        dst->h[i] = msa_subs_s_df(DF_HALF, src_s->h[i], src_t->h[i]);
    }
}
3779 
/*
 * For each of the four word elements, store into register wd the result of
 * msa_subs_s_df(DF_WORD, ...) applied to the same-numbered elements of
 * registers ws and wt.
 */
void helper_msa_subs_s_w(CPUMIPSState *env,
                         uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;

    /* Each lane depends only on the same lane of the two sources. */
    for (int i = 0; i < 4; i++) {
        dst->w[i] = msa_subs_s_df(DF_WORD, src_s->w[i], src_t->w[i]);
    }
}
3792 
/*
 * For each of the two doubleword elements, store into register wd the result
 * of msa_subs_s_df(DF_DOUBLE, ...) applied to the same-numbered elements of
 * registers ws and wt.
 */
void helper_msa_subs_s_d(CPUMIPSState *env,
                         uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;

    /* Each lane depends only on the same lane of the two sources. */
    for (int i = 0; i < 2; i++) {
        dst->d[i] = msa_subs_s_df(DF_DOUBLE, src_s->d[i], src_t->d[i]);
    }
}
3803 
3804 
/*
 * Unsigned saturating subtraction for data format df.
 *
 * Both arguments are first masked to their df-wide unsigned value with
 * UNSIGNED().  Returns their difference, or 0 when the masked arg2 is not
 * smaller than the masked arg1.
 */
static inline int64_t msa_subs_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    uint64_t minuend = UNSIGNED(arg1, df);
    uint64_t subtrahend = UNSIGNED(arg2, df);

    /* A difference that would go below zero is clamped to zero. */
    if (minuend <= subtrahend) {
        return 0;
    }
    return minuend - subtrahend;
}
3811 
/*
 * For each of the sixteen byte elements, store into register wd the result
 * of msa_subs_u_df(DF_BYTE, ...) applied to the same-numbered elements of
 * registers ws and wt.
 */
void helper_msa_subs_u_b(CPUMIPSState *env,
                         uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;

    /* Each lane depends only on the same lane of the two sources. */
    for (int i = 0; i < 16; i++) {
        dst->b[i] = msa_subs_u_df(DF_BYTE, src_s->b[i], src_t->b[i]);
    }
}
3836 
/*
 * For each of the eight halfword elements, store into register wd the result
 * of msa_subs_u_df(DF_HALF, ...) applied to the same-numbered elements of
 * registers ws and wt.
 */
void helper_msa_subs_u_h(CPUMIPSState *env,
                         uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;

    /* Each lane depends only on the same lane of the two sources. */
    for (int i = 0; i < 8; i++) {
        dst->h[i] = msa_subs_u_df(DF_HALF, src_s->h[i], src_t->h[i]);
    }
}
3853 
/*
 * For each of the four word elements, store into register wd the result of
 * msa_subs_u_df(DF_WORD, ...) applied to the same-numbered elements of
 * registers ws and wt.
 */
void helper_msa_subs_u_w(CPUMIPSState *env,
                         uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;

    /* Each lane depends only on the same lane of the two sources. */
    for (int i = 0; i < 4; i++) {
        dst->w[i] = msa_subs_u_df(DF_WORD, src_s->w[i], src_t->w[i]);
    }
}
3866 
/*
 * For each of the two doubleword elements, store into register wd the result
 * of msa_subs_u_df(DF_DOUBLE, ...) applied to the same-numbered elements of
 * registers ws and wt.
 */
void helper_msa_subs_u_d(CPUMIPSState *env,
                         uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;

    /* Each lane depends only on the same lane of the two sources. */
    for (int i = 0; i < 2; i++) {
        dst->d[i] = msa_subs_u_df(DF_DOUBLE, src_s->d[i], src_t->d[i]);
    }
}
3877 
3878 
/*
 * Saturating subtraction of a signed value from an unsigned value, with an
 * unsigned result, for data format df.
 *
 * arg1 is masked to its df-wide unsigned value with UNSIGNED(); arg2 is used
 * as the signed value it was passed as.  The result is clamped to the range
 * [0, DF_MAX_UINT(df)]:
 *   - arg2 >= 0: returns arg1 - arg2, or 0 if that would go below zero;
 *   - arg2 <  0: returns arg1 + |arg2|, or DF_MAX_UINT(df) if that would
 *     reach or exceed the maximum.
 */
static inline int64_t msa_subsus_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    uint64_t u_arg1 = UNSIGNED(arg1, df);
    uint64_t max_uint = DF_MAX_UINT(df);
    if (arg2 >= 0) {
        uint64_t u_arg2 = (uint64_t)arg2;
        return (u_arg1 > u_arg2) ?
            (int64_t)(u_arg1 - u_arg2) :
            0;
    } else {
        /*
         * Take the magnitude in unsigned arithmetic: negating arg2 as a
         * signed value overflows when arg2 is INT64_MIN, which is
         * undefined in ISO C.  The unsigned form yields the same value
         * for every other input and 2^63 for INT64_MIN.
         */
        uint64_t u_arg2 = 0 - (uint64_t)arg2;
        return (u_arg1 < max_uint - u_arg2) ?
            (int64_t)(u_arg1 + u_arg2) :
            (int64_t)max_uint;
    }
}
3895 
/*
 * For each of the sixteen byte elements, store into register wd the result
 * of msa_subsus_u_df(DF_BYTE, ...) applied to the same-numbered elements of
 * registers ws and wt.
 */
void helper_msa_subsus_u_b(CPUMIPSState *env,
                           uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;

    /* Each lane depends only on the same lane of the two sources. */
    for (int i = 0; i < 16; i++) {
        dst->b[i] = msa_subsus_u_df(DF_BYTE, src_s->b[i], src_t->b[i]);
    }
}
3920 
/*
 * For each of the eight halfword elements, store into register wd the result
 * of msa_subsus_u_df(DF_HALF, ...) applied to the same-numbered elements of
 * registers ws and wt.
 */
void helper_msa_subsus_u_h(CPUMIPSState *env,
                           uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;

    /* Each lane depends only on the same lane of the two sources. */
    for (int i = 0; i < 8; i++) {
        dst->h[i] = msa_subsus_u_df(DF_HALF, src_s->h[i], src_t->h[i]);
    }
}
3937 
/*
 * For each of the four word elements, store into register wd the result of
 * msa_subsus_u_df(DF_WORD, ...) applied to the same-numbered elements of
 * registers ws and wt.
 */
void helper_msa_subsus_u_w(CPUMIPSState *env,
                           uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;

    /* Each lane depends only on the same lane of the two sources. */
    for (int i = 0; i < 4; i++) {
        dst->w[i] = msa_subsus_u_df(DF_WORD, src_s->w[i], src_t->w[i]);
    }
}
3950 
/*
 * For each of the two doubleword elements, store into register wd the result
 * of msa_subsus_u_df(DF_DOUBLE, ...) applied to the same-numbered elements
 * of registers ws and wt.
 */
void helper_msa_subsus_u_d(CPUMIPSState *env,
                           uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;

    /* Each lane depends only on the same lane of the two sources. */
    for (int i = 0; i < 2; i++) {
        dst->d[i] = msa_subsus_u_df(DF_DOUBLE, src_s->d[i], src_t->d[i]);
    }
}
3961 
3962 
/*
 * Saturating subtraction of two unsigned values with a signed result, for
 * data format df.
 *
 * Both arguments are masked to their df-wide unsigned value with UNSIGNED().
 * Returns their difference clamped to [DF_MIN_INT(df), DF_MAX_INT(df)]:
 *   - arg1 > arg2: the positive difference, or DF_MAX_INT(df) if it is not
 *     below that maximum;
 *   - otherwise: the zero or negative difference, or DF_MIN_INT(df) if its
 *     magnitude is not below the magnitude of that minimum.
 */
static inline int64_t msa_subsuu_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    uint64_t u_arg1 = UNSIGNED(arg1, df);
    uint64_t u_arg2 = UNSIGNED(arg2, df);
    int64_t max_int = DF_MAX_INT(df);
    int64_t min_int = DF_MIN_INT(df);
    if (u_arg1 > u_arg2) {
        return u_arg1 - u_arg2 < (uint64_t)max_int ?
            (int64_t)(u_arg1 - u_arg2) :
            max_int;
    } else {
        /*
         * Magnitude of min_int, computed in unsigned arithmetic: negating
         * min_int as a signed value overflows when it is INT64_MIN, which
         * is undefined in ISO C.  The unsigned form gives the same value
         * for the narrower formats and 2^63 for INT64_MIN.
         */
        uint64_t min_magnitude = 0 - (uint64_t)min_int;
        return u_arg2 - u_arg1 < min_magnitude ?
            (int64_t)(u_arg1 - u_arg2) :
            min_int;
    }
}
3979 
/*
 * For each of the sixteen byte elements, store into register wd the result
 * of msa_subsuu_s_df(DF_BYTE, ...) applied to the same-numbered elements of
 * registers ws and wt.
 */
void helper_msa_subsuu_s_b(CPUMIPSState *env,
                           uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;

    /* Each lane depends only on the same lane of the two sources. */
    for (int i = 0; i < 16; i++) {
        dst->b[i] = msa_subsuu_s_df(DF_BYTE, src_s->b[i], src_t->b[i]);
    }
}
4004 
/*
 * For each of the eight halfword elements, store into register wd the result
 * of msa_subsuu_s_df(DF_HALF, ...) applied to the same-numbered elements of
 * registers ws and wt.
 */
void helper_msa_subsuu_s_h(CPUMIPSState *env,
                           uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;

    /* Each lane depends only on the same lane of the two sources. */
    for (int i = 0; i < 8; i++) {
        dst->h[i] = msa_subsuu_s_df(DF_HALF, src_s->h[i], src_t->h[i]);
    }
}
4021 
/*
 * For each of the four word elements, store into register wd the result of
 * msa_subsuu_s_df(DF_WORD, ...) applied to the same-numbered elements of
 * registers ws and wt.
 */
void helper_msa_subsuu_s_w(CPUMIPSState *env,
                           uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;

    /* Each lane depends only on the same lane of the two sources. */
    for (int i = 0; i < 4; i++) {
        dst->w[i] = msa_subsuu_s_df(DF_WORD, src_s->w[i], src_t->w[i]);
    }
}
4034 
/*
 * For each of the two doubleword elements, store into register wd the result
 * of msa_subsuu_s_df(DF_DOUBLE, ...) applied to the same-numbered elements
 * of registers ws and wt.
 */
void helper_msa_subsuu_s_d(CPUMIPSState *env,
                           uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;

    /* Each lane depends only on the same lane of the two sources. */
    for (int i = 0; i < 2; i++) {
        dst->d[i] = msa_subsuu_s_df(DF_DOUBLE, src_s->d[i], src_t->d[i]);
    }
}
4045 
4046 
/*
 * Plain (non-saturating) subtraction: returns arg1 - arg2 with wraparound
 * modulo 2^64.  The df argument is accepted for symmetry with the other
 * per-element helpers and is not used.
 *
 * The subtraction is carried out on uint64_t so that the wraparound for
 * operands such as INT64_MIN - 1 does not depend on how the compiler treats
 * signed overflow (undefined in ISO C unless the build requests wrapping).
 */
static inline int64_t msa_subv_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    return (int64_t)((uint64_t)arg1 - (uint64_t)arg2);
}
4051 
/*
 * For each of the sixteen byte elements, store into register wd the result
 * of msa_subv_df(DF_BYTE, ...) applied to the same-numbered elements of
 * registers ws and wt.
 */
void helper_msa_subv_b(CPUMIPSState *env,
                       uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;

    /* Each lane depends only on the same lane of the two sources. */
    for (int i = 0; i < 16; i++) {
        dst->b[i] = msa_subv_df(DF_BYTE, src_s->b[i], src_t->b[i]);
    }
}
4076 
/*
 * For each of the eight halfword elements, store into register wd the result
 * of msa_subv_df(DF_HALF, ...) applied to the same-numbered elements of
 * registers ws and wt.
 */
void helper_msa_subv_h(CPUMIPSState *env,
                       uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;

    /* Each lane depends only on the same lane of the two sources. */
    for (int i = 0; i < 8; i++) {
        dst->h[i] = msa_subv_df(DF_HALF, src_s->h[i], src_t->h[i]);
    }
}
4093 
/*
 * For each of the four word elements, store into register wd the result of
 * msa_subv_df(DF_WORD, ...) applied to the same-numbered elements of
 * registers ws and wt.
 */
void helper_msa_subv_w(CPUMIPSState *env,
                       uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;

    /* Each lane depends only on the same lane of the two sources. */
    for (int i = 0; i < 4; i++) {
        dst->w[i] = msa_subv_df(DF_WORD, src_s->w[i], src_t->w[i]);
    }
}
4106 
/*
 * For each of the two doubleword elements, store into register wd the result
 * of msa_subv_df(DF_DOUBLE, ...) applied to the same-numbered elements of
 * registers ws and wt.
 */
void helper_msa_subv_d(CPUMIPSState *env,
                       uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;

    /* Each lane depends only on the same lane of the two sources. */
    for (int i = 0; i < 2; i++) {
        dst->d[i] = msa_subv_df(DF_DOUBLE, src_s->d[i], src_t->d[i]);
    }
}
4117 
4118 
4119 /*
4120  * Interleave
4121  * ----------
4122  *
4123  * +---------------+----------------------------------------------------------+
4124  * | ILVEV.B       | Vector Interleave Even (byte)                            |
4125  * | ILVEV.H       | Vector Interleave Even (halfword)                        |
4126  * | ILVEV.W       | Vector Interleave Even (word)                            |
4127  * | ILVEV.D       | Vector Interleave Even (doubleword)                      |
4128  * | ILVOD.B       | Vector Interleave Odd (byte)                             |
4129  * | ILVOD.H       | Vector Interleave Odd (halfword)                         |
4130  * | ILVOD.W       | Vector Interleave Odd (word)                             |
4131  * | ILVOD.D       | Vector Interleave Odd (doubleword)                       |
4132  * | ILVL.B        | Vector Interleave Left (byte)                            |
4133  * | ILVL.H        | Vector Interleave Left (halfword)                        |
4134  * | ILVL.W        | Vector Interleave Left (word)                            |
4135  * | ILVL.D        | Vector Interleave Left (doubleword)                      |
4136  * | ILVR.B        | Vector Interleave Right (byte)                           |
4137  * | ILVR.H        | Vector Interleave Right (halfword)                       |
4138  * | ILVR.W        | Vector Interleave Right (word)                           |
4139  * | ILVR.D        | Vector Interleave Right (doubleword)                     |
4140  * +---------------+----------------------------------------------------------+
4141  */
4142 
4143 
/*
 * Byte variant of the "interleave even" helper.
 *
 * The sixteen byte slots are handled as eight adjacent pairs (i, i + 1).
 * One slot of each pair in wd receives a byte from ws and the other a byte
 * from wt, both taken from the same index inside that pair; which index is
 * read and which slot gets which source depends on HOST_BIG_ENDIAN.
 *
 * wd may be the same register as ws or wt.  Pairs never touch each other's
 * slots, and inside a pair the slot that is still needed as a source is
 * written last, so only the order of the two statements in a pair matters.
 */
void helper_msa_ilvev_b(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;

#if HOST_BIG_ENDIAN
    for (int i = 0; i < 16; i += 2) {
        dst->b[i]     = src_s->b[i + 1];
        dst->b[i + 1] = src_t->b[i + 1];
    }
#else
    for (int i = 0; i < 16; i += 2) {
        dst->b[i + 1] = src_s->b[i];
        dst->b[i]     = src_t->b[i];
    }
#endif
}
4187 
/*
 * Halfword variant of the "interleave even" helper.
 *
 * The eight halfword slots are handled as four adjacent pairs (i, i + 1).
 * One slot of each pair in wd receives a halfword from ws and the other one
 * from wt, both taken from the same index inside that pair; the indices
 * depend on HOST_BIG_ENDIAN.
 *
 * wd may be the same register as ws or wt: pairs are disjoint, and inside a
 * pair the slot still needed as a source is written last.
 */
void helper_msa_ilvev_h(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;

#if HOST_BIG_ENDIAN
    for (int i = 0; i < 8; i += 2) {
        dst->h[i]     = src_s->h[i + 1];
        dst->h[i + 1] = src_t->h[i + 1];
    }
#else
    for (int i = 0; i < 8; i += 2) {
        dst->h[i + 1] = src_s->h[i];
        dst->h[i]     = src_t->h[i];
    }
#endif
}
4215 
/*
 * Word variant of the "interleave even" helper.
 *
 * The four word slots are handled as two adjacent pairs (i, i + 1).  One
 * slot of each pair in wd receives a word from ws and the other one from wt,
 * both taken from the same index inside that pair; the indices depend on
 * HOST_BIG_ENDIAN.
 *
 * wd may be the same register as ws or wt: pairs are disjoint, and inside a
 * pair the slot still needed as a source is written last.
 */
void helper_msa_ilvev_w(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;

#if HOST_BIG_ENDIAN
    for (int i = 0; i < 4; i += 2) {
        dst->w[i]     = src_s->w[i + 1];
        dst->w[i + 1] = src_t->w[i + 1];
    }
#else
    for (int i = 0; i < 4; i += 2) {
        dst->w[i + 1] = src_s->w[i];
        dst->w[i]     = src_t->w[i];
    }
#endif
}
4235 
/*
 * Doubleword variant of the "interleave even" helper: d[1] of wd receives
 * d[0] of ws, and d[0] of wd receives d[0] of wt.
 */
void helper_msa_ilvev_d(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;

    /* d[0] is written last: it is still a source when wd is ws or wt. */
    dst->d[1] = src_s->d[0];
    dst->d[0] = src_t->d[0];
}
4246 
4247 
/*
 * Byte variant of the "interleave odd" helper.
 *
 * The sixteen byte slots are handled as eight adjacent pairs (i, i + 1).
 * One slot of each pair in wd receives a byte from wt and the other a byte
 * from ws, both taken from the same index inside that pair; which index is
 * read and which slot gets which source depends on HOST_BIG_ENDIAN.
 *
 * wd may be the same register as ws or wt.  Pairs never touch each other's
 * slots, and inside a pair the slot that is still needed as a source is
 * written last, so only the order of the two statements in a pair matters.
 */
void helper_msa_ilvod_b(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;

#if HOST_BIG_ENDIAN
    for (int i = 0; i < 16; i += 2) {
        dst->b[i + 1] = src_t->b[i];
        dst->b[i]     = src_s->b[i];
    }
#else
    for (int i = 0; i < 16; i += 2) {
        dst->b[i]     = src_t->b[i + 1];
        dst->b[i + 1] = src_s->b[i + 1];
    }
#endif
}
4291 
/*
 * Halfword variant of the "interleave odd" helper.
 *
 * The eight halfword slots are handled as four adjacent pairs (i, i + 1).
 * One slot of each pair in wd receives a halfword from wt and the other one
 * from ws, both taken from the same index inside that pair; the indices
 * depend on HOST_BIG_ENDIAN.
 *
 * wd may be the same register as ws or wt: pairs are disjoint, and inside a
 * pair the slot still needed as a source is written last.
 */
void helper_msa_ilvod_h(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;

#if HOST_BIG_ENDIAN
    for (int i = 0; i < 8; i += 2) {
        dst->h[i + 1] = src_t->h[i];
        dst->h[i]     = src_s->h[i];
    }
#else
    for (int i = 0; i < 8; i += 2) {
        dst->h[i]     = src_t->h[i + 1];
        dst->h[i + 1] = src_s->h[i + 1];
    }
#endif
}
4319 
/*
 * Word variant of the "interleave odd" helper.
 *
 * The four word slots are handled as two adjacent pairs (i, i + 1).  One
 * slot of each pair in wd receives a word from wt and the other one from ws,
 * both taken from the same index inside that pair; the indices depend on
 * HOST_BIG_ENDIAN.
 *
 * wd may be the same register as ws or wt: pairs are disjoint, and inside a
 * pair the slot still needed as a source is written last.
 */
void helper_msa_ilvod_w(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;

#if HOST_BIG_ENDIAN
    for (int i = 0; i < 4; i += 2) {
        dst->w[i + 1] = src_t->w[i];
        dst->w[i]     = src_s->w[i];
    }
#else
    for (int i = 0; i < 4; i += 2) {
        dst->w[i]     = src_t->w[i + 1];
        dst->w[i + 1] = src_s->w[i + 1];
    }
#endif
}
4339 
/*
 * Doubleword variant of the "interleave odd" helper: d[0] of wd receives
 * d[1] of wt, and d[1] of wd receives d[1] of ws.
 */
void helper_msa_ilvod_d(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;

    /* d[1] is written last: it is still a source when wd is ws or wt. */
    dst->d[0] = src_t->d[1];
    dst->d[1] = src_s->d[1];
}
4350 
4351 
/*
 * Byte variant of the "interleave left" helper.
 *
 * Fills all sixteen byte slots of wd by alternating bytes read from slots
 * 8..15 of wt and ws.  The slot mapping differs between the HOST_BIG_ENDIAN
 * branch and the other branch.
 *
 * wd may be the same register as ws or wt, so the assignments are issued in
 * an order in which every source slot is read before it is overwritten; the
 * loops below reproduce that order and must not be reordered.
 */
void helper_msa_ilvl_b(CPUMIPSState *env,
                       uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;

#if HOST_BIG_ENDIAN
    /* Slots 7..0 first, fed from source slots 15..12. */
    for (int k = 0; k < 4; k++) {
        dst->b[7 - 2 * k] = src_t->b[15 - k];
        dst->b[6 - 2 * k] = src_s->b[15 - k];
    }
    /* Then slots 15..8, fed from source slots 11..8. */
    for (int k = 0; k < 4; k++) {
        dst->b[15 - 2 * k] = src_t->b[11 - k];
        dst->b[14 - 2 * k] = src_s->b[11 - k];
    }
#else
    /* Slots 0..15 in ascending order, fed from source slots 8..15. */
    for (int k = 0; k < 8; k++) {
        dst->b[2 * k]     = src_t->b[8 + k];
        dst->b[2 * k + 1] = src_s->b[8 + k];
    }
#endif
}
4395 
/*
 * helper_msa_ilvl_h: interleave halfword elements 4..7 of wt and ws into wd.
 *
 * For i = 0..3, element 2i of wd receives element i + 4 of wt and element
 * 2i + 1 of wd receives element i + 4 of ws.  Element numbers are the
 * indices into wr_t.h[] used on little-endian hosts; on big-endian hosts
 * every index is XORed with 3.
 *
 * wd, ws and wt are indices into env->active_fpu.fpr[].  The ascending
 * order keeps every store clear of source elements not yet read, so wd may
 * be the same register as ws or wt.
 */
void helper_msa_ilvl_h(CPUMIPSState *env,
                       uint32_t wd, uint32_t ws, uint32_t wt)
{
#if HOST_BIG_ENDIAN
    const int swz = 3;
#else
    const int swz = 0;
#endif
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 4; i++) {
        const int from = (i + 4) ^ swz;

        dst->h[(2 * i) ^ swz] = src_t->h[from];
        dst->h[(2 * i + 1) ^ swz] = src_s->h[from];
    }
}
4423 
/*
 * helper_msa_ilvl_w: interleave word elements 2..3 of wt and ws into wd.
 *
 * For i = 0..1, element 2i of wd receives element i + 2 of wt and element
 * 2i + 1 of wd receives element i + 2 of ws.  Element numbers are the
 * indices into wr_t.w[] used on little-endian hosts; on big-endian hosts
 * every index is XORed with 1.
 *
 * wd, ws and wt are indices into env->active_fpu.fpr[].  The ascending
 * order keeps every store clear of source elements not yet read, so wd may
 * be the same register as ws or wt.
 */
void helper_msa_ilvl_w(CPUMIPSState *env,
                       uint32_t wd, uint32_t ws, uint32_t wt)
{
#if HOST_BIG_ENDIAN
    const int swz = 1;
#else
    const int swz = 0;
#endif
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 2; i++) {
        const int from = (i + 2) ^ swz;

        dst->w[(2 * i) ^ swz] = src_t->w[from];
        dst->w[(2 * i + 1) ^ swz] = src_s->w[from];
    }
}
4443 
/*
 * helper_msa_ilvl_d: doubleword 0 of wd takes doubleword 1 of wt, then
 * doubleword 1 of wd takes doubleword 1 of ws.
 *
 * wd, ws and wt are indices into env->active_fpu.fpr[].  d[0] is stored
 * before d[1]; keep that order so that wd may be the same register as wt.
 */
void helper_msa_ilvl_d(CPUMIPSState *env,
                       uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;

    dst->d[0] = src_t->d[1];
    dst->d[1] = src_s->d[1];
}
4454 
4455 
/*
 * helper_msa_ilvr_b: interleave byte elements 0..7 of wt and ws into wd.
 *
 * For i = 7..0, element 2i + 1 of wd receives element i of ws and element
 * 2i of wd receives element i of wt.  Element numbers are the indices into
 * wr_t.b[] used on little-endian hosts; on big-endian hosts every index is
 * XORed with 7.
 *
 * wd, ws and wt are indices into env->active_fpu.fpr[].  The loop runs in
 * descending order so that no store hits a source byte that is still to be
 * read; this lets wd be the same register as ws or wt.
 */
void helper_msa_ilvr_b(CPUMIPSState *env,
                       uint32_t wd, uint32_t ws, uint32_t wt)
{
#if HOST_BIG_ENDIAN
    const int swz = 7;
#else
    const int swz = 0;
#endif
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 7; i >= 0; i--) {
        const int from = i ^ swz;

        dst->b[(2 * i + 1) ^ swz] = src_s->b[from];
        dst->b[(2 * i) ^ swz] = src_t->b[from];
    }
}
4499 
/*
 * helper_msa_ilvr_h: interleave halfword elements 0..3 of wt and ws into wd.
 *
 * For i = 3..0, element 2i + 1 of wd receives element i of ws and element
 * 2i of wd receives element i of wt.  Element numbers are the indices into
 * wr_t.h[] used on little-endian hosts; on big-endian hosts every index is
 * XORed with 3.
 *
 * wd, ws and wt are indices into env->active_fpu.fpr[].  The descending
 * order keeps every store clear of source elements not yet read, so wd may
 * be the same register as ws or wt.
 */
void helper_msa_ilvr_h(CPUMIPSState *env,
                       uint32_t wd, uint32_t ws, uint32_t wt)
{
#if HOST_BIG_ENDIAN
    const int swz = 3;
#else
    const int swz = 0;
#endif
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 3; i >= 0; i--) {
        const int from = i ^ swz;

        dst->h[(2 * i + 1) ^ swz] = src_s->h[from];
        dst->h[(2 * i) ^ swz] = src_t->h[from];
    }
}
4527 
/*
 * helper_msa_ilvr_w: interleave word elements 0..1 of wt and ws into wd.
 *
 * For i = 1..0, element 2i + 1 of wd receives element i of ws and element
 * 2i of wd receives element i of wt.  Element numbers are the indices into
 * wr_t.w[] used on little-endian hosts; on big-endian hosts every index is
 * XORed with 1.
 *
 * wd, ws and wt are indices into env->active_fpu.fpr[].  The descending
 * order keeps every store clear of source elements not yet read, so wd may
 * be the same register as ws or wt.
 */
void helper_msa_ilvr_w(CPUMIPSState *env,
                       uint32_t wd, uint32_t ws, uint32_t wt)
{
#if HOST_BIG_ENDIAN
    const int swz = 1;
#else
    const int swz = 0;
#endif
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 1; i >= 0; i--) {
        const int from = i ^ swz;

        dst->w[(2 * i + 1) ^ swz] = src_s->w[from];
        dst->w[(2 * i) ^ swz] = src_t->w[from];
    }
}
4547 
/*
 * helper_msa_ilvr_d: doubleword 1 of wd takes doubleword 0 of ws, then
 * doubleword 0 of wd takes doubleword 0 of wt.
 *
 * wd, ws and wt are indices into env->active_fpu.fpr[].  d[1] is stored
 * before d[0]; keep that order so that wd may be the same register as ws.
 */
void helper_msa_ilvr_d(CPUMIPSState *env,
                       uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;

    dst->d[1] = src_s->d[0];
    dst->d[0] = src_t->d[0];
}
4558 
4559 
4560 /*
4561  * Logic
4562  * -----
4563  *
4564  * +---------------+----------------------------------------------------------+
4565  * | AND.V         | Vector Logical And                                       |
4566  * | NOR.V         | Vector Logical Negated Or                                |
4567  * | OR.V          | Vector Logical Or                                        |
4568  * | XOR.V         | Vector Logical Exclusive Or                              |
4569  * +---------------+----------------------------------------------------------+
4570  */
4571 
4572 
/*
 * AND.V (Vector Logical And): wd = ws & wt, computed on the two doubleword
 * halves of the registers.
 *
 * wd, ws and wt are indices into env->active_fpu.fpr[].
 */
void helper_msa_and_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 2; i++) {
        dst->d[i] = lhs->d[i] & rhs->d[i];
    }
}
4582 
/*
 * NOR.V (Vector Logical Negated Or): wd = ~(ws | wt), computed on the two
 * doubleword halves of the registers.
 *
 * wd, ws and wt are indices into env->active_fpu.fpr[].
 */
void helper_msa_nor_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 2; i++) {
        dst->d[i] = ~(lhs->d[i] | rhs->d[i]);
    }
}
4592 
/*
 * OR.V (Vector Logical Or): wd = ws | wt, computed on the two doubleword
 * halves of the registers.
 *
 * wd, ws and wt are indices into env->active_fpu.fpr[].
 */
void helper_msa_or_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 2; i++) {
        dst->d[i] = lhs->d[i] | rhs->d[i];
    }
}
4602 
/*
 * XOR.V (Vector Logical Exclusive Or): wd = ws ^ wt, computed on the two
 * doubleword halves of the registers.
 *
 * wd, ws and wt are indices into env->active_fpu.fpr[].
 */
void helper_msa_xor_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 2; i++) {
        dst->d[i] = lhs->d[i] ^ rhs->d[i];
    }
}
4612 
4613 
4614 /*
4615  * Move
4616  * ----
4617  *
4618  * +---------------+----------------------------------------------------------+
4619  * | MOVE.V        | Vector Move                                              |
4620  * +---------------+----------------------------------------------------------+
4621  */
4622 
/*
 * Copy both doubleword halves of *pws into *pwd, d[0] first.
 */
static inline void msa_move_v(wr_t *pwd, wr_t *pws)
{
    int i;

    for (i = 0; i < 2; i++) {
        pwd->d[i] = pws->d[i];
    }
}
4628 
/*
 * MOVE.V (Vector Move): copy register ws to register wd via msa_move_v().
 *
 * wd and ws are indices into env->active_fpu.fpr[].
 */
void helper_msa_move_v(CPUMIPSState *env, uint32_t wd, uint32_t ws)
{
    msa_move_v(&env->active_fpu.fpr[wd].wr, &env->active_fpu.fpr[ws].wr);
}
4636 
4637 
4638 /*
4639  * Pack
4640  * ----
4641  *
4642  * +---------------+----------------------------------------------------------+
4643  * | PCKEV.B       | Vector Pack Even (byte)                                  |
4644  * | PCKEV.H       | Vector Pack Even (halfword)                              |
4645  * | PCKEV.W       | Vector Pack Even (word)                                  |
4646  * | PCKEV.D       | Vector Pack Even (doubleword)                            |
4647  * | PCKOD.B       | Vector Pack Odd (byte)                                   |
4648  * | PCKOD.H       | Vector Pack Odd (halfword)                               |
4649  * | PCKOD.W       | Vector Pack Odd (word)                                   |
4650  * | PCKOD.D       | Vector Pack Odd (doubleword)                             |
4651  * | VSHF.B        | Vector Data Preserving Shuffle (byte)                    |
4652  * | VSHF.H        | Vector Data Preserving Shuffle (halfword)                |
4653  * | VSHF.W        | Vector Data Preserving Shuffle (word)                    |
4654  * | VSHF.D        | Vector Data Preserving Shuffle (doubleword)              |
4655  * +---------------+----------------------------------------------------------+
4656  */
4657 
4658 
/*
 * PCKEV.B (Vector Pack Even, byte).
 *
 * For i = 0..7, element i of wd receives element 2i of wt and element
 * i + 8 of wd receives element 2i of ws.  Element numbers are the indices
 * into wr_t.b[] used on little-endian hosts; on big-endian hosts every
 * index is XORed with 7.
 *
 * wd, ws and wt are indices into env->active_fpu.fpr[].
 */
void helper_msa_pckev_b(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
#if HOST_BIG_ENDIAN
    const int swz = 7;
#else
    const int swz = 0;
#endif
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    /* Read from copies so that wd may be the same register as ws or wt. */
    const wr_t from_s = env->active_fpu.fpr[ws].wr;
    const wr_t from_t = env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 8; i++) {
        const int even = (2 * i) ^ swz;

        dst->b[i ^ swz] = from_t.b[even];
        dst->b[(i + 8) ^ swz] = from_s.b[even];
    }
}
4702 
/*
 * PCKEV.H (Vector Pack Even, halfword).
 *
 * For i = 0..3, element i of wd receives element 2i of wt and element
 * i + 4 of wd receives element 2i of ws.  Element numbers are the indices
 * into wr_t.h[] used on little-endian hosts; on big-endian hosts every
 * index is XORed with 3.
 *
 * wd, ws and wt are indices into env->active_fpu.fpr[].
 */
void helper_msa_pckev_h(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
#if HOST_BIG_ENDIAN
    const int swz = 3;
#else
    const int swz = 0;
#endif
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    /* Read from copies so that wd may be the same register as ws or wt. */
    const wr_t from_s = env->active_fpu.fpr[ws].wr;
    const wr_t from_t = env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 4; i++) {
        const int even = (2 * i) ^ swz;

        dst->h[i ^ swz] = from_t.h[even];
        dst->h[(i + 4) ^ swz] = from_s.h[even];
    }
}
4730 
/*
 * PCKEV.W (Vector Pack Even, word).
 *
 * For i = 0..1, element i of wd receives element 2i of wt and element
 * i + 2 of wd receives element 2i of ws.  Element numbers are the indices
 * into wr_t.w[] used on little-endian hosts; on big-endian hosts every
 * index is XORed with 1.
 *
 * wd, ws and wt are indices into env->active_fpu.fpr[].
 */
void helper_msa_pckev_w(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
#if HOST_BIG_ENDIAN
    const int swz = 1;
#else
    const int swz = 0;
#endif
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    /* Read from copies so that wd may be the same register as ws or wt. */
    const wr_t from_s = env->active_fpu.fpr[ws].wr;
    const wr_t from_t = env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 2; i++) {
        const int even = (2 * i) ^ swz;

        dst->w[i ^ swz] = from_t.w[even];
        dst->w[(i + 2) ^ swz] = from_s.w[even];
    }
}
4750 
/*
 * PCKEV.D (Vector Pack Even, doubleword): doubleword 1 of wd takes
 * doubleword 0 of ws, then doubleword 0 of wd takes doubleword 0 of wt.
 *
 * wd, ws and wt are indices into env->active_fpu.fpr[].  d[1] is stored
 * before d[0]; keep that order so that wd may be the same register as ws.
 */
void helper_msa_pckev_d(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;

    dst->d[1] = src_s->d[0];
    dst->d[0] = src_t->d[0];
}
4761 
4762 
/*
 * PCKOD.B (Vector Pack Odd, byte).
 *
 * For i = 0..7, element i of wd receives element 2i + 1 of wt and element
 * i + 8 of wd receives element 2i + 1 of ws.  Element numbers are the
 * indices into wr_t.b[] used on little-endian hosts; on big-endian hosts
 * every index is XORed with 7.
 *
 * wd, ws and wt are indices into env->active_fpu.fpr[].
 */
void helper_msa_pckod_b(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
#if HOST_BIG_ENDIAN
    const int swz = 7;
#else
    const int swz = 0;
#endif
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    /* Read from copies so that wd may be the same register as ws or wt. */
    const wr_t from_s = env->active_fpu.fpr[ws].wr;
    const wr_t from_t = env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 8; i++) {
        const int odd = (2 * i + 1) ^ swz;

        dst->b[i ^ swz] = from_t.b[odd];
        dst->b[(i + 8) ^ swz] = from_s.b[odd];
    }
}
4807 
/*
 * PCKOD.H (Vector Pack Odd, halfword).
 *
 * For i = 0..3, element i of wd receives element 2i + 1 of wt and element
 * i + 4 of wd receives element 2i + 1 of ws.  Element numbers are the
 * indices into wr_t.h[] used on little-endian hosts; on big-endian hosts
 * every index is XORed with 3.
 *
 * wd, ws and wt are indices into env->active_fpu.fpr[].
 */
void helper_msa_pckod_h(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
#if HOST_BIG_ENDIAN
    const int swz = 3;
#else
    const int swz = 0;
#endif
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    /* Read from copies so that wd may be the same register as ws or wt. */
    const wr_t from_s = env->active_fpu.fpr[ws].wr;
    const wr_t from_t = env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 4; i++) {
        const int odd = (2 * i + 1) ^ swz;

        dst->h[i ^ swz] = from_t.h[odd];
        dst->h[(i + 4) ^ swz] = from_s.h[odd];
    }
}
4835 
/*
 * PCKOD.W (Vector Pack Odd, word).
 *
 * For i = 0..1, element i of wd receives element 2i + 1 of wt and element
 * i + 2 of wd receives element 2i + 1 of ws.  Element numbers are the
 * indices into wr_t.w[] used on little-endian hosts; on big-endian hosts
 * every index is XORed with 1.
 *
 * wd, ws and wt are indices into env->active_fpu.fpr[].
 */
void helper_msa_pckod_w(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
#if HOST_BIG_ENDIAN
    const int swz = 1;
#else
    const int swz = 0;
#endif
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    /* Read from copies so that wd may be the same register as ws or wt. */
    const wr_t from_s = env->active_fpu.fpr[ws].wr;
    const wr_t from_t = env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 2; i++) {
        const int odd = (2 * i + 1) ^ swz;

        dst->w[i ^ swz] = from_t.w[odd];
        dst->w[(i + 2) ^ swz] = from_s.w[odd];
    }
}
4855 
/*
 * PCKOD.D (Vector Pack Odd, doubleword): doubleword 0 of wd takes
 * doubleword 1 of wt, then doubleword 1 of wd takes doubleword 1 of ws.
 *
 * wd, ws and wt are indices into env->active_fpu.fpr[].  d[0] is stored
 * before d[1]; keep that order so that wd may be the same register as wt.
 */
void helper_msa_pckod_d(CPUMIPSState *env,
                        uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;

    dst->d[0] = src_t->d[1];
    dst->d[1] = src_s->d[1];
}
4866 
4867 
4868 /*
4869  * Shift
4870  * -----
4871  *
4872  * +---------------+----------------------------------------------------------+
4873  * | SLL.B         | Vector Shift Left (byte)                                 |
4874  * | SLL.H         | Vector Shift Left (halfword)                             |
4875  * | SLL.W         | Vector Shift Left (word)                                 |
4876  * | SLL.D         | Vector Shift Left (doubleword)                           |
4877  * | SRA.B         | Vector Shift Right Arithmetic (byte)                     |
4878  * | SRA.H         | Vector Shift Right Arithmetic (halfword)                 |
4879  * | SRA.W         | Vector Shift Right Arithmetic (word)                     |
4880  * | SRA.D         | Vector Shift Right Arithmetic (doubleword)               |
4881  * | SRAR.B        | Vector Shift Right Arithmetic Rounded (byte)             |
4882  * | SRAR.H        | Vector Shift Right Arithmetic Rounded (halfword)         |
4883  * | SRAR.W        | Vector Shift Right Arithmetic Rounded (word)             |
4884  * | SRAR.D        | Vector Shift Right Arithmetic Rounded (doubleword)       |
4885  * | SRL.B         | Vector Shift Right Logical (byte)                        |
4886  * | SRL.H         | Vector Shift Right Logical (halfword)                    |
4887  * | SRL.W         | Vector Shift Right Logical (word)                        |
4888  * | SRL.D         | Vector Shift Right Logical (doubleword)                  |
4889  * | SRLR.B        | Vector Shift Right Logical Rounded (byte)                |
4890  * | SRLR.H        | Vector Shift Right Logical Rounded (halfword)            |
4891  * | SRLR.W        | Vector Shift Right Logical Rounded (word)                |
4892  * | SRLR.D        | Vector Shift Right Logical Rounded (doubleword)          |
4893  * +---------------+----------------------------------------------------------+
4894  */
4895 
4896 
/*
 * Shift-left primitive shared by the SLL.<df> helpers.
 *
 * @df:   data format selector, forwarded to BIT_POSITION()
 * @arg1: value to shift
 * @arg2: raw shift operand; BIT_POSITION(arg2, df) gives the bit count
 *        actually used (presumably arg2 reduced to the element width; the
 *        macro is defined elsewhere in this file - confirm there)
 *
 * Returns arg1 shifted left by that count.  The shift is carried out on the
 * unsigned representation: in ISO C, left-shifting a negative int64_t, or
 * shifting a set bit into or past the sign bit, is undefined behaviour.
 * The unsigned shift gives the same bit pattern without relying on compiler
 * flags.  Callers store the result into an element of the width they need.
 */
static inline int64_t msa_sll_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    int32_t b_arg2 = BIT_POSITION(arg2, df);

    return (int64_t)((uint64_t)arg1 << b_arg2);
}
4902 
/*
 * SLL.B (Vector Shift Left, byte): for each of the 16 byte elements,
 * wd[i] = msa_sll_df(DF_BYTE, ws[i], wt[i]).
 *
 * wd, ws and wt are indices into env->active_fpu.fpr[].
 */
void helper_msa_sll_b(CPUMIPSState *env,
                      uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *val = &env->active_fpu.fpr[ws].wr;
    wr_t *amt = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 16; i++) {
        dst->b[i] = msa_sll_df(DF_BYTE, val->b[i], amt->b[i]);
    }
}
4927 
/*
 * SLL.H (Vector Shift Left, halfword): for each of the 8 halfword elements,
 * wd[i] = msa_sll_df(DF_HALF, ws[i], wt[i]).
 *
 * wd, ws and wt are indices into env->active_fpu.fpr[].
 */
void helper_msa_sll_h(CPUMIPSState *env,
                      uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *val = &env->active_fpu.fpr[ws].wr;
    wr_t *amt = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 8; i++) {
        dst->h[i] = msa_sll_df(DF_HALF, val->h[i], amt->h[i]);
    }
}
4944 
/*
 * SLL.W (Vector Shift Left, word): for each of the 4 word elements,
 * wd[i] = msa_sll_df(DF_WORD, ws[i], wt[i]).
 *
 * wd, ws and wt are indices into env->active_fpu.fpr[].
 */
void helper_msa_sll_w(CPUMIPSState *env,
                      uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *val = &env->active_fpu.fpr[ws].wr;
    wr_t *amt = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 4; i++) {
        dst->w[i] = msa_sll_df(DF_WORD, val->w[i], amt->w[i]);
    }
}
4957 
/*
 * SLL.D (Vector Shift Left, doubleword): for each of the 2 doubleword
 * elements, wd[i] = msa_sll_df(DF_DOUBLE, ws[i], wt[i]).
 *
 * wd, ws and wt are indices into env->active_fpu.fpr[].
 */
void helper_msa_sll_d(CPUMIPSState *env,
                      uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *val = &env->active_fpu.fpr[ws].wr;
    wr_t *amt = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 2; i++) {
        dst->d[i] = msa_sll_df(DF_DOUBLE, val->d[i], amt->d[i]);
    }
}
4968 
4969 
/*
 * Arithmetic shift-right primitive shared by the SRA.<df> helpers.
 *
 * @df:   data format selector, forwarded to BIT_POSITION()
 * @arg1: signed value to shift
 * @arg2: raw shift operand; BIT_POSITION(arg2, df) gives the count used
 *
 * Returns arg1 >> count, using the signed right shift of int64_t.
 */
static inline int64_t msa_sra_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const int32_t count = BIT_POSITION(arg2, df);

    return arg1 >> count;
}
4975 
/*
 * SRA.B (Vector Shift Right Arithmetic, byte): for each of the 16 byte
 * elements, wd[i] = msa_sra_df(DF_BYTE, ws[i], wt[i]).
 *
 * wd, ws and wt are indices into env->active_fpu.fpr[].
 */
void helper_msa_sra_b(CPUMIPSState *env,
                      uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *val = &env->active_fpu.fpr[ws].wr;
    wr_t *amt = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 16; i++) {
        dst->b[i] = msa_sra_df(DF_BYTE, val->b[i], amt->b[i]);
    }
}
5000 
/*
 * SRA.H (Vector Shift Right Arithmetic, halfword): for each of the 8
 * halfword elements, wd[i] = msa_sra_df(DF_HALF, ws[i], wt[i]).
 *
 * wd, ws and wt are indices into env->active_fpu.fpr[].
 */
void helper_msa_sra_h(CPUMIPSState *env,
                      uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *val = &env->active_fpu.fpr[ws].wr;
    wr_t *amt = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 8; i++) {
        dst->h[i] = msa_sra_df(DF_HALF, val->h[i], amt->h[i]);
    }
}
5017 
/*
 * SRA.W (Vector Shift Right Arithmetic, word): for each of the 4 word
 * elements, wd[i] = msa_sra_df(DF_WORD, ws[i], wt[i]).
 *
 * wd, ws and wt are indices into env->active_fpu.fpr[].
 */
void helper_msa_sra_w(CPUMIPSState *env,
                      uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *val = &env->active_fpu.fpr[ws].wr;
    wr_t *amt = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 4; i++) {
        dst->w[i] = msa_sra_df(DF_WORD, val->w[i], amt->w[i]);
    }
}
5030 
/*
 * SRA.D (Vector Shift Right Arithmetic, doubleword): for each of the 2
 * doubleword elements, wd[i] = msa_sra_df(DF_DOUBLE, ws[i], wt[i]).
 *
 * wd, ws and wt are indices into env->active_fpu.fpr[].
 */
void helper_msa_sra_d(CPUMIPSState *env,
                      uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *val = &env->active_fpu.fpr[ws].wr;
    wr_t *amt = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 2; i++) {
        dst->d[i] = msa_sra_df(DF_DOUBLE, val->d[i], amt->d[i]);
    }
}
5041 
5042 
/*
 * Rounded arithmetic shift-right primitive shared by the SRAR.<df> helpers.
 *
 * @df:   data format selector, forwarded to BIT_POSITION()
 * @arg1: signed value to shift
 * @arg2: raw shift operand; BIT_POSITION(arg2, df) gives the count used
 *
 * A count of zero returns arg1 unchanged.  Otherwise the result is
 * arg1 >> count plus the last bit shifted out (bit count - 1 of arg1).
 */
static inline int64_t msa_srar_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const int32_t count = BIT_POSITION(arg2, df);
    int64_t round;

    if (count == 0) {
        return arg1;
    }

    /* The most significant discarded bit decides whether to round up. */
    round = (arg1 >> (count - 1)) & 1;
    return (arg1 >> count) + round;
}
5053 
/*
 * SRAR.B (Vector Shift Right Arithmetic Rounded, byte): for each of the 16
 * byte elements, wd[i] = msa_srar_df(DF_BYTE, ws[i], wt[i]).
 *
 * wd, ws and wt are indices into env->active_fpu.fpr[].
 */
void helper_msa_srar_b(CPUMIPSState *env,
                       uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *val = &env->active_fpu.fpr[ws].wr;
    wr_t *amt = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 16; i++) {
        dst->b[i] = msa_srar_df(DF_BYTE, val->b[i], amt->b[i]);
    }
}
5078 
/*
 * SRAR.H (Vector Shift Right Arithmetic Rounded, halfword): for each of the
 * 8 halfword elements, wd[i] = msa_srar_df(DF_HALF, ws[i], wt[i]).
 *
 * wd, ws and wt are indices into env->active_fpu.fpr[].
 */
void helper_msa_srar_h(CPUMIPSState *env,
                       uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *val = &env->active_fpu.fpr[ws].wr;
    wr_t *amt = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 8; i++) {
        dst->h[i] = msa_srar_df(DF_HALF, val->h[i], amt->h[i]);
    }
}
5095 
/*
 * SRAR.W (Vector Shift Right Arithmetic Rounded, word): for each of the 4
 * word elements, wd[i] = msa_srar_df(DF_WORD, ws[i], wt[i]).
 *
 * wd, ws and wt are indices into env->active_fpu.fpr[].
 */
void helper_msa_srar_w(CPUMIPSState *env,
                       uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *val = &env->active_fpu.fpr[ws].wr;
    wr_t *amt = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 4; i++) {
        dst->w[i] = msa_srar_df(DF_WORD, val->w[i], amt->w[i]);
    }
}
5108 
/*
 * SRAR.D (Vector Shift Right Arithmetic Rounded, doubleword): for each of
 * the 2 doubleword elements, wd[i] = msa_srar_df(DF_DOUBLE, ws[i], wt[i]).
 *
 * wd, ws and wt are indices into env->active_fpu.fpr[].
 */
void helper_msa_srar_d(CPUMIPSState *env,
                       uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *val = &env->active_fpu.fpr[ws].wr;
    wr_t *amt = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 2; i++) {
        dst->d[i] = msa_srar_df(DF_DOUBLE, val->d[i], amt->d[i]);
    }
}
5119 
5120 
/*
 * Logical shift-right primitive shared by the SRL.<df> helpers.
 *
 * @df:   data format selector, forwarded to UNSIGNED() and BIT_POSITION()
 * @arg1: value to shift; UNSIGNED() first masks it to the element width so
 *        that sign-extension bits are not shifted in
 * @arg2: raw shift operand; BIT_POSITION(arg2, df) gives the count used
 *
 * Returns the masked value shifted right by that count.
 */
static inline int64_t msa_srl_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const uint64_t masked = UNSIGNED(arg1, df);
    const int32_t count = BIT_POSITION(arg2, df);

    return masked >> count;
}
5127 
/*
 * SRL.B (Vector Shift Right Logical, byte): for each of the 16 byte
 * elements, wd[i] = msa_srl_df(DF_BYTE, ws[i], wt[i]).
 *
 * wd, ws and wt are indices into env->active_fpu.fpr[].
 */
void helper_msa_srl_b(CPUMIPSState *env,
                      uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *val = &env->active_fpu.fpr[ws].wr;
    wr_t *amt = &env->active_fpu.fpr[wt].wr;
    int i;

    for (i = 0; i < 16; i++) {
        dst->b[i] = msa_srl_df(DF_BYTE, val->b[i], amt->b[i]);
    }
}
5152 
/*
 * Halfword-format helper: each halfword lane of wd receives
 * msa_srl_df(DF_HALF, ...) of the same lane of ws and wt.
 */
void helper_msa_srl_h(CPUMIPSState *env,
                      uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src = &env->active_fpu.fpr[ws].wr;
    wr_t *cnt = &env->active_fpu.fpr[wt].wr;
    uint32_t lane;

    /* Lanes are processed in ascending order, one at a time. */
    for (lane = 0; lane < DF_ELEMENTS(DF_HALF); lane++) {
        dst->h[lane] = msa_srl_df(DF_HALF, src->h[lane], cnt->h[lane]);
    }
}
5169 
/*
 * Word-format helper: each word lane of wd receives
 * msa_srl_df(DF_WORD, ...) of the same lane of ws and wt.
 */
void helper_msa_srl_w(CPUMIPSState *env,
                      uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src = &env->active_fpu.fpr[ws].wr;
    wr_t *cnt = &env->active_fpu.fpr[wt].wr;
    uint32_t lane;

    /* Lanes are processed in ascending order, one at a time. */
    for (lane = 0; lane < DF_ELEMENTS(DF_WORD); lane++) {
        dst->w[lane] = msa_srl_df(DF_WORD, src->w[lane], cnt->w[lane]);
    }
}
5182 
/*
 * Doubleword-format helper: each doubleword lane of wd receives
 * msa_srl_df(DF_DOUBLE, ...) of the same lane of ws and wt.
 */
void helper_msa_srl_d(CPUMIPSState *env,
                      uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src = &env->active_fpu.fpr[ws].wr;
    wr_t *cnt = &env->active_fpu.fpr[wt].wr;
    uint32_t lane;

    /* Lanes are processed in ascending order, one at a time. */
    for (lane = 0; lane < DF_ELEMENTS(DF_DOUBLE); lane++) {
        dst->d[lane] = msa_srl_df(DF_DOUBLE, src->d[lane], cnt->d[lane]);
    }
}
5193 
5194 
/*
 * Logical right shift of a single element with rounding.
 *
 * The shift count comes from BIT_POSITION(arg2, df).  A count of zero
 * returns the unsigned value of arg1 unchanged; otherwise the last bit
 * shifted out is added to the shifted result.
 */
static inline int64_t msa_srlr_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    uint64_t value = UNSIGNED(arg1, df);
    int32_t shift = BIT_POSITION(arg2, df);
    uint64_t round;

    if (shift == 0) {
        /* Nothing is shifted out, so there is nothing to round. */
        return value;
    }

    round = (value >> (shift - 1)) & 1;
    return (value >> shift) + round;
}
5206 
/*
 * Byte-format helper: each byte lane of wd receives
 * msa_srlr_df(DF_BYTE, ...) of the same lane of ws and wt.
 */
void helper_msa_srlr_b(CPUMIPSState *env,
                       uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src = &env->active_fpu.fpr[ws].wr;
    wr_t *cnt = &env->active_fpu.fpr[wt].wr;
    uint32_t lane;

    /* Lanes are processed in ascending order, one at a time. */
    for (lane = 0; lane < DF_ELEMENTS(DF_BYTE); lane++) {
        dst->b[lane] = msa_srlr_df(DF_BYTE, src->b[lane], cnt->b[lane]);
    }
}
5231 
/*
 * Halfword-format helper: each halfword lane of wd receives
 * msa_srlr_df(DF_HALF, ...) of the same lane of ws and wt.
 */
void helper_msa_srlr_h(CPUMIPSState *env,
                       uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src = &env->active_fpu.fpr[ws].wr;
    wr_t *cnt = &env->active_fpu.fpr[wt].wr;
    uint32_t lane;

    /* Lanes are processed in ascending order, one at a time. */
    for (lane = 0; lane < DF_ELEMENTS(DF_HALF); lane++) {
        dst->h[lane] = msa_srlr_df(DF_HALF, src->h[lane], cnt->h[lane]);
    }
}
5248 
/*
 * Word-format helper: each word lane of wd receives
 * msa_srlr_df(DF_WORD, ...) of the same lane of ws and wt.
 */
void helper_msa_srlr_w(CPUMIPSState *env,
                       uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src = &env->active_fpu.fpr[ws].wr;
    wr_t *cnt = &env->active_fpu.fpr[wt].wr;
    uint32_t lane;

    /* Lanes are processed in ascending order, one at a time. */
    for (lane = 0; lane < DF_ELEMENTS(DF_WORD); lane++) {
        dst->w[lane] = msa_srlr_df(DF_WORD, src->w[lane], cnt->w[lane]);
    }
}
5261 
/*
 * Doubleword-format helper: each doubleword lane of wd receives
 * msa_srlr_df(DF_DOUBLE, ...) of the same lane of ws and wt.
 */
void helper_msa_srlr_d(CPUMIPSState *env,
                       uint32_t wd, uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src = &env->active_fpu.fpr[ws].wr;
    wr_t *cnt = &env->active_fpu.fpr[wt].wr;
    uint32_t lane;

    /* Lanes are processed in ascending order, one at a time. */
    for (lane = 0; lane < DF_ELEMENTS(DF_DOUBLE); lane++) {
        dst->d[lane] = msa_srlr_df(DF_DOUBLE, src->d[lane], cnt->d[lane]);
    }
}
5272 
5273 
5274 #define MSA_FN_IMM8(FUNC, DEST, OPERATION)                              \
5275 void helper_msa_ ## FUNC(CPUMIPSState *env, uint32_t wd, uint32_t ws,   \
5276         uint32_t i8)                                                    \
5277 {                                                                       \
5278     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
5279     wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
5280     uint32_t i;                                                         \
5281     for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {                        \
5282         DEST = OPERATION;                                               \
5283     }                                                                   \
5284 }
5285 
5286 MSA_FN_IMM8(andi_b, pwd->b[i], pws->b[i] & i8)
5287 MSA_FN_IMM8(ori_b, pwd->b[i], pws->b[i] | i8)
5288 MSA_FN_IMM8(nori_b, pwd->b[i], ~(pws->b[i] | i8))
5289 MSA_FN_IMM8(xori_b, pwd->b[i], pws->b[i] ^ i8)
5290 
5291 #define BIT_MOVE_IF_NOT_ZERO(dest, arg1, arg2, df) \
5292             UNSIGNED(((dest & (~arg2)) | (arg1 & arg2)), df)
5293 MSA_FN_IMM8(bmnzi_b, pwd->b[i],
5294         BIT_MOVE_IF_NOT_ZERO(pwd->b[i], pws->b[i], i8, DF_BYTE))
5295 
5296 #define BIT_MOVE_IF_ZERO(dest, arg1, arg2, df) \
5297             UNSIGNED((dest & arg2) | (arg1 & (~arg2)), df)
5298 MSA_FN_IMM8(bmzi_b, pwd->b[i],
5299         BIT_MOVE_IF_ZERO(pwd->b[i], pws->b[i], i8, DF_BYTE))
5300 
5301 #define BIT_SELECT(dest, arg1, arg2, df) \
5302             UNSIGNED((arg1 & (~dest)) | (arg2 & dest), df)
5303 MSA_FN_IMM8(bseli_b, pwd->b[i],
5304         BIT_SELECT(pwd->b[i], pws->b[i], i8, DF_BYTE))
5305 
5306 #undef BIT_SELECT
5307 #undef BIT_MOVE_IF_ZERO
5308 #undef BIT_MOVE_IF_NOT_ZERO
5309 #undef MSA_FN_IMM8
5310 
5311 #define SHF_POS(i, imm) (((i) & 0xfc) + (((imm) >> (2 * ((i) & 0x03))) & 0x03))
5312 
helper_msa_shf_df(CPUMIPSState * env,uint32_t df,uint32_t wd,uint32_t ws,uint32_t imm)5313 void helper_msa_shf_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
5314                        uint32_t ws, uint32_t imm)
5315 {
5316     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5317     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5318     wr_t wx, *pwx = &wx;
5319     uint32_t i;
5320 
5321     switch (df) {
5322     case DF_BYTE:
5323         for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {
5324             pwx->b[i] = pws->b[SHF_POS(i, imm)];
5325         }
5326         break;
5327     case DF_HALF:
5328         for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {
5329             pwx->h[i] = pws->h[SHF_POS(i, imm)];
5330         }
5331         break;
5332     case DF_WORD:
5333         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
5334             pwx->w[i] = pws->w[SHF_POS(i, imm)];
5335         }
5336         break;
5337     default:
5338         g_assert_not_reached();
5339     }
5340     msa_move_v(pwd, pwx);
5341 }
5342 
5343 #define MSA_BINOP_IMM_DF(helper, func)                                  \
5344 void helper_msa_ ## helper ## _df(CPUMIPSState *env, uint32_t df,       \
5345                         uint32_t wd, uint32_t ws, int32_t u5)           \
5346 {                                                                       \
5347     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
5348     wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
5349     uint32_t i;                                                         \
5350                                                                         \
5351     switch (df) {                                                       \
5352     case DF_BYTE:                                                       \
5353         for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {                    \
5354             pwd->b[i] = msa_ ## func ## _df(df, pws->b[i], u5);         \
5355         }                                                               \
5356         break;                                                          \
5357     case DF_HALF:                                                       \
5358         for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {                    \
5359             pwd->h[i] = msa_ ## func ## _df(df, pws->h[i], u5);         \
5360         }                                                               \
5361         break;                                                          \
5362     case DF_WORD:                                                       \
5363         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {                    \
5364             pwd->w[i] = msa_ ## func ## _df(df, pws->w[i], u5);         \
5365         }                                                               \
5366         break;                                                          \
5367     case DF_DOUBLE:                                                     \
5368         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {                  \
5369             pwd->d[i] = msa_ ## func ## _df(df, pws->d[i], u5);         \
5370         }                                                               \
5371         break;                                                          \
5372     default:                                                            \
5373         g_assert_not_reached();                                         \
5374     }                                                                   \
5375 }
5376 
MSA_BINOP_IMM_DF(addvi,addv)5377 MSA_BINOP_IMM_DF(addvi, addv)
5378 MSA_BINOP_IMM_DF(subvi, subv)
5379 MSA_BINOP_IMM_DF(ceqi, ceq)
5380 MSA_BINOP_IMM_DF(clei_s, cle_s)
5381 MSA_BINOP_IMM_DF(clei_u, cle_u)
5382 MSA_BINOP_IMM_DF(clti_s, clt_s)
5383 MSA_BINOP_IMM_DF(clti_u, clt_u)
5384 MSA_BINOP_IMM_DF(maxi_s, max_s)
5385 MSA_BINOP_IMM_DF(maxi_u, max_u)
5386 MSA_BINOP_IMM_DF(mini_s, min_s)
5387 MSA_BINOP_IMM_DF(mini_u, min_u)
5388 #undef MSA_BINOP_IMM_DF
5389 
5390 void helper_msa_ldi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
5391                        int32_t s10)
5392 {
5393     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5394     uint32_t i;
5395 
5396     switch (df) {
5397     case DF_BYTE:
5398         for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {
5399             pwd->b[i] = (int8_t)s10;
5400         }
5401         break;
5402     case DF_HALF:
5403         for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {
5404             pwd->h[i] = (int16_t)s10;
5405         }
5406         break;
5407     case DF_WORD:
5408         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
5409             pwd->w[i] = (int32_t)s10;
5410         }
5411         break;
5412     case DF_DOUBLE:
5413         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
5414             pwd->d[i] = (int64_t)s10;
5415         }
5416        break;
5417     default:
5418         g_assert_not_reached();
5419     }
5420 }
5421 
/*
 * Signed saturation: clamp arg to the range of an (m + 1)-bit two's
 * complement integer, i.e. [-2^m, 2^m - 1].
 *
 * df is part of the common per-element signature but is not used here.
 * m is expected to be in the range 0..63.
 *
 * The bounds are derived from INT64_MAX by a right shift instead of from
 * (1LL << m).  For m == 63 the shift-based form shifts into the sign bit
 * and then overflows on "- 1" and on negation, which ISO C leaves
 * undefined; the form below is well defined for every m in 0..63 and
 * yields the same bounds.
 */
static inline int64_t msa_sat_s_df(uint32_t df, int64_t arg, uint32_t m)
{
    const int64_t upper = INT64_MAX >> (63 - m);
    const int64_t lower = -upper - 1;

    if (arg < lower) {
        return lower;
    }
    if (arg > upper) {
        return upper;
    }
    return arg;
}
5428 
/*
 * Unsigned saturation: the unsigned df-wide value of arg is limited to
 * M_MAX_UINT(m + 1).
 */
static inline int64_t msa_sat_u_df(uint32_t df, int64_t arg, uint32_t m)
{
    const uint64_t value = UNSIGNED(arg, df);
    const uint64_t ceiling = M_MAX_UINT(m + 1);

    return value < ceiling ? value : ceiling;
}
5435 
5436 #define MSA_BINOP_IMMU_DF(helper, func)                                  \
5437 void helper_msa_ ## helper ## _df(CPUMIPSState *env, uint32_t df, uint32_t wd, \
5438                        uint32_t ws, uint32_t u5)                        \
5439 {                                                                       \
5440     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
5441     wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
5442     uint32_t i;                                                         \
5443                                                                         \
5444     switch (df) {                                                       \
5445     case DF_BYTE:                                                       \
5446         for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {                    \
5447             pwd->b[i] = msa_ ## func ## _df(df, pws->b[i], u5);         \
5448         }                                                               \
5449         break;                                                          \
5450     case DF_HALF:                                                       \
5451         for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {                    \
5452             pwd->h[i] = msa_ ## func ## _df(df, pws->h[i], u5);         \
5453         }                                                               \
5454         break;                                                          \
5455     case DF_WORD:                                                       \
5456         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {                    \
5457             pwd->w[i] = msa_ ## func ## _df(df, pws->w[i], u5);         \
5458         }                                                               \
5459         break;                                                          \
5460     case DF_DOUBLE:                                                     \
5461         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {                  \
5462             pwd->d[i] = msa_ ## func ## _df(df, pws->d[i], u5);         \
5463         }                                                               \
5464         break;                                                          \
5465     default:                                                            \
5466         g_assert_not_reached();                                         \
5467     }                                                                   \
5468 }
5469 
MSA_BINOP_IMMU_DF(slli,sll)5470 MSA_BINOP_IMMU_DF(slli, sll)
5471 MSA_BINOP_IMMU_DF(srai, sra)
5472 MSA_BINOP_IMMU_DF(srli, srl)
5473 MSA_BINOP_IMMU_DF(bclri, bclr)
5474 MSA_BINOP_IMMU_DF(bseti, bset)
5475 MSA_BINOP_IMMU_DF(bnegi, bneg)
5476 MSA_BINOP_IMMU_DF(sat_s, sat_s)
5477 MSA_BINOP_IMMU_DF(sat_u, sat_u)
5478 MSA_BINOP_IMMU_DF(srari, srar)
5479 MSA_BINOP_IMMU_DF(srlri, srlr)
5480 #undef MSA_BINOP_IMMU_DF
5481 
5482 #define MSA_TEROP_IMMU_DF(helper, func)                                  \
5483 void helper_msa_ ## helper ## _df(CPUMIPSState *env, uint32_t df,       \
5484                                   uint32_t wd, uint32_t ws, uint32_t u5) \
5485 {                                                                       \
5486     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
5487     wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
5488     uint32_t i;                                                         \
5489                                                                         \
5490     switch (df) {                                                       \
5491     case DF_BYTE:                                                       \
5492         for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {                    \
5493             pwd->b[i] = msa_ ## func ## _df(df, pwd->b[i], pws->b[i],   \
5494                                             u5);                        \
5495         }                                                               \
5496         break;                                                          \
5497     case DF_HALF:                                                       \
5498         for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {                    \
5499             pwd->h[i] = msa_ ## func ## _df(df, pwd->h[i], pws->h[i],   \
5500                                             u5);                        \
5501         }                                                               \
5502         break;                                                          \
5503     case DF_WORD:                                                       \
5504         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {                    \
5505             pwd->w[i] = msa_ ## func ## _df(df, pwd->w[i], pws->w[i],   \
5506                                             u5);                        \
5507         }                                                               \
5508         break;                                                          \
5509     case DF_DOUBLE:                                                     \
5510         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {                  \
5511             pwd->d[i] = msa_ ## func ## _df(df, pwd->d[i], pws->d[i],   \
5512                                             u5);                        \
5513         }                                                               \
5514         break;                                                          \
5515     default:                                                            \
5516         g_assert_not_reached();                                         \
5517     }                                                                   \
5518 }
5519 
5520 MSA_TEROP_IMMU_DF(binsli, binsl)
5521 MSA_TEROP_IMMU_DF(binsri, binsr)
5522 #undef MSA_TEROP_IMMU_DF
5523 
5524 #define CONCATENATE_AND_SLIDE(s, k)             \
5525     do {                                        \
5526         for (i = 0; i < s; i++) {               \
5527             v[i]     = pws->b[s * k + i];       \
5528             v[i + s] = pwd->b[s * k + i];       \
5529         }                                       \
5530         for (i = 0; i < s; i++) {               \
5531             pwd->b[s * k + i] = v[i + n];       \
5532         }                                       \
5533     } while (0)
5534 
5535 static inline void msa_sld_df(uint32_t df, wr_t *pwd,
5536                               wr_t *pws, target_ulong rt)
5537 {
5538     uint32_t n = rt % DF_ELEMENTS(df);
5539     uint8_t v[64];
5540     uint32_t i, k;
5541 
5542     switch (df) {
5543     case DF_BYTE:
5544         CONCATENATE_AND_SLIDE(DF_ELEMENTS(DF_BYTE), 0);
5545         break;
5546     case DF_HALF:
5547         for (k = 0; k < 2; k++) {
5548             CONCATENATE_AND_SLIDE(DF_ELEMENTS(DF_HALF), k);
5549         }
5550         break;
5551     case DF_WORD:
5552         for (k = 0; k < 4; k++) {
5553             CONCATENATE_AND_SLIDE(DF_ELEMENTS(DF_WORD), k);
5554         }
5555         break;
5556     case DF_DOUBLE:
5557         for (k = 0; k < 8; k++) {
5558             CONCATENATE_AND_SLIDE(DF_ELEMENTS(DF_DOUBLE), k);
5559         }
5560         break;
5561     default:
5562         g_assert_not_reached();
5563     }
5564 }
5565 
/*
 * Fixed-point multiply of one element: the product of arg1 and arg2 is
 * shifted right by DF_BITS(df) - 1.
 *
 * When both operands equal the minimum value of the format, the maximum
 * value of the format is returned instead.
 */
static inline int64_t msa_mul_q_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const int64_t lowest = DF_MIN_INT(df);

    if (arg1 == lowest && arg2 == lowest) {
        return DF_MAX_INT(df);
    }

    return (arg1 * arg2) >> (DF_BITS(df) - 1);
}
5576 
/*
 * Fixed-point multiply of one element with rounding: a rounding constant
 * of 1 << (DF_BITS(df) - 2) is added to the product of arg1 and arg2
 * before it is shifted right by DF_BITS(df) - 1.
 *
 * When both operands equal the minimum value of the format, the maximum
 * value of the format is returned instead.
 */
static inline int64_t msa_mulr_q_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const int64_t lowest = DF_MIN_INT(df);
    const int64_t rounding = 1LL << (DF_BITS(df) - 2);

    if (arg1 == lowest && arg2 == lowest) {
        return DF_MAX_INT(df);
    }

    return (arg1 * arg2 + rounding) >> (DF_BITS(df) - 1);
}
5588 
5589 #define MSA_BINOP_DF(func) \
5590 void helper_msa_ ## func ## _df(CPUMIPSState *env, uint32_t df,         \
5591                                 uint32_t wd, uint32_t ws, uint32_t wt)  \
5592 {                                                                       \
5593     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
5594     wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
5595     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);                          \
5596                                                                         \
5597     switch (df) {                                                       \
5598     case DF_BYTE:                                                       \
5599         pwd->b[0]  = msa_ ## func ## _df(df, pws->b[0],  pwt->b[0]);    \
5600         pwd->b[1]  = msa_ ## func ## _df(df, pws->b[1],  pwt->b[1]);    \
5601         pwd->b[2]  = msa_ ## func ## _df(df, pws->b[2],  pwt->b[2]);    \
5602         pwd->b[3]  = msa_ ## func ## _df(df, pws->b[3],  pwt->b[3]);    \
5603         pwd->b[4]  = msa_ ## func ## _df(df, pws->b[4],  pwt->b[4]);    \
5604         pwd->b[5]  = msa_ ## func ## _df(df, pws->b[5],  pwt->b[5]);    \
5605         pwd->b[6]  = msa_ ## func ## _df(df, pws->b[6],  pwt->b[6]);    \
5606         pwd->b[7]  = msa_ ## func ## _df(df, pws->b[7],  pwt->b[7]);    \
5607         pwd->b[8]  = msa_ ## func ## _df(df, pws->b[8],  pwt->b[8]);    \
5608         pwd->b[9]  = msa_ ## func ## _df(df, pws->b[9],  pwt->b[9]);    \
5609         pwd->b[10] = msa_ ## func ## _df(df, pws->b[10], pwt->b[10]);   \
5610         pwd->b[11] = msa_ ## func ## _df(df, pws->b[11], pwt->b[11]);   \
5611         pwd->b[12] = msa_ ## func ## _df(df, pws->b[12], pwt->b[12]);   \
5612         pwd->b[13] = msa_ ## func ## _df(df, pws->b[13], pwt->b[13]);   \
5613         pwd->b[14] = msa_ ## func ## _df(df, pws->b[14], pwt->b[14]);   \
5614         pwd->b[15] = msa_ ## func ## _df(df, pws->b[15], pwt->b[15]);   \
5615         break;                                                          \
5616     case DF_HALF:                                                       \
5617         pwd->h[0] = msa_ ## func ## _df(df, pws->h[0], pwt->h[0]);      \
5618         pwd->h[1] = msa_ ## func ## _df(df, pws->h[1], pwt->h[1]);      \
5619         pwd->h[2] = msa_ ## func ## _df(df, pws->h[2], pwt->h[2]);      \
5620         pwd->h[3] = msa_ ## func ## _df(df, pws->h[3], pwt->h[3]);      \
5621         pwd->h[4] = msa_ ## func ## _df(df, pws->h[4], pwt->h[4]);      \
5622         pwd->h[5] = msa_ ## func ## _df(df, pws->h[5], pwt->h[5]);      \
5623         pwd->h[6] = msa_ ## func ## _df(df, pws->h[6], pwt->h[6]);      \
5624         pwd->h[7] = msa_ ## func ## _df(df, pws->h[7], pwt->h[7]);      \
5625         break;                                                          \
5626     case DF_WORD:                                                       \
5627         pwd->w[0] = msa_ ## func ## _df(df, pws->w[0], pwt->w[0]);      \
5628         pwd->w[1] = msa_ ## func ## _df(df, pws->w[1], pwt->w[1]);      \
5629         pwd->w[2] = msa_ ## func ## _df(df, pws->w[2], pwt->w[2]);      \
5630         pwd->w[3] = msa_ ## func ## _df(df, pws->w[3], pwt->w[3]);      \
5631         break;                                                          \
5632     case DF_DOUBLE:                                                     \
5633         pwd->d[0] = msa_ ## func ## _df(df, pws->d[0], pwt->d[0]);      \
5634         pwd->d[1] = msa_ ## func ## _df(df, pws->d[1], pwt->d[1]);      \
5635         break;                                                          \
5636     default:                                                            \
5637         g_assert_not_reached();                                         \
5638     }                                                                   \
5639 }
5640 
5641 MSA_BINOP_DF(mul_q)
MSA_BINOP_DF(mulr_q)5642 MSA_BINOP_DF(mulr_q)
5643 #undef MSA_BINOP_DF
5644 
5645 void helper_msa_sld_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
5646                        uint32_t ws, uint32_t rt)
5647 {
5648     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5649     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5650 
5651     msa_sld_df(df, pwd, pws, env->active_tc.gpr[rt]);
5652 }
5653 
/*
 * Fixed-point multiply-add of one element.
 *
 * dest is scaled up by DF_BITS(df) - 1 bits, the product arg1 * arg2 is
 * added, and the sum is scaled back down.  The result is clamped to
 * [DF_MIN_INT(df), DF_MAX_INT(df)].
 */
static inline int64_t msa_madd_q_df(uint32_t df, int64_t dest, int64_t arg1,
                                    int64_t arg2)
{
    const int64_t lowest = DF_MIN_INT(df);
    const int64_t highest = DF_MAX_INT(df);
    const int64_t product = arg1 * arg2;
    int64_t result;

    result = ((dest << (DF_BITS(df) - 1)) + product) >> (DF_BITS(df) - 1);

    if (result < lowest) {
        return lowest;
    }
    if (highest < result) {
        return highest;
    }
    return result;
}
5667 
/*
 * Fixed-point multiply-subtract of one element.
 *
 * dest is scaled up by DF_BITS(df) - 1 bits, the product arg1 * arg2 is
 * subtracted, and the difference is scaled back down.  The result is
 * clamped to [DF_MIN_INT(df), DF_MAX_INT(df)].
 */
static inline int64_t msa_msub_q_df(uint32_t df, int64_t dest, int64_t arg1,
                                    int64_t arg2)
{
    const int64_t lowest = DF_MIN_INT(df);
    const int64_t highest = DF_MAX_INT(df);
    const int64_t product = arg1 * arg2;
    int64_t result;

    result = ((dest << (DF_BITS(df) - 1)) - product) >> (DF_BITS(df) - 1);

    if (result < lowest) {
        return lowest;
    }
    if (highest < result) {
        return highest;
    }
    return result;
}
5681 
/*
 * Fixed-point multiply-add of one element with rounding.
 *
 * dest is scaled up by DF_BITS(df) - 1 bits, the product arg1 * arg2 and
 * a rounding constant of 1 << (DF_BITS(df) - 2) are added, and the sum is
 * scaled back down.  The result is clamped to
 * [DF_MIN_INT(df), DF_MAX_INT(df)].
 */
static inline int64_t msa_maddr_q_df(uint32_t df, int64_t dest, int64_t arg1,
                                     int64_t arg2)
{
    const int64_t lowest = DF_MIN_INT(df);
    const int64_t highest = DF_MAX_INT(df);
    const int64_t rounding = 1LL << (DF_BITS(df) - 2);
    const int64_t product = arg1 * arg2;
    int64_t result;

    result = ((dest << (DF_BITS(df) - 1)) + product + rounding)
             >> (DF_BITS(df) - 1);

    if (result < lowest) {
        return lowest;
    }
    if (highest < result) {
        return highest;
    }
    return result;
}
5696 
/*
 * Fixed-point multiply-subtract of one element with rounding.
 *
 * dest is scaled up by DF_BITS(df) - 1 bits, the product arg1 * arg2 is
 * subtracted, a rounding constant of 1 << (DF_BITS(df) - 2) is added,
 * and the result is scaled back down and clamped to
 * [DF_MIN_INT(df), DF_MAX_INT(df)].
 */
static inline int64_t msa_msubr_q_df(uint32_t df, int64_t dest, int64_t arg1,
                                     int64_t arg2)
{
    const int64_t lowest = DF_MIN_INT(df);
    const int64_t highest = DF_MAX_INT(df);
    const int64_t rounding = 1LL << (DF_BITS(df) - 2);
    const int64_t product = arg1 * arg2;
    int64_t result;

    result = ((dest << (DF_BITS(df) - 1)) - product + rounding)
             >> (DF_BITS(df) - 1);

    if (result < lowest) {
        return lowest;
    }
    if (highest < result) {
        return highest;
    }
    return result;
}
5711 
5712 #define MSA_TEROP_DF(func) \
5713 void helper_msa_ ## func ## _df(CPUMIPSState *env, uint32_t df, uint32_t wd,  \
5714                                 uint32_t ws, uint32_t wt)                     \
5715 {                                                                             \
5716     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                                \
5717     wr_t *pws = &(env->active_fpu.fpr[ws].wr);                                \
5718     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);                                \
5719                                                                               \
5720     switch (df) {                                                             \
5721     case DF_BYTE:                                                             \
5722         pwd->b[0]  = msa_ ## func ## _df(df, pwd->b[0],  pws->b[0],           \
5723                                              pwt->b[0]);                      \
5724         pwd->b[1]  = msa_ ## func ## _df(df, pwd->b[1],  pws->b[1],           \
5725                                              pwt->b[1]);                      \
5726         pwd->b[2]  = msa_ ## func ## _df(df, pwd->b[2],  pws->b[2],           \
5727                                              pwt->b[2]);                      \
5728         pwd->b[3]  = msa_ ## func ## _df(df, pwd->b[3],  pws->b[3],           \
5729                                              pwt->b[3]);                      \
5730         pwd->b[4]  = msa_ ## func ## _df(df, pwd->b[4],  pws->b[4],           \
5731                                              pwt->b[4]);                      \
5732         pwd->b[5]  = msa_ ## func ## _df(df, pwd->b[5],  pws->b[5],           \
5733                                              pwt->b[5]);                      \
5734         pwd->b[6]  = msa_ ## func ## _df(df, pwd->b[6],  pws->b[6],           \
5735                                              pwt->b[6]);                      \
5736         pwd->b[7]  = msa_ ## func ## _df(df, pwd->b[7],  pws->b[7],           \
5737                                              pwt->b[7]);                      \
5738         pwd->b[8]  = msa_ ## func ## _df(df, pwd->b[8],  pws->b[8],           \
5739                                              pwt->b[8]);                      \
5740         pwd->b[9]  = msa_ ## func ## _df(df, pwd->b[9],  pws->b[9],           \
5741                                              pwt->b[9]);                      \
5742         pwd->b[10] = msa_ ## func ## _df(df, pwd->b[10], pws->b[10],          \
5743                                              pwt->b[10]);                     \
5744         pwd->b[11] = msa_ ## func ## _df(df, pwd->b[11], pws->b[11],          \
5745                                              pwt->b[11]);                     \
5746         pwd->b[12] = msa_ ## func ## _df(df, pwd->b[12], pws->b[12],          \
5747                                              pwt->b[12]);                     \
5748         pwd->b[13] = msa_ ## func ## _df(df, pwd->b[13], pws->b[13],          \
5749                                              pwt->b[13]);                     \
5750         pwd->b[14] = msa_ ## func ## _df(df, pwd->b[14], pws->b[14],          \
5751                                              pwt->b[14]);                     \
5752         pwd->b[15] = msa_ ## func ## _df(df, pwd->b[15], pws->b[15],          \
5753                                              pwt->b[15]);                     \
5754         break;                                                                \
5755     case DF_HALF:                                                             \
5756         pwd->h[0] = msa_ ## func ## _df(df, pwd->h[0], pws->h[0], pwt->h[0]); \
5757         pwd->h[1] = msa_ ## func ## _df(df, pwd->h[1], pws->h[1], pwt->h[1]); \
5758         pwd->h[2] = msa_ ## func ## _df(df, pwd->h[2], pws->h[2], pwt->h[2]); \
5759         pwd->h[3] = msa_ ## func ## _df(df, pwd->h[3], pws->h[3], pwt->h[3]); \
5760         pwd->h[4] = msa_ ## func ## _df(df, pwd->h[4], pws->h[4], pwt->h[4]); \
5761         pwd->h[5] = msa_ ## func ## _df(df, pwd->h[5], pws->h[5], pwt->h[5]); \
5762         pwd->h[6] = msa_ ## func ## _df(df, pwd->h[6], pws->h[6], pwt->h[6]); \
5763         pwd->h[7] = msa_ ## func ## _df(df, pwd->h[7], pws->h[7], pwt->h[7]); \
5764         break;                                                                \
5765     case DF_WORD:                                                             \
5766         pwd->w[0] = msa_ ## func ## _df(df, pwd->w[0], pws->w[0], pwt->w[0]); \
5767         pwd->w[1] = msa_ ## func ## _df(df, pwd->w[1], pws->w[1], pwt->w[1]); \
5768         pwd->w[2] = msa_ ## func ## _df(df, pwd->w[2], pws->w[2], pwt->w[2]); \
5769         pwd->w[3] = msa_ ## func ## _df(df, pwd->w[3], pws->w[3], pwt->w[3]); \
5770         break;                                                                \
5771     case DF_DOUBLE:                                                           \
5772         pwd->d[0] = msa_ ## func ## _df(df, pwd->d[0], pws->d[0], pwt->d[0]); \
5773         pwd->d[1] = msa_ ## func ## _df(df, pwd->d[1], pws->d[1], pwt->d[1]); \
5774         break;                                                                \
5775     default:                                                                  \
5776         g_assert_not_reached();                                               \
5777     }                                                                         \
5778 }
5779 
/*
 * Instantiate MSA_TEROP_DF once per operation.  Each expansion calls the
 * matching msa_<func>_df() routine element by element, passing the current
 * destination element together with the ws and wt elements.  The generator
 * macro is dropped afterwards so it cannot be reused below.
 */
MSA_TEROP_DF(binsl)
MSA_TEROP_DF(binsr)
MSA_TEROP_DF(madd_q)
MSA_TEROP_DF(msub_q)
MSA_TEROP_DF(maddr_q)
MSA_TEROP_DF(msubr_q)
#undef MSA_TEROP_DF
5787 
5788 static inline void msa_splat_df(uint32_t df, wr_t *pwd,
5789                                 wr_t *pws, target_ulong rt)
5790 {
5791     uint32_t n = rt % DF_ELEMENTS(df);
5792     uint32_t i;
5793 
5794     switch (df) {
5795     case DF_BYTE:
5796         for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {
5797             pwd->b[i] = pws->b[n];
5798         }
5799         break;
5800     case DF_HALF:
5801         for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {
5802             pwd->h[i] = pws->h[n];
5803         }
5804         break;
5805     case DF_WORD:
5806         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
5807             pwd->w[i] = pws->w[n];
5808         }
5809         break;
5810     case DF_DOUBLE:
5811         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
5812             pwd->d[i] = pws->d[n];
5813         }
5814        break;
5815     default:
5816         g_assert_not_reached();
5817     }
5818 }
5819 
/*
 * Helper: replicate one element of vector register ws across vector
 * register wd.  The element index is taken from the current value of
 * general-purpose register rt (msa_splat_df() wraps it to the element count).
 */
void helper_msa_splat_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
                         uint32_t ws, uint32_t rt)
{
    target_ulong index = env->active_tc.gpr[rt];

    msa_splat_df(df, &env->active_fpu.fpr[wd].wr,
                 &env->active_fpu.fpr[ws].wr, index);
}
5828 
/*
 * Building blocks for MSA_FN_DF.  A user defines MSA_DO(member) and
 * MSA_LOOP_COND(df) before instantiating MSA_FN_DF; the aliases below pick
 * the per-format variant:
 *   MSA_DO_<X>        - MSA_DO() applied to wr_t member b/h/w/d
 *   MSA_LOOP_<X>      - MSA_LOOP() for that format
 *   MSA_LOOP_COND_<X> - loop bound, MSA_LOOP_COND() of the DF_* constant
 */
#define MSA_DO_B MSA_DO(b)
#define MSA_DO_H MSA_DO(h)
#define MSA_DO_W MSA_DO(w)
#define MSA_DO_D MSA_DO(d)

#define MSA_LOOP_B MSA_LOOP(B)
#define MSA_LOOP_H MSA_LOOP(H)
#define MSA_LOOP_W MSA_LOOP(W)
#define MSA_LOOP_D MSA_LOOP(D)

#define MSA_LOOP_COND_B MSA_LOOP_COND(DF_BYTE)
#define MSA_LOOP_COND_H MSA_LOOP_COND(DF_HALF)
#define MSA_LOOP_COND_W MSA_LOOP_COND(DF_WORD)
#define MSA_LOOP_COND_D MSA_LOOP_COND(DF_DOUBLE)

/*
 * Run MSA_DO_<DF> for i = 0 .. MSA_LOOP_COND_<DF> - 1.  The index variable
 * `i` is not declared here; it must exist in the enclosing scope.
 */
#define MSA_LOOP(DF) \
    do { \
        for (i = 0; i < (MSA_LOOP_COND_ ## DF) ; i++) { \
            MSA_DO_ ## DF; \
        } \
    } while (0)
5850 
/*
 * Generate helper_msa_<FUNC>(env, df, wd, ws, wt).
 *
 * The generated helper resolves the three vector registers, runs the
 * MSA_LOOP_<X> variant matching df (any other df is unreachable), and lets
 * MSA_DO write its results into the scratch register wx through pwx.  Only
 * after the loop is wx handed to msa_move_v(pwd, pwx), so MSA_DO can still
 * read the original *pwd, *pws and *pwt values while results accumulate.
 */
#define MSA_FN_DF(FUNC)                                             \
void helper_msa_##FUNC(CPUMIPSState *env, uint32_t df, uint32_t wd, \
        uint32_t ws, uint32_t wt)                                   \
{                                                                   \
    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                      \
    wr_t *pws = &(env->active_fpu.fpr[ws].wr);                      \
    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);                      \
    wr_t wx, *pwx = &wx;                                            \
    uint32_t i;                                                     \
    switch (df) {                                                   \
    case DF_BYTE:                                                   \
        MSA_LOOP_B;                                                 \
        break;                                                      \
    case DF_HALF:                                                   \
        MSA_LOOP_H;                                                 \
        break;                                                      \
    case DF_WORD:                                                   \
        MSA_LOOP_W;                                                 \
        break;                                                      \
    case DF_DOUBLE:                                                 \
        MSA_LOOP_D;                                                 \
        break;                                                      \
    default:                                                        \
        g_assert_not_reached();                                     \
    }                                                               \
    msa_move_v(pwd, pwx);                                           \
}
5878 
/*
 * Loop bound covering half of the elements of a format.
 * NOTE(review): nothing between this definition and the #undef below
 * instantiates a loop with it - confirm whether it is still needed.
 */
#define MSA_LOOP_COND(DF) \
            (DF_ELEMENTS(DF) / 2)

/*
 * Half-register accessors: R<x>(pwr, i) is element i of member x, and
 * L<x>(pwr, i) is element i offset by half the element count of that
 * format (presumably "right" and "left" halves - confirm).
 * The arguments are not parenthesized, so pass simple expressions only.
 */
#define Rb(pwr, i) (pwr->b[i])
#define Lb(pwr, i) (pwr->b[i + DF_ELEMENTS(DF_BYTE) / 2])
#define Rh(pwr, i) (pwr->h[i])
#define Lh(pwr, i) (pwr->h[i + DF_ELEMENTS(DF_HALF) / 2])
#define Rw(pwr, i) (pwr->w[i])
#define Lw(pwr, i) (pwr->w[i + DF_ELEMENTS(DF_WORD) / 2])
#define Rd(pwr, i) (pwr->d[i])
#define Ld(pwr, i) (pwr->d[i + DF_ELEMENTS(DF_DOUBLE) / 2])

#undef MSA_LOOP_COND
5892 
/* Loop bound for the helper below: every element of the format. */
#define MSA_LOOP_COND(DF) \
            (DF_ELEMENTS(DF))

/*
 * Per-element body of helper_msa_vshf_df.  The current destination element
 * pwd->DF[i] acts as the control value:
 *   - if either of bits 7:6 (mask 0xc0) is set, the result element is 0;
 *   - otherwise k = (low six bits) % (2 * n) selects pwt->DF[k] when k < n
 *     and pws->DF[k - n] when k >= n, n being the element count for the
 *     run-time df.
 * Results go to pwx, so all control values are read before wd is replaced.
 */
#define MSA_DO(DF)                                                          \
    do {                                                                    \
        uint32_t n = DF_ELEMENTS(df);                                       \
        uint32_t k = (pwd->DF[i] & 0x3f) % (2 * n);                         \
        pwx->DF[i] =                                                        \
            (pwd->DF[i] & 0xc0) ? 0 : k < n ? pwt->DF[k] : pws->DF[k - n];  \
    } while (0)
MSA_FN_DF(vshf_df)
#undef MSA_DO
#undef MSA_LOOP_COND
#undef MSA_FN_DF
5907 
5908 
5909 void helper_msa_sldi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
5910                         uint32_t ws, uint32_t n)
5911 {
5912     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5913     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5914 
5915     msa_sld_df(df, pwd, pws, n);
5916 }
5917 
/*
 * Helper: immediate form of splat.  Replicates element n of vector register
 * ws across vector register wd; msa_splat_df() wraps n to the element count.
 */
void helper_msa_splati_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
                          uint32_t ws, uint32_t n)
{
    msa_splat_df(df, &env->active_fpu.fpr[wd].wr,
                 &env->active_fpu.fpr[ws].wr, n);
}
5926 
/*
 * Helper: copy byte element n (taken modulo 16) of vector register ws into
 * general-purpose register rd, converting through int8_t (sign extension).
 */
void helper_msa_copy_s_b(CPUMIPSState *env, uint32_t rd,
                         uint32_t ws, uint32_t n)
{
    uint32_t idx = n % 16;

#if HOST_BIG_ENDIAN
    /* Mirror the index inside its 8-byte half: 0..7 <-> 7..0, 8..15 <-> 15..8 */
    idx = (idx < 8 ? 8 : 24) - idx - 1;
#endif
    env->active_tc.gpr[rd] = (int8_t)env->active_fpu.fpr[ws].wr.b[idx];
}
5940 
/*
 * Helper: copy halfword element n (taken modulo 8) of vector register ws
 * into general-purpose register rd, converting through int16_t
 * (sign extension).
 */
void helper_msa_copy_s_h(CPUMIPSState *env, uint32_t rd,
                         uint32_t ws, uint32_t n)
{
    uint32_t idx = n % 8;

#if HOST_BIG_ENDIAN
    /* Mirror the index inside its group of four: 0..3 <-> 3..0, 4..7 <-> 7..4 */
    idx = (idx < 4 ? 4 : 12) - idx - 1;
#endif
    env->active_tc.gpr[rd] = (int16_t)env->active_fpu.fpr[ws].wr.h[idx];
}
5954 
/*
 * Helper: copy word element n (taken modulo 4) of vector register ws into
 * general-purpose register rd, converting through int32_t (sign extension).
 */
void helper_msa_copy_s_w(CPUMIPSState *env, uint32_t rd,
                         uint32_t ws, uint32_t n)
{
    uint32_t idx = n % 4;

#if HOST_BIG_ENDIAN
    /* Swap the two words of each pair: 0 <-> 1, 2 <-> 3 */
    idx = (idx < 2 ? 2 : 6) - idx - 1;
#endif
    env->active_tc.gpr[rd] = (int32_t)env->active_fpu.fpr[ws].wr.w[idx];
}
5968 
/*
 * Helper: copy doubleword element n (taken modulo 2) of vector register ws
 * into general-purpose register rd.  No host-endianness adjustment is
 * applied at this width.
 */
void helper_msa_copy_s_d(CPUMIPSState *env, uint32_t rd,
                         uint32_t ws, uint32_t n)
{
    const wr_t *src = &env->active_fpu.fpr[ws].wr;

    env->active_tc.gpr[rd] = (int64_t)src->d[n % 2];
}
5975 
/*
 * Helper: copy byte element n (taken modulo 16) of vector register ws into
 * general-purpose register rd, converting through uint8_t (zero extension).
 */
void helper_msa_copy_u_b(CPUMIPSState *env, uint32_t rd,
                         uint32_t ws, uint32_t n)
{
    uint32_t idx = n % 16;

#if HOST_BIG_ENDIAN
    /* Mirror the index inside its 8-byte half: 0..7 <-> 7..0, 8..15 <-> 15..8 */
    idx = (idx < 8 ? 8 : 24) - idx - 1;
#endif
    env->active_tc.gpr[rd] = (uint8_t)env->active_fpu.fpr[ws].wr.b[idx];
}
5989 
/*
 * Helper: copy halfword element n (taken modulo 8) of vector register ws
 * into general-purpose register rd, converting through uint16_t
 * (zero extension).
 */
void helper_msa_copy_u_h(CPUMIPSState *env, uint32_t rd,
                         uint32_t ws, uint32_t n)
{
    uint32_t idx = n % 8;

#if HOST_BIG_ENDIAN
    /* Mirror the index inside its group of four: 0..3 <-> 3..0, 4..7 <-> 7..4 */
    idx = (idx < 4 ? 4 : 12) - idx - 1;
#endif
    env->active_tc.gpr[rd] = (uint16_t)env->active_fpu.fpr[ws].wr.h[idx];
}
6003 
/*
 * Helper: copy word element n (taken modulo 4) of vector register ws into
 * general-purpose register rd, converting through uint32_t (zero extension).
 */
void helper_msa_copy_u_w(CPUMIPSState *env, uint32_t rd,
                         uint32_t ws, uint32_t n)
{
    uint32_t idx = n % 4;

#if HOST_BIG_ENDIAN
    /* Swap the two words of each pair: 0 <-> 1, 2 <-> 3 */
    idx = (idx < 2 ? 2 : 6) - idx - 1;
#endif
    env->active_tc.gpr[rd] = (uint32_t)env->active_fpu.fpr[ws].wr.w[idx];
}
6017 
/*
 * Helper: store the low byte of general-purpose register rs_num into byte
 * element n (taken modulo 16) of vector register wd.  All other elements of
 * wd are left untouched.
 */
void helper_msa_insert_b(CPUMIPSState *env, uint32_t wd,
                         uint32_t rs_num, uint32_t n)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    uint32_t slot = n % 16;

#if HOST_BIG_ENDIAN
    /* Mirror the index inside its 8-byte half: 0..7 <-> 7..0, 8..15 <-> 15..8 */
    slot = (slot < 8 ? 8 : 24) - slot - 1;
#endif
    dst->b[slot] = (int8_t)env->active_tc.gpr[rs_num];
}
6033 
/*
 * Helper: store the low halfword of general-purpose register rs_num into
 * halfword element n (taken modulo 8) of vector register wd.  All other
 * elements of wd are left untouched.
 */
void helper_msa_insert_h(CPUMIPSState *env, uint32_t wd,
                         uint32_t rs_num, uint32_t n)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    uint32_t slot = n % 8;

#if HOST_BIG_ENDIAN
    /* Mirror the index inside its group of four: 0..3 <-> 3..0, 4..7 <-> 7..4 */
    slot = (slot < 4 ? 4 : 12) - slot - 1;
#endif
    dst->h[slot] = (int16_t)env->active_tc.gpr[rs_num];
}
6049 
/*
 * Helper: store the low word of general-purpose register rs_num into word
 * element n (taken modulo 4) of vector register wd.  All other elements of
 * wd are left untouched.
 */
void helper_msa_insert_w(CPUMIPSState *env, uint32_t wd,
                         uint32_t rs_num, uint32_t n)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    uint32_t slot = n % 4;

#if HOST_BIG_ENDIAN
    /* Swap the two words of each pair: 0 <-> 1, 2 <-> 3 */
    slot = (slot < 2 ? 2 : 6) - slot - 1;
#endif
    dst->w[slot] = (int32_t)env->active_tc.gpr[rs_num];
}
6065 
/*
 * Helper: store general-purpose register rs_num into doubleword element n
 * (taken modulo 2) of vector register wd.  No host-endianness adjustment is
 * applied at this width.
 */
void helper_msa_insert_d(CPUMIPSState *env, uint32_t wd,
                         uint32_t rs_num, uint32_t n)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;

    dst->d[n % 2] = (int64_t)env->active_tc.gpr[rs_num];
}
6074 
/*
 * Helper: copy element 0 of vector register ws into element n of vector
 * register wd, for the element width selected by df.  Any df other than
 * DF_BYTE/DF_HALF/DF_WORD/DF_DOUBLE is treated as unreachable.
 *
 * NOTE(review): unlike the helper_msa_insert_* helpers, n is used as-is
 * (no modulo) and no HOST_BIG_ENDIAN index adjustment is made - confirm
 * that callers guarantee the range and that this is intended.
 */
void helper_msa_insve_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
                         uint32_t ws, uint32_t n)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src = &env->active_fpu.fpr[ws].wr;

    if (df == DF_BYTE) {
        dst->b[n] = (int8_t)src->b[0];
    } else if (df == DF_HALF) {
        dst->h[n] = (int16_t)src->h[0];
    } else if (df == DF_WORD) {
        dst->w[n] = (int32_t)src->w[0];
    } else if (df == DF_DOUBLE) {
        dst->d[n] = (int64_t)src->d[0];
    } else {
        g_assert_not_reached();
    }
}
6098 
/*
 * Helper: write elm to MSA control register cd.
 *
 * Only cd == 1 has an effect: elm is converted through int32_t, masked with
 * MSACSR_MASK and stored in MSACSR, the softfloat state is refreshed via
 * restore_msa_fp_status(), and EXCP_MSAFPE is raised when a Cause bit
 * coincides with an Enable bit or FP_UNIMPLEMENTED.  Writes to any other
 * register number (including 0) are silently ignored.
 */
void helper_msa_ctcmsa(CPUMIPSState *env, target_ulong elm, uint32_t cd)
{
    if (cd != 1) {
        return;
    }

    env->active_tc.msacsr = (int32_t)elm & MSACSR_MASK;
    restore_msa_fp_status(env);

    /* check exception */
    if ((GET_FP_ENABLE(env->active_tc.msacsr) | FP_UNIMPLEMENTED)
        & GET_FP_CAUSE(env->active_tc.msacsr)) {
        do_raise_exception(env, EXCP_MSAFPE, GETPC());
    }
}
6115 
helper_msa_cfcmsa(CPUMIPSState * env,uint32_t cs)6116 target_ulong helper_msa_cfcmsa(CPUMIPSState *env, uint32_t cs)
6117 {
6118     switch (cs) {
6119     case 0:
6120         return env->msair;
6121     case 1:
6122         return env->active_tc.msacsr & MSACSR_MASK;
6123     }
6124     return 0;
6125 }
6126 
/*
 * Helper: fill every element of vector register wd with the value of
 * general-purpose register rs, narrowed to the element width selected by df.
 * Any df other than DF_BYTE/DF_HALF/DF_WORD/DF_DOUBLE is unreachable.
 */
void helper_msa_fill_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
                        uint32_t rs)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    const target_ulong value = env->active_tc.gpr[rs];
    uint32_t lane = 0;

    switch (df) {
    case DF_BYTE:
        while (lane < DF_ELEMENTS(DF_BYTE)) {
            dst->b[lane++] = (int8_t)value;
        }
        break;
    case DF_HALF:
        while (lane < DF_ELEMENTS(DF_HALF)) {
            dst->h[lane++] = (int16_t)value;
        }
        break;
    case DF_WORD:
        while (lane < DF_ELEMENTS(DF_WORD)) {
            dst->w[lane++] = (int32_t)value;
        }
        break;
    case DF_DOUBLE:
        while (lane < DF_ELEMENTS(DF_DOUBLE)) {
            dst->d[lane++] = (int64_t)value;
        }
        break;
    default:
        g_assert_not_reached();
    }
}
6158 
6159 
/* Bit patterns 0x3f800000 and 0x3ff0000000000000: 1.0 in binary32/binary64 */
#define FLOAT_ONE32 make_float32(0x3f8 << 20)
#define FLOAT_ONE64 make_float64(0x3ffULL << 52)

/*
 * NaN patterns obtained by XOR-ing a fixed mask into the default NaN that
 * softfloat reports for float_status s.  The value noted after each macro
 * is the resulting bit pattern (presumably for this target's default-NaN
 * encoding - confirm).
 */
#define FLOAT_SNAN16(s) (float16_default_nan(s) ^ 0x0220)
        /* 0x7c20 */
#define FLOAT_SNAN32(s) (float32_default_nan(s) ^ 0x00400020)
        /* 0x7f800020 */
#define FLOAT_SNAN64(s) (float64_default_nan(s) ^ 0x0008000000000020ULL)
        /* 0x7ff0000000000020 */
6169 
/*
 * Reset the Cause field of MSACSR to zero.  The compare helpers call this
 * before processing the elements of an instruction.
 */
static inline void clear_msacsr_cause(CPUMIPSState *env)
{
    SET_FP_CAUSE(env->active_tc.msacsr, 0);
}
6174 
/*
 * Act on the Cause bits accumulated in MSACSR.
 *
 * If any Cause bit is also set in the Enable field or is FP_UNIMPLEMENTED,
 * raise EXCP_MSAFPE with retaddr as the host return address.  Otherwise
 * fold the Cause bits into the Flags field through UPDATE_FP_FLAGS.
 */
static inline void check_msacsr_cause(CPUMIPSState *env, uintptr_t retaddr)
{
    if ((GET_FP_CAUSE(env->active_tc.msacsr) &
            (GET_FP_ENABLE(env->active_tc.msacsr) | FP_UNIMPLEMENTED)) != 0) {
        do_raise_exception(env, EXCP_MSAFPE, retaddr);
    } else {
        UPDATE_FP_FLAGS(env->active_tc.msacsr,
                GET_FP_CAUSE(env->active_tc.msacsr));
    }
}
6185 
/*
 * Flush-to-zero use cases for update_msacsr(); bit flags that may be OR-ed
 * together in its `action` argument:
 *   CLEAR_FS_UNDERFLOW - when an output denormal was flushed with MSACSR.FS
 *                        set, clear Underflow instead of setting it
 *   CLEAR_IS_INEXACT   - when an input denormal was flushed with MSACSR.FS
 *                        set, clear Inexact instead of setting it
 *   RECIPROCAL_INEXACT - unless Invalid or Divide-by-zero is present,
 *                        replace the whole flag set with just Inexact
 */
#define CLEAR_FS_UNDERFLOW 1
#define CLEAR_IS_INEXACT   2
#define RECIPROCAL_INEXACT 4
6190 
6191 
ieee_to_mips_xcpt_msa(int ieee_xcpt)6192 static inline int ieee_to_mips_xcpt_msa(int ieee_xcpt)
6193 {
6194     int mips_xcpt = 0;
6195 
6196     if (ieee_xcpt & float_flag_invalid) {
6197         mips_xcpt |= FP_INVALID;
6198     }
6199     if (ieee_xcpt & float_flag_overflow) {
6200         mips_xcpt |= FP_OVERFLOW;
6201     }
6202     if (ieee_xcpt & float_flag_underflow) {
6203         mips_xcpt |= FP_UNDERFLOW;
6204     }
6205     if (ieee_xcpt & float_flag_divbyzero) {
6206         mips_xcpt |= FP_DIV0;
6207     }
6208     if (ieee_xcpt & float_flag_inexact) {
6209         mips_xcpt |= FP_INEXACT;
6210     }
6211 
6212     return mips_xcpt;
6213 }
6214 
/*
 * Fold the softfloat exception flags of the last operation into MSACSR.Cause
 * and return the resulting FP_* flag set.
 *
 * action   - OR of CLEAR_FS_UNDERFLOW / CLEAR_IS_INEXACT / RECIPROCAL_INEXACT
 * denormal - non-zero forces the underflow flag on before translation
 *
 * The flag set is adjusted in this order: flush-to-zero of inputs, then of
 * outputs (both only when MSACSR.FS is set), overflow without its enable
 * bit, exact underflow without its enable bit, and finally the reciprocal
 * rule.  Cause is then OR-ed with the complete flag set, except when one of
 * the flags is enabled (or unimplemented) and MSACSR.NX is set, in which
 * case Cause is left unchanged.
 */
static inline int update_msacsr(CPUMIPSState *env, int action, int denormal)
{
    int ieee_flags = get_float_exception_flags(&env->active_tc.msa_fp_status);
    const int enabled = GET_FP_ENABLE(env->active_tc.msacsr) | FP_UNIMPLEMENTED;
    const bool flush_mode = (env->active_tc.msacsr & MSACSR_FS_MASK) != 0;
    int flags = 0;

    /* QEMU softfloat does not signal all underflow cases */
    if (denormal) {
        ieee_flags |= float_flag_underflow;
    }
    if (ieee_flags) {
        flags = ieee_to_mips_xcpt_msa(ieee_flags);
    }

    /* Inputs flushed to zero: Inexact is set, or cleared on request */
    if (flush_mode && (ieee_flags & float_flag_input_denormal_flushed)) {
        if (action & CLEAR_IS_INEXACT) {
            flags &= ~FP_INEXACT;
        } else {
            flags |= FP_INEXACT;
        }
    }

    /* Outputs flushed to zero: Inexact, plus Underflow unless suppressed */
    if (flush_mode && (ieee_flags & float_flag_output_denormal_flushed)) {
        flags |= FP_INEXACT;
        if (action & CLEAR_FS_UNDERFLOW) {
            flags &= ~FP_UNDERFLOW;
        } else {
            flags |= FP_UNDERFLOW;
        }
    }

    /* Overflow that cannot trap also reports Inexact */
    if ((flags & FP_OVERFLOW) && !(enabled & FP_OVERFLOW)) {
        flags |= FP_INEXACT;
    }

    /* Exact underflow that cannot trap is not reported at all */
    if ((flags & FP_UNDERFLOW) && !(enabled & FP_UNDERFLOW) &&
        !(flags & FP_INEXACT)) {
        flags &= ~FP_UNDERFLOW;
    }

    /*
     * Reciprocal operations report only Inexact when the result is valid
     * and no division by zero took place
     */
    if ((action & RECIPROCAL_INEXACT) &&
        (flags & (FP_INVALID | FP_DIV0)) == 0) {
        flags = FP_INEXACT;
    }

    /*
     * Cause accumulates the flags when nothing would trap, or when a trap is
     * pending and NX is clear; a pending trap with NX set leaves it alone.
     */
    if ((flags & enabled) == 0 ||
        (env->active_tc.msacsr & MSACSR_NX_MASK) == 0) {
        SET_FP_CAUSE(env->active_tc.msacsr,
            (GET_FP_CAUSE(env->active_tc.msacsr) | flags));
    }

    return flags;
}
6300 
get_enabled_exceptions(const CPUMIPSState * env,int c)6301 static inline int get_enabled_exceptions(const CPUMIPSState *env, int c)
6302 {
6303     int enable = GET_FP_ENABLE(env->active_tc.msacsr) | FP_UNIMPLEMENTED;
6304     return c & enable;
6305 }
6306 
float16_from_float32(int32_t a,bool ieee,float_status * status)6307 static inline float16 float16_from_float32(int32_t a, bool ieee,
6308                                            float_status *status)
6309 {
6310       float16 f_val;
6311 
6312       f_val = float32_to_float16((float32)a, ieee, status);
6313 
6314       return a < 0 ? (f_val | (1 << 15)) : f_val;
6315 }
6316 
float32_from_float64(int64_t a,float_status * status)6317 static inline float32 float32_from_float64(int64_t a, float_status *status)
6318 {
6319       float32 f_val;
6320 
6321       f_val = float64_to_float32((float64)a, status);
6322 
6323       return a < 0 ? (f_val | (1 << 31)) : f_val;
6324 }
6325 
float32_from_float16(int16_t a,bool ieee,float_status * status)6326 static inline float32 float32_from_float16(int16_t a, bool ieee,
6327                                            float_status *status)
6328 {
6329       float32 f_val;
6330 
6331       f_val = float16_to_float32((float16)a, ieee, status);
6332 
6333       return a < 0 ? (f_val | (1 << 31)) : f_val;
6334 }
6335 
float64_from_float32(int32_t a,float_status * status)6336 static inline float64 float64_from_float32(int32_t a, float_status *status)
6337 {
6338       float64 f_val;
6339 
6340       f_val = float32_to_float64((float64)a, status);
6341 
6342       return a < 0 ? (f_val | (1ULL << 63)) : f_val;
6343 }
6344 
float32_from_q16(int16_t a,float_status * status)6345 static inline float32 float32_from_q16(int16_t a, float_status *status)
6346 {
6347     float32 f_val;
6348 
6349     /* conversion as integer and scaling */
6350     f_val = int32_to_float32(a, status);
6351     f_val = float32_scalbn(f_val, -15, status);
6352 
6353     return f_val;
6354 }
6355 
float64_from_q32(int32_t a,float_status * status)6356 static inline float64 float64_from_q32(int32_t a, float_status *status)
6357 {
6358     float64 f_val;
6359 
6360     /* conversion as integer and scaling */
6361     f_val = int32_to_float64(a, status);
6362     f_val = float64_scalbn(f_val, -31, status);
6363 
6364     return f_val;
6365 }
6366 
float32_to_q16(float32 a,float_status * status)6367 static inline int16_t float32_to_q16(float32 a, float_status *status)
6368 {
6369     int32_t q_val;
6370     int32_t q_min = 0xffff8000;
6371     int32_t q_max = 0x00007fff;
6372 
6373     int ieee_ex;
6374 
6375     if (float32_is_any_nan(a)) {
6376         float_raise(float_flag_invalid, status);
6377         return 0;
6378     }
6379 
6380     /* scaling */
6381     a = float32_scalbn(a, 15, status);
6382 
6383     ieee_ex = get_float_exception_flags(status);
6384     set_float_exception_flags(ieee_ex & (~float_flag_underflow)
6385                              , status);
6386 
6387     if (ieee_ex & float_flag_overflow) {
6388         float_raise(float_flag_inexact, status);
6389         return (int32_t)a < 0 ? q_min : q_max;
6390     }
6391 
6392     /* conversion to int */
6393     q_val = float32_to_int32(a, status);
6394 
6395     ieee_ex = get_float_exception_flags(status);
6396     set_float_exception_flags(ieee_ex & (~float_flag_underflow)
6397                              , status);
6398 
6399     if (ieee_ex & float_flag_invalid) {
6400         set_float_exception_flags(ieee_ex & (~float_flag_invalid)
6401                                , status);
6402         float_raise(float_flag_overflow | float_flag_inexact, status);
6403         return (int32_t)a < 0 ? q_min : q_max;
6404     }
6405 
6406     if (q_val < q_min) {
6407         float_raise(float_flag_overflow | float_flag_inexact, status);
6408         return (int16_t)q_min;
6409     }
6410 
6411     if (q_max < q_val) {
6412         float_raise(float_flag_overflow | float_flag_inexact, status);
6413         return (int16_t)q_max;
6414     }
6415 
6416     return (int16_t)q_val;
6417 }
6418 
float64_to_q32(float64 a,float_status * status)6419 static inline int32_t float64_to_q32(float64 a, float_status *status)
6420 {
6421     int64_t q_val;
6422     int64_t q_min = 0xffffffff80000000LL;
6423     int64_t q_max = 0x000000007fffffffLL;
6424 
6425     int ieee_ex;
6426 
6427     if (float64_is_any_nan(a)) {
6428         float_raise(float_flag_invalid, status);
6429         return 0;
6430     }
6431 
6432     /* scaling */
6433     a = float64_scalbn(a, 31, status);
6434 
6435     ieee_ex = get_float_exception_flags(status);
6436     set_float_exception_flags(ieee_ex & (~float_flag_underflow)
6437            , status);
6438 
6439     if (ieee_ex & float_flag_overflow) {
6440         float_raise(float_flag_inexact, status);
6441         return (int64_t)a < 0 ? q_min : q_max;
6442     }
6443 
6444     /* conversion to integer */
6445     q_val = float64_to_int64(a, status);
6446 
6447     ieee_ex = get_float_exception_flags(status);
6448     set_float_exception_flags(ieee_ex & (~float_flag_underflow)
6449            , status);
6450 
6451     if (ieee_ex & float_flag_invalid) {
6452         set_float_exception_flags(ieee_ex & (~float_flag_invalid)
6453                , status);
6454         float_raise(float_flag_overflow | float_flag_inexact, status);
6455         return (int64_t)a < 0 ? q_min : q_max;
6456     }
6457 
6458     if (q_val < q_min) {
6459         float_raise(float_flag_overflow | float_flag_inexact, status);
6460         return (int32_t)q_min;
6461     }
6462 
6463     if (q_max < q_val) {
6464         float_raise(float_flag_overflow | float_flag_inexact, status);
6465         return (int32_t)q_max;
6466     }
6467 
6468     return (int32_t)q_val;
6469 }
6470 
/*
 * Evaluate one floating-point comparison and store an element mask in DEST.
 *
 * The softfloat exception flags are cleared, then float<BITS>_<OP>() is
 * called on (ARG1, ARG2), or its _quiet variant when QUIET is non-zero.
 * DEST becomes all ones (BITS wide) when the predicate holds and 0
 * otherwise.  The flags are folded into MSACSR through update_msacsr()
 * with CLEAR_IS_INEXACT; if any resulting exception is enabled, DEST is
 * overwritten with FLOAT_SNAN<BITS> whose low six bits are replaced by the
 * exception bits.
 *
 * Requires `env` in the enclosing scope.  The macro declares locals named
 * status, c and cond, so its arguments must not refer to outer variables
 * of those names.
 */
#define MSA_FLOAT_COND(DEST, OP, ARG1, ARG2, BITS, QUIET)                   \
    do {                                                                    \
        float_status *status = &env->active_tc.msa_fp_status;               \
        int c;                                                              \
        int64_t cond;                                                       \
        set_float_exception_flags(0, status);                               \
        if (!QUIET) {                                                       \
            cond = float ## BITS ## _ ## OP(ARG1, ARG2, status);            \
        } else {                                                            \
            cond = float ## BITS ## _ ## OP ## _quiet(ARG1, ARG2, status);  \
        }                                                                   \
        DEST = cond ? M_MAX_UINT(BITS) : 0;                                 \
        c = update_msacsr(env, CLEAR_IS_INEXACT, 0);                        \
                                                                            \
        if (get_enabled_exceptions(env, c)) {                               \
            DEST = ((FLOAT_SNAN ## BITS(status) >> 6) << 6) | c;            \
        }                                                                   \
    } while (0)
6489 
/*
 * Compound predicates built from MSA_FLOAT_COND.  In each of them a later
 * comparison is attempted only while DEST is still 0, so a true result or
 * an exception pattern written by an earlier step is kept.
 */

/*
 * Run the eq comparison for its exception side effects, then turn an
 * all-ones result into 0.  DEST ends up 0 unless an exception pattern was
 * stored.
 */
#define MSA_FLOAT_AF(DEST, ARG1, ARG2, BITS, QUIET)                 \
    do {                                                            \
        MSA_FLOAT_COND(DEST, eq, ARG1, ARG2, BITS, QUIET);          \
        if ((DEST & M_MAX_UINT(BITS)) == M_MAX_UINT(BITS)) {        \
            DEST = 0;                                               \
        }                                                           \
    } while (0)

/* True when the operands are unordered or equal. */
#define MSA_FLOAT_UEQ(DEST, ARG1, ARG2, BITS, QUIET)                \
    do {                                                            \
        MSA_FLOAT_COND(DEST, unordered, ARG1, ARG2, BITS, QUIET);   \
        if (DEST == 0) {                                            \
            MSA_FLOAT_COND(DEST, eq, ARG1, ARG2, BITS, QUIET);      \
        }                                                           \
    } while (0)

/* True when ARG1 < ARG2 or ARG2 < ARG1. */
#define MSA_FLOAT_NE(DEST, ARG1, ARG2, BITS, QUIET)                 \
    do {                                                            \
        MSA_FLOAT_COND(DEST, lt, ARG1, ARG2, BITS, QUIET);          \
        if (DEST == 0) {                                            \
            MSA_FLOAT_COND(DEST, lt, ARG2, ARG1, BITS, QUIET);      \
        }                                                           \
    } while (0)

/* True when the operands are unordered, or ARG1 < ARG2, or ARG2 < ARG1. */
#define MSA_FLOAT_UNE(DEST, ARG1, ARG2, BITS, QUIET)                \
    do {                                                            \
        MSA_FLOAT_COND(DEST, unordered, ARG1, ARG2, BITS, QUIET);   \
        if (DEST == 0) {                                            \
            MSA_FLOAT_COND(DEST, lt, ARG1, ARG2, BITS, QUIET);      \
            if (DEST == 0) {                                        \
                MSA_FLOAT_COND(DEST, lt, ARG2, ARG1, BITS, QUIET);  \
            }                                                       \
        }                                                           \
    } while (0)

/* True when the operands are unordered or ARG1 <= ARG2. */
#define MSA_FLOAT_ULE(DEST, ARG1, ARG2, BITS, QUIET)                \
    do {                                                            \
        MSA_FLOAT_COND(DEST, unordered, ARG1, ARG2, BITS, QUIET);   \
        if (DEST == 0) {                                            \
            MSA_FLOAT_COND(DEST, le, ARG1, ARG2, BITS, QUIET);      \
        }                                                           \
    } while (0)

/* True when the operands are unordered or ARG1 < ARG2. */
#define MSA_FLOAT_ULT(DEST, ARG1, ARG2, BITS, QUIET)                \
    do {                                                            \
        MSA_FLOAT_COND(DEST, unordered, ARG1, ARG2, BITS, QUIET);   \
        if (DEST == 0) {                                            \
            MSA_FLOAT_COND(DEST, lt, ARG1, ARG2, BITS, QUIET);      \
        }                                                           \
    } while (0)

/* True when ARG1 <= ARG2 or ARG2 <= ARG1. */
#define MSA_FLOAT_OR(DEST, ARG1, ARG2, BITS, QUIET)                 \
    do {                                                            \
        MSA_FLOAT_COND(DEST, le, ARG1, ARG2, BITS, QUIET);          \
        if (DEST == 0) {                                            \
            MSA_FLOAT_COND(DEST, le, ARG2, ARG1, BITS, QUIET);      \
        }                                                           \
    } while (0)
6548 
/*
 * Element-wise MSA_FLOAT_AF comparison of *pws and *pwt for DF_WORD or
 * DF_DOUBLE elements; any other df is unreachable.
 *
 * MSACSR.Cause is cleared first.  Results are collected in a scratch
 * register and moved to *pwd only after check_msacsr_cause() has run, so
 * *pwd is not written when that call raises an exception.
 */
static inline void compare_af(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
                              wr_t *pwt, uint32_t df, int quiet,
                              uintptr_t retaddr)
{
    wr_t result;
    uint32_t i;

    clear_msacsr_cause(env);

    if (df == DF_WORD) {
        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
            MSA_FLOAT_AF(result.w[i], pws->w[i], pwt->w[i], 32, quiet);
        }
    } else if (df == DF_DOUBLE) {
        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
            MSA_FLOAT_AF(result.d[i], pws->d[i], pwt->d[i], 64, quiet);
        }
    } else {
        g_assert_not_reached();
    }

    check_msacsr_cause(env, retaddr);

    msa_move_v(pwd, &result);
}
6577 
/*
 * Apply MSA_FLOAT_COND with the `unordered` predicate to every element
 * pair of *pws and *pwt.
 *
 * df selects the element layout (DF_WORD or DF_DOUBLE); any other value
 * reaches g_assert_not_reached().  quiet is forwarded to the macro and
 * retaddr to check_msacsr_cause().  The result is assembled in a local
 * vector and moved into *pwd only after the cause check.
 */
static inline void compare_un(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
                              wr_t *pwt, uint32_t df, int quiet,
                              uintptr_t retaddr)
{
    wr_t res;

    clear_msacsr_cause(env);

    if (df == DF_WORD) {
        for (uint32_t lane = 0; lane < DF_ELEMENTS(DF_WORD); lane++) {
            MSA_FLOAT_COND(res.w[lane], unordered,
                           pws->w[lane], pwt->w[lane], 32, quiet);
        }
    } else if (df == DF_DOUBLE) {
        for (uint32_t lane = 0; lane < DF_ELEMENTS(DF_DOUBLE); lane++) {
            MSA_FLOAT_COND(res.d[lane], unordered,
                           pws->d[lane], pwt->d[lane], 64, quiet);
        }
    } else {
        g_assert_not_reached();
    }

    check_msacsr_cause(env, retaddr);

    msa_move_v(pwd, &res);
}
6608 
/*
 * Apply MSA_FLOAT_COND with the `eq` predicate to every element pair of
 * *pws and *pwt.
 *
 * df selects the element layout (DF_WORD or DF_DOUBLE); any other value
 * reaches g_assert_not_reached().  quiet is forwarded to the macro and
 * retaddr to check_msacsr_cause().  The result is assembled in a local
 * vector and moved into *pwd only after the cause check.
 */
static inline void compare_eq(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
                              wr_t *pwt, uint32_t df, int quiet,
                              uintptr_t retaddr)
{
    wr_t res;

    clear_msacsr_cause(env);

    if (df == DF_WORD) {
        for (uint32_t lane = 0; lane < DF_ELEMENTS(DF_WORD); lane++) {
            MSA_FLOAT_COND(res.w[lane], eq,
                           pws->w[lane], pwt->w[lane], 32, quiet);
        }
    } else if (df == DF_DOUBLE) {
        for (uint32_t lane = 0; lane < DF_ELEMENTS(DF_DOUBLE); lane++) {
            MSA_FLOAT_COND(res.d[lane], eq,
                           pws->d[lane], pwt->d[lane], 64, quiet);
        }
    } else {
        g_assert_not_reached();
    }

    check_msacsr_cause(env, retaddr);

    msa_move_v(pwd, &res);
}
6637 
/*
 * Apply MSA_FLOAT_UEQ to every element pair of *pws and *pwt.
 *
 * df selects the element layout (DF_WORD or DF_DOUBLE); any other value
 * reaches g_assert_not_reached().  quiet is forwarded to the macro and
 * retaddr to check_msacsr_cause().  The result is assembled in a local
 * vector and moved into *pwd only after the cause check.
 */
static inline void compare_ueq(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
                               wr_t *pwt, uint32_t df, int quiet,
                               uintptr_t retaddr)
{
    wr_t res;

    clear_msacsr_cause(env);

    if (df == DF_WORD) {
        for (uint32_t lane = 0; lane < DF_ELEMENTS(DF_WORD); lane++) {
            MSA_FLOAT_UEQ(res.w[lane], pws->w[lane], pwt->w[lane], 32, quiet);
        }
    } else if (df == DF_DOUBLE) {
        for (uint32_t lane = 0; lane < DF_ELEMENTS(DF_DOUBLE); lane++) {
            MSA_FLOAT_UEQ(res.d[lane], pws->d[lane], pwt->d[lane], 64, quiet);
        }
    } else {
        g_assert_not_reached();
    }

    check_msacsr_cause(env, retaddr);

    msa_move_v(pwd, &res);
}
6666 
/*
 * Apply MSA_FLOAT_COND with the `lt` predicate to every element pair of
 * *pws and *pwt.
 *
 * df selects the element layout (DF_WORD or DF_DOUBLE); any other value
 * reaches g_assert_not_reached().  quiet is forwarded to the macro and
 * retaddr to check_msacsr_cause().  The result is assembled in a local
 * vector and moved into *pwd only after the cause check.
 */
static inline void compare_lt(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
                              wr_t *pwt, uint32_t df, int quiet,
                              uintptr_t retaddr)
{
    wr_t res;

    clear_msacsr_cause(env);

    if (df == DF_WORD) {
        for (uint32_t lane = 0; lane < DF_ELEMENTS(DF_WORD); lane++) {
            MSA_FLOAT_COND(res.w[lane], lt,
                           pws->w[lane], pwt->w[lane], 32, quiet);
        }
    } else if (df == DF_DOUBLE) {
        for (uint32_t lane = 0; lane < DF_ELEMENTS(DF_DOUBLE); lane++) {
            MSA_FLOAT_COND(res.d[lane], lt,
                           pws->d[lane], pwt->d[lane], 64, quiet);
        }
    } else {
        g_assert_not_reached();
    }

    check_msacsr_cause(env, retaddr);

    msa_move_v(pwd, &res);
}
6695 
/*
 * Apply MSA_FLOAT_ULT (unordered, then less-than) to every element pair
 * of *pws and *pwt.
 *
 * df selects the element layout (DF_WORD or DF_DOUBLE); any other value
 * reaches g_assert_not_reached().  quiet is forwarded to the macro and
 * retaddr to check_msacsr_cause().  The result is assembled in a local
 * vector and moved into *pwd only after the cause check.
 */
static inline void compare_ult(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
                               wr_t *pwt, uint32_t df, int quiet,
                               uintptr_t retaddr)
{
    wr_t res;

    clear_msacsr_cause(env);

    if (df == DF_WORD) {
        for (uint32_t lane = 0; lane < DF_ELEMENTS(DF_WORD); lane++) {
            MSA_FLOAT_ULT(res.w[lane], pws->w[lane], pwt->w[lane], 32, quiet);
        }
    } else if (df == DF_DOUBLE) {
        for (uint32_t lane = 0; lane < DF_ELEMENTS(DF_DOUBLE); lane++) {
            MSA_FLOAT_ULT(res.d[lane], pws->d[lane], pwt->d[lane], 64, quiet);
        }
    } else {
        g_assert_not_reached();
    }

    check_msacsr_cause(env, retaddr);

    msa_move_v(pwd, &res);
}
6724 
/*
 * Apply MSA_FLOAT_COND with the `le` predicate to every element pair of
 * *pws and *pwt.
 *
 * df selects the element layout (DF_WORD or DF_DOUBLE); any other value
 * reaches g_assert_not_reached().  quiet is forwarded to the macro and
 * retaddr to check_msacsr_cause().  The result is assembled in a local
 * vector and moved into *pwd only after the cause check.
 */
static inline void compare_le(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
                              wr_t *pwt, uint32_t df, int quiet,
                              uintptr_t retaddr)
{
    wr_t res;

    clear_msacsr_cause(env);

    if (df == DF_WORD) {
        for (uint32_t lane = 0; lane < DF_ELEMENTS(DF_WORD); lane++) {
            MSA_FLOAT_COND(res.w[lane], le,
                           pws->w[lane], pwt->w[lane], 32, quiet);
        }
    } else if (df == DF_DOUBLE) {
        for (uint32_t lane = 0; lane < DF_ELEMENTS(DF_DOUBLE); lane++) {
            MSA_FLOAT_COND(res.d[lane], le,
                           pws->d[lane], pwt->d[lane], 64, quiet);
        }
    } else {
        g_assert_not_reached();
    }

    check_msacsr_cause(env, retaddr);

    msa_move_v(pwd, &res);
}
6753 
/*
 * Apply MSA_FLOAT_ULE (unordered, then less-than-or-equal) to every
 * element pair of *pws and *pwt.
 *
 * df selects the element layout (DF_WORD or DF_DOUBLE); any other value
 * reaches g_assert_not_reached().  quiet is forwarded to the macro and
 * retaddr to check_msacsr_cause().  The result is assembled in a local
 * vector and moved into *pwd only after the cause check.
 */
static inline void compare_ule(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
                               wr_t *pwt, uint32_t df, int quiet,
                               uintptr_t retaddr)
{
    wr_t res;

    clear_msacsr_cause(env);

    if (df == DF_WORD) {
        for (uint32_t lane = 0; lane < DF_ELEMENTS(DF_WORD); lane++) {
            MSA_FLOAT_ULE(res.w[lane], pws->w[lane], pwt->w[lane], 32, quiet);
        }
    } else if (df == DF_DOUBLE) {
        for (uint32_t lane = 0; lane < DF_ELEMENTS(DF_DOUBLE); lane++) {
            MSA_FLOAT_ULE(res.d[lane], pws->d[lane], pwt->d[lane], 64, quiet);
        }
    } else {
        g_assert_not_reached();
    }

    check_msacsr_cause(env, retaddr);

    msa_move_v(pwd, &res);
}
6782 
/*
 * Apply MSA_FLOAT_OR (`le` tried in both operand orders) to every element
 * pair of *pws and *pwt.
 *
 * df selects the element layout (DF_WORD or DF_DOUBLE); any other value
 * reaches g_assert_not_reached().  quiet is forwarded to the macro and
 * retaddr to check_msacsr_cause().  The result is assembled in a local
 * vector and moved into *pwd only after the cause check.
 */
static inline void compare_or(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
                              wr_t *pwt, uint32_t df, int quiet,
                              uintptr_t retaddr)
{
    wr_t res;

    clear_msacsr_cause(env);

    if (df == DF_WORD) {
        for (uint32_t lane = 0; lane < DF_ELEMENTS(DF_WORD); lane++) {
            MSA_FLOAT_OR(res.w[lane], pws->w[lane], pwt->w[lane], 32, quiet);
        }
    } else if (df == DF_DOUBLE) {
        for (uint32_t lane = 0; lane < DF_ELEMENTS(DF_DOUBLE); lane++) {
            MSA_FLOAT_OR(res.d[lane], pws->d[lane], pwt->d[lane], 64, quiet);
        }
    } else {
        g_assert_not_reached();
    }

    check_msacsr_cause(env, retaddr);

    msa_move_v(pwd, &res);
}
6811 
/*
 * Apply MSA_FLOAT_UNE to every element pair of *pws and *pwt.
 *
 * df selects the element layout (DF_WORD or DF_DOUBLE); any other value
 * reaches g_assert_not_reached().  quiet is forwarded to the macro and
 * retaddr to check_msacsr_cause().  The result is assembled in a local
 * vector and moved into *pwd only after the cause check.
 */
static inline void compare_une(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
                               wr_t *pwt, uint32_t df, int quiet,
                               uintptr_t retaddr)
{
    wr_t res;

    clear_msacsr_cause(env);

    if (df == DF_WORD) {
        for (uint32_t lane = 0; lane < DF_ELEMENTS(DF_WORD); lane++) {
            MSA_FLOAT_UNE(res.w[lane], pws->w[lane], pwt->w[lane], 32, quiet);
        }
    } else if (df == DF_DOUBLE) {
        for (uint32_t lane = 0; lane < DF_ELEMENTS(DF_DOUBLE); lane++) {
            MSA_FLOAT_UNE(res.d[lane], pws->d[lane], pwt->d[lane], 64, quiet);
        }
    } else {
        g_assert_not_reached();
    }

    check_msacsr_cause(env, retaddr);

    msa_move_v(pwd, &res);
}
6840 
/*
 * Apply MSA_FLOAT_NE to every element pair of *pws and *pwt.
 *
 * df selects the element layout (DF_WORD or DF_DOUBLE); any other value
 * reaches g_assert_not_reached().  quiet is forwarded to the macro and
 * retaddr to check_msacsr_cause().  The result is assembled in a local
 * vector and moved into *pwd only after the cause check.
 */
static inline void compare_ne(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
                              wr_t *pwt, uint32_t df, int quiet,
                              uintptr_t retaddr)
{
    wr_t res;

    clear_msacsr_cause(env);

    if (df == DF_WORD) {
        for (uint32_t lane = 0; lane < DF_ELEMENTS(DF_WORD); lane++) {
            MSA_FLOAT_NE(res.w[lane], pws->w[lane], pwt->w[lane], 32, quiet);
        }
    } else if (df == DF_DOUBLE) {
        for (uint32_t lane = 0; lane < DF_ELEMENTS(DF_DOUBLE); lane++) {
            MSA_FLOAT_NE(res.d[lane], pws->d[lane], pwt->d[lane], 64, quiet);
        }
    } else {
        g_assert_not_reached();
    }

    check_msacsr_cause(env, retaddr);

    msa_move_v(pwd, &res);
}
6869 
/*
 * Runs compare_af() with quiet = 1 on vector registers ws and wt, with wd
 * as the destination.  wd, ws and wt index env->active_fpu.fpr[].
 */
void helper_msa_fcaf_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
                        uint32_t ws, uint32_t wt)
{
    /* GETPC() is evaluated here, in the helper itself, and passed down. */
    compare_af(env,
               &env->active_fpu.fpr[wd].wr,
               &env->active_fpu.fpr[ws].wr,
               &env->active_fpu.fpr[wt].wr,
               df, 1, GETPC());
}
6878 
/*
 * Runs compare_un() with quiet = 1 on vector registers ws and wt, with wd
 * as the destination.  wd, ws and wt index env->active_fpu.fpr[].
 */
void helper_msa_fcun_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
                        uint32_t ws, uint32_t wt)
{
    /* GETPC() is evaluated here, in the helper itself, and passed down. */
    compare_un(env,
               &env->active_fpu.fpr[wd].wr,
               &env->active_fpu.fpr[ws].wr,
               &env->active_fpu.fpr[wt].wr,
               df, 1, GETPC());
}
6887 
/*
 * Runs compare_eq() with quiet = 1 on vector registers ws and wt, with wd
 * as the destination.  wd, ws and wt index env->active_fpu.fpr[].
 */
void helper_msa_fceq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
                        uint32_t ws, uint32_t wt)
{
    /* GETPC() is evaluated here, in the helper itself, and passed down. */
    compare_eq(env,
               &env->active_fpu.fpr[wd].wr,
               &env->active_fpu.fpr[ws].wr,
               &env->active_fpu.fpr[wt].wr,
               df, 1, GETPC());
}
6896 
/*
 * Runs compare_ueq() with quiet = 1 on vector registers ws and wt, with
 * wd as the destination.  wd, ws and wt index env->active_fpu.fpr[].
 */
void helper_msa_fcueq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
                         uint32_t ws, uint32_t wt)
{
    /* GETPC() is evaluated here, in the helper itself, and passed down. */
    compare_ueq(env,
                &env->active_fpu.fpr[wd].wr,
                &env->active_fpu.fpr[ws].wr,
                &env->active_fpu.fpr[wt].wr,
                df, 1, GETPC());
}
6905 
/*
 * Runs compare_lt() with quiet = 1 on vector registers ws and wt, with wd
 * as the destination.  wd, ws and wt index env->active_fpu.fpr[].
 */
void helper_msa_fclt_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
                        uint32_t ws, uint32_t wt)
{
    /* GETPC() is evaluated here, in the helper itself, and passed down. */
    compare_lt(env,
               &env->active_fpu.fpr[wd].wr,
               &env->active_fpu.fpr[ws].wr,
               &env->active_fpu.fpr[wt].wr,
               df, 1, GETPC());
}
6914 
/*
 * Runs compare_ult() with quiet = 1 on vector registers ws and wt, with
 * wd as the destination.  wd, ws and wt index env->active_fpu.fpr[].
 */
void helper_msa_fcult_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
                         uint32_t ws, uint32_t wt)
{
    /* GETPC() is evaluated here, in the helper itself, and passed down. */
    compare_ult(env,
                &env->active_fpu.fpr[wd].wr,
                &env->active_fpu.fpr[ws].wr,
                &env->active_fpu.fpr[wt].wr,
                df, 1, GETPC());
}
6923 
/*
 * Runs compare_le() with quiet = 1 on vector registers ws and wt, with wd
 * as the destination.  wd, ws and wt index env->active_fpu.fpr[].
 */
void helper_msa_fcle_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
                        uint32_t ws, uint32_t wt)
{
    /* GETPC() is evaluated here, in the helper itself, and passed down. */
    compare_le(env,
               &env->active_fpu.fpr[wd].wr,
               &env->active_fpu.fpr[ws].wr,
               &env->active_fpu.fpr[wt].wr,
               df, 1, GETPC());
}
6932 
/*
 * Runs compare_ule() with quiet = 1 on vector registers ws and wt, with
 * wd as the destination.  wd, ws and wt index env->active_fpu.fpr[].
 */
void helper_msa_fcule_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
                         uint32_t ws, uint32_t wt)
{
    /* GETPC() is evaluated here, in the helper itself, and passed down. */
    compare_ule(env,
                &env->active_fpu.fpr[wd].wr,
                &env->active_fpu.fpr[ws].wr,
                &env->active_fpu.fpr[wt].wr,
                df, 1, GETPC());
}
6941 
/*
 * Runs compare_af() with quiet = 0 on vector registers ws and wt, with wd
 * as the destination.  wd, ws and wt index env->active_fpu.fpr[].
 */
void helper_msa_fsaf_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
                        uint32_t ws, uint32_t wt)
{
    /* GETPC() is evaluated here, in the helper itself, and passed down. */
    compare_af(env,
               &env->active_fpu.fpr[wd].wr,
               &env->active_fpu.fpr[ws].wr,
               &env->active_fpu.fpr[wt].wr,
               df, 0, GETPC());
}
6950 
/*
 * Runs compare_un() with quiet = 0 on vector registers ws and wt, with wd
 * as the destination.  wd, ws and wt index env->active_fpu.fpr[].
 */
void helper_msa_fsun_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
                        uint32_t ws, uint32_t wt)
{
    /* GETPC() is evaluated here, in the helper itself, and passed down. */
    compare_un(env,
               &env->active_fpu.fpr[wd].wr,
               &env->active_fpu.fpr[ws].wr,
               &env->active_fpu.fpr[wt].wr,
               df, 0, GETPC());
}
6959 
/*
 * Runs compare_eq() with quiet = 0 on vector registers ws and wt, with wd
 * as the destination.  wd, ws and wt index env->active_fpu.fpr[].
 */
void helper_msa_fseq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
                        uint32_t ws, uint32_t wt)
{
    /* GETPC() is evaluated here, in the helper itself, and passed down. */
    compare_eq(env,
               &env->active_fpu.fpr[wd].wr,
               &env->active_fpu.fpr[ws].wr,
               &env->active_fpu.fpr[wt].wr,
               df, 0, GETPC());
}
6968 
/*
 * Runs compare_ueq() with quiet = 0 on vector registers ws and wt, with
 * wd as the destination.  wd, ws and wt index env->active_fpu.fpr[].
 */
void helper_msa_fsueq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
                         uint32_t ws, uint32_t wt)
{
    /* GETPC() is evaluated here, in the helper itself, and passed down. */
    compare_ueq(env,
                &env->active_fpu.fpr[wd].wr,
                &env->active_fpu.fpr[ws].wr,
                &env->active_fpu.fpr[wt].wr,
                df, 0, GETPC());
}
6977 
/*
 * Runs compare_lt() with quiet = 0 on vector registers ws and wt, with wd
 * as the destination.  wd, ws and wt index env->active_fpu.fpr[].
 */
void helper_msa_fslt_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
                        uint32_t ws, uint32_t wt)
{
    /* GETPC() is evaluated here, in the helper itself, and passed down. */
    compare_lt(env,
               &env->active_fpu.fpr[wd].wr,
               &env->active_fpu.fpr[ws].wr,
               &env->active_fpu.fpr[wt].wr,
               df, 0, GETPC());
}
6986 
/*
 * Runs compare_ult() with quiet = 0 on vector registers ws and wt, with
 * wd as the destination.  wd, ws and wt index env->active_fpu.fpr[].
 */
void helper_msa_fsult_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
                         uint32_t ws, uint32_t wt)
{
    /* GETPC() is evaluated here, in the helper itself, and passed down. */
    compare_ult(env,
                &env->active_fpu.fpr[wd].wr,
                &env->active_fpu.fpr[ws].wr,
                &env->active_fpu.fpr[wt].wr,
                df, 0, GETPC());
}
6995 
/*
 * Runs compare_le() with quiet = 0 on vector registers ws and wt, with wd
 * as the destination.  wd, ws and wt index env->active_fpu.fpr[].
 */
void helper_msa_fsle_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
                        uint32_t ws, uint32_t wt)
{
    /* GETPC() is evaluated here, in the helper itself, and passed down. */
    compare_le(env,
               &env->active_fpu.fpr[wd].wr,
               &env->active_fpu.fpr[ws].wr,
               &env->active_fpu.fpr[wt].wr,
               df, 0, GETPC());
}
7004 
/*
 * Runs compare_ule() with quiet = 0 on vector registers ws and wt, with
 * wd as the destination.  wd, ws and wt index env->active_fpu.fpr[].
 */
void helper_msa_fsule_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
                         uint32_t ws, uint32_t wt)
{
    /* GETPC() is evaluated here, in the helper itself, and passed down. */
    compare_ule(env,
                &env->active_fpu.fpr[wd].wr,
                &env->active_fpu.fpr[ws].wr,
                &env->active_fpu.fpr[wt].wr,
                df, 0, GETPC());
}
7013 
/*
 * Runs compare_or() with quiet = 1 on vector registers ws and wt, with wd
 * as the destination.  wd, ws and wt index env->active_fpu.fpr[].
 */
void helper_msa_fcor_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
                        uint32_t ws, uint32_t wt)
{
    /* GETPC() is evaluated here, in the helper itself, and passed down. */
    compare_or(env,
               &env->active_fpu.fpr[wd].wr,
               &env->active_fpu.fpr[ws].wr,
               &env->active_fpu.fpr[wt].wr,
               df, 1, GETPC());
}
7022 
/*
 * Runs compare_une() with quiet = 1 on vector registers ws and wt, with
 * wd as the destination.  wd, ws and wt index env->active_fpu.fpr[].
 */
void helper_msa_fcune_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
                         uint32_t ws, uint32_t wt)
{
    /* GETPC() is evaluated here, in the helper itself, and passed down. */
    compare_une(env,
                &env->active_fpu.fpr[wd].wr,
                &env->active_fpu.fpr[ws].wr,
                &env->active_fpu.fpr[wt].wr,
                df, 1, GETPC());
}
7031 
/*
 * Runs compare_ne() with quiet = 1 on vector registers ws and wt, with wd
 * as the destination.  wd, ws and wt index env->active_fpu.fpr[].
 */
void helper_msa_fcne_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
                        uint32_t ws, uint32_t wt)
{
    /* GETPC() is evaluated here, in the helper itself, and passed down. */
    compare_ne(env,
               &env->active_fpu.fpr[wd].wr,
               &env->active_fpu.fpr[ws].wr,
               &env->active_fpu.fpr[wt].wr,
               df, 1, GETPC());
}
7040 
/*
 * Runs compare_or() with quiet = 0 on vector registers ws and wt, with wd
 * as the destination.  wd, ws and wt index env->active_fpu.fpr[].
 */
void helper_msa_fsor_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
                        uint32_t ws, uint32_t wt)
{
    /* GETPC() is evaluated here, in the helper itself, and passed down. */
    compare_or(env,
               &env->active_fpu.fpr[wd].wr,
               &env->active_fpu.fpr[ws].wr,
               &env->active_fpu.fpr[wt].wr,
               df, 0, GETPC());
}
7049 
/*
 * Runs compare_une() with quiet = 0 on vector registers ws and wt, with
 * wd as the destination.  wd, ws and wt index env->active_fpu.fpr[].
 */
void helper_msa_fsune_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
                         uint32_t ws, uint32_t wt)
{
    /* GETPC() is evaluated here, in the helper itself, and passed down. */
    compare_une(env,
                &env->active_fpu.fpr[wd].wr,
                &env->active_fpu.fpr[ws].wr,
                &env->active_fpu.fpr[wt].wr,
                df, 0, GETPC());
}
7058 
/*
 * Runs compare_ne() with quiet = 0 on vector registers ws and wt, with wd
 * as the destination.  wd, ws and wt index env->active_fpu.fpr[].
 */
void helper_msa_fsne_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
                        uint32_t ws, uint32_t wt)
{
    /* GETPC() is evaluated here, in the helper itself, and passed down. */
    compare_ne(env,
               &env->active_fpu.fpr[wd].wr,
               &env->active_fpu.fpr[ws].wr,
               &env->active_fpu.fpr[wt].wr,
               df, 0, GETPC());
}
7067 
/*
 * Stub predicates for the 16-bit format: both expand to the constant 0
 * and ignore their argument.  They exist so that IS_DENORMAL(x, 16) can be
 * expanded; with these stubs it always evaluates to 0.
 */
7068 #define float16_is_zero(ARG) 0
7069 #define float16_is_zero_or_denormal(ARG) 0
7070 
/*
 * IS_DENORMAL(ARG, BITS): non-zero when float<BITS>_is_zero_or_denormal(ARG)
 * holds but float<BITS>_is_zero(ARG) does not, i.e. ARG is classified as
 * "zero or denormal" yet is not zero.  ARG is expanded twice.
 */
7071 #define IS_DENORMAL(ARG, BITS)                      \
7072     (!float ## BITS ## _is_zero(ARG)                \
7073     && float ## BITS ## _is_zero_or_denormal(ARG))
7074 
/*
 * MSA_FLOAT_BINOP: evaluate float<BITS>_<OP>(ARG1, ARG2, status) and store
 * the result in DEST.
 *
 * Steps, in this order:
 *  1. clear the exception flags of env->active_tc.msa_fp_status;
 *  2. perform the operation and assign DEST;
 *  3. call update_msacsr() with action argument 0 and the IS_DENORMAL()
 *     verdict for DEST, keeping the returned value in `c`;
 *  4. if get_enabled_exceptions(env, c) is non-zero, overwrite DEST with
 *     FLOAT_SNAN<BITS>(status), low 6 bits cleared, OR-ed with `c`.
 *
 * A variable named `env` must be in scope where the macro is expanded.
 * The block-local names `status` and `c` shadow any same-named identifier
 * appearing in the macro arguments.  DEST is evaluated more than once.
 */
7075 #define MSA_FLOAT_BINOP(DEST, OP, ARG1, ARG2, BITS)                         \
7076     do {                                                                    \
7077         float_status *status = &env->active_tc.msa_fp_status;               \
7078         int c;                                                              \
7079                                                                             \
7080         set_float_exception_flags(0, status);                               \
7081         DEST = float ## BITS ## _ ## OP(ARG1, ARG2, status);                \
7082         c = update_msacsr(env, 0, IS_DENORMAL(DEST, BITS));                 \
7083                                                                             \
7084         if (get_enabled_exceptions(env, c)) {                               \
7085             DEST = ((FLOAT_SNAN ## BITS(status) >> 6) << 6) | c;            \
7086         }                                                                   \
7087     } while (0)
7088 
/*
 * Element-wise floating-point addition of vector registers ws and wt,
 * performed through MSA_FLOAT_BINOP with the `add` operation.
 *
 * df selects 32-bit (DF_WORD) or 64-bit (DF_DOUBLE) elements; any other
 * value reaches g_assert_not_reached().  The result is assembled in a
 * local vector and copied to register wd only after
 * check_msacsr_cause() has returned.
 */
void helper_msa_fadd_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
        uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    wr_t res;

    clear_msacsr_cause(env);

    if (df == DF_WORD) {
        for (uint32_t lane = 0; lane < DF_ELEMENTS(DF_WORD); lane++) {
            MSA_FLOAT_BINOP(res.w[lane], add, lhs->w[lane], rhs->w[lane], 32);
        }
    } else if (df == DF_DOUBLE) {
        for (uint32_t lane = 0; lane < DF_ELEMENTS(DF_DOUBLE); lane++) {
            MSA_FLOAT_BINOP(res.d[lane], add, lhs->d[lane], rhs->d[lane], 64);
        }
    } else {
        g_assert_not_reached();
    }

    check_msacsr_cause(env, GETPC());
    msa_move_v(dst, &res);
}
7118 
/*
 * Element-wise floating-point subtraction (ws minus wt), performed
 * through MSA_FLOAT_BINOP with the `sub` operation.
 *
 * df selects 32-bit (DF_WORD) or 64-bit (DF_DOUBLE) elements; any other
 * value reaches g_assert_not_reached().  The result is assembled in a
 * local vector and copied to register wd only after
 * check_msacsr_cause() has returned.
 */
void helper_msa_fsub_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
        uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    wr_t res;

    clear_msacsr_cause(env);

    if (df == DF_WORD) {
        for (uint32_t lane = 0; lane < DF_ELEMENTS(DF_WORD); lane++) {
            MSA_FLOAT_BINOP(res.w[lane], sub, lhs->w[lane], rhs->w[lane], 32);
        }
    } else if (df == DF_DOUBLE) {
        for (uint32_t lane = 0; lane < DF_ELEMENTS(DF_DOUBLE); lane++) {
            MSA_FLOAT_BINOP(res.d[lane], sub, lhs->d[lane], rhs->d[lane], 64);
        }
    } else {
        g_assert_not_reached();
    }

    check_msacsr_cause(env, GETPC());
    msa_move_v(dst, &res);
}
7148 
/*
 * Element-wise floating-point multiplication of vector registers ws and
 * wt, performed through MSA_FLOAT_BINOP with the `mul` operation.
 *
 * df selects 32-bit (DF_WORD) or 64-bit (DF_DOUBLE) elements; any other
 * value reaches g_assert_not_reached().  The result is assembled in a
 * local vector and copied to register wd only after
 * check_msacsr_cause() has returned.
 */
void helper_msa_fmul_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
        uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    wr_t res;

    clear_msacsr_cause(env);

    if (df == DF_WORD) {
        for (uint32_t lane = 0; lane < DF_ELEMENTS(DF_WORD); lane++) {
            MSA_FLOAT_BINOP(res.w[lane], mul, lhs->w[lane], rhs->w[lane], 32);
        }
    } else if (df == DF_DOUBLE) {
        for (uint32_t lane = 0; lane < DF_ELEMENTS(DF_DOUBLE); lane++) {
            MSA_FLOAT_BINOP(res.d[lane], mul, lhs->d[lane], rhs->d[lane], 64);
        }
    } else {
        g_assert_not_reached();
    }

    check_msacsr_cause(env, GETPC());

    msa_move_v(dst, &res);
}
7179 
/*
 * Element-wise floating-point division (ws divided by wt), performed
 * through MSA_FLOAT_BINOP with the `div` operation.
 *
 * df selects 32-bit (DF_WORD) or 64-bit (DF_DOUBLE) elements; any other
 * value reaches g_assert_not_reached().  The result is assembled in a
 * local vector and copied to register wd only after
 * check_msacsr_cause() has returned.
 */
void helper_msa_fdiv_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
        uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    wr_t res;

    clear_msacsr_cause(env);

    if (df == DF_WORD) {
        for (uint32_t lane = 0; lane < DF_ELEMENTS(DF_WORD); lane++) {
            MSA_FLOAT_BINOP(res.w[lane], div, lhs->w[lane], rhs->w[lane], 32);
        }
    } else if (df == DF_DOUBLE) {
        for (uint32_t lane = 0; lane < DF_ELEMENTS(DF_DOUBLE); lane++) {
            MSA_FLOAT_BINOP(res.d[lane], div, lhs->d[lane], rhs->d[lane], 64);
        }
    } else {
        g_assert_not_reached();
    }

    check_msacsr_cause(env, GETPC());

    msa_move_v(dst, &res);
}
7210 
/*
 * MSA_FLOAT_MULADD: evaluate float<BITS>_muladd() and store the result in
 * DEST.
 *
 * Note the operand order of the call: ARG2 and ARG3 are passed as the
 * first two arguments and ARG1 as the third; NEGATE is forwarded as the
 * flags argument.
 *
 * Steps, in this order:
 *  1. clear the exception flags of env->active_tc.msa_fp_status;
 *  2. perform the operation and assign DEST;
 *  3. call update_msacsr() with action argument 0 and the IS_DENORMAL()
 *     verdict for DEST, keeping the returned value in `c`;
 *  4. if get_enabled_exceptions(env, c) is non-zero, overwrite DEST with
 *     FLOAT_SNAN<BITS>(status), low 6 bits cleared, OR-ed with `c`.
 *
 * A variable named `env` must be in scope where the macro is expanded.
 * The block-local names `status` and `c` shadow any same-named identifier
 * appearing in the macro arguments.  DEST is evaluated more than once.
 */
7211 #define MSA_FLOAT_MULADD(DEST, ARG1, ARG2, ARG3, NEGATE, BITS)              \
7212     do {                                                                    \
7213         float_status *status = &env->active_tc.msa_fp_status;               \
7214         int c;                                                              \
7215                                                                             \
7216         set_float_exception_flags(0, status);                               \
7217         DEST = float ## BITS ## _muladd(ARG2, ARG3, ARG1, NEGATE, status);  \
7218         c = update_msacsr(env, 0, IS_DENORMAL(DEST, BITS));                 \
7219                                                                             \
7220         if (get_enabled_exceptions(env, c)) {                               \
7221             DEST = ((FLOAT_SNAN ## BITS(status) >> 6) << 6) | c;            \
7222         }                                                                   \
7223     } while (0)
7224 
/*
 * Element-wise fused multiply-add through MSA_FLOAT_MULADD with flags 0:
 * the current contents of register wd are the third (addend) operand of
 * float{32,64}_muladd(), ws and wt are the first two operands.
 *
 * df selects 32-bit (DF_WORD) or 64-bit (DF_DOUBLE) elements; any other
 * value reaches g_assert_not_reached().  The result is assembled in a
 * local vector, so wd is read as an input for every element before it is
 * overwritten; it is written only after check_msacsr_cause() has
 * returned.
 */
void helper_msa_fmadd_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
        uint32_t ws, uint32_t wt)
{
    wr_t *acc = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    wr_t res;

    clear_msacsr_cause(env);

    if (df == DF_WORD) {
        for (uint32_t lane = 0; lane < DF_ELEMENTS(DF_WORD); lane++) {
            MSA_FLOAT_MULADD(res.w[lane], acc->w[lane],
                             lhs->w[lane], rhs->w[lane], 0, 32);
        }
    } else if (df == DF_DOUBLE) {
        for (uint32_t lane = 0; lane < DF_ELEMENTS(DF_DOUBLE); lane++) {
            MSA_FLOAT_MULADD(res.d[lane], acc->d[lane],
                             lhs->d[lane], rhs->d[lane], 0, 64);
        }
    } else {
        g_assert_not_reached();
    }

    check_msacsr_cause(env, GETPC());

    msa_move_v(acc, &res);
}
7257 
/*
 * Element-wise fused multiply-subtract through MSA_FLOAT_MULADD with the
 * float_muladd_negate_product flag: the current contents of register wd
 * are the third (addend) operand of float{32,64}_muladd(), ws and wt are
 * the first two operands.
 *
 * df selects 32-bit (DF_WORD) or 64-bit (DF_DOUBLE) elements; any other
 * value reaches g_assert_not_reached().  The result is assembled in a
 * local vector, so wd is read as an input for every element before it is
 * overwritten; it is written only after check_msacsr_cause() has
 * returned.
 */
void helper_msa_fmsub_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
        uint32_t ws, uint32_t wt)
{
    wr_t *acc = &env->active_fpu.fpr[wd].wr;
    wr_t *lhs = &env->active_fpu.fpr[ws].wr;
    wr_t *rhs = &env->active_fpu.fpr[wt].wr;
    wr_t res;

    clear_msacsr_cause(env);

    if (df == DF_WORD) {
        for (uint32_t lane = 0; lane < DF_ELEMENTS(DF_WORD); lane++) {
            MSA_FLOAT_MULADD(res.w[lane], acc->w[lane],
                             lhs->w[lane], rhs->w[lane],
                             float_muladd_negate_product, 32);
        }
    } else if (df == DF_DOUBLE) {
        for (uint32_t lane = 0; lane < DF_ELEMENTS(DF_DOUBLE); lane++) {
            MSA_FLOAT_MULADD(res.d[lane], acc->d[lane],
                             lhs->d[lane], rhs->d[lane],
                             float_muladd_negate_product, 64);
        }
    } else {
        g_assert_not_reached();
    }

    check_msacsr_cause(env, GETPC());

    msa_move_v(acc, &res);
}
7292 
/*
 * Element-wise float{32,64}_scalbn() of ws by the integer elements of wt,
 * performed through MSA_FLOAT_BINOP.
 *
 * Before the call each wt element is clamped to [-0x200, 0x200] for
 * DF_WORD and to [-0x1000, 0x1000] for DF_DOUBLE.  Any other df reaches
 * g_assert_not_reached().  The result is assembled in a local vector and
 * copied to register wd only after check_msacsr_cause() has returned.
 */
void helper_msa_fexp2_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
        uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *val = &env->active_fpu.fpr[ws].wr;
    wr_t *pow = &env->active_fpu.fpr[wt].wr;
    wr_t res;

    clear_msacsr_cause(env);

    if (df == DF_WORD) {
        for (uint32_t lane = 0; lane < DF_ELEMENTS(DF_WORD); lane++) {
            MSA_FLOAT_BINOP(res.w[lane], scalbn, val->w[lane],
                            pow->w[lane] >  0x200 ?  0x200 :
                            pow->w[lane] < -0x200 ? -0x200 : pow->w[lane],
                            32);
        }
    } else if (df == DF_DOUBLE) {
        for (uint32_t lane = 0; lane < DF_ELEMENTS(DF_DOUBLE); lane++) {
            MSA_FLOAT_BINOP(res.d[lane], scalbn, val->d[lane],
                            pow->d[lane] >  0x1000 ?  0x1000 :
                            pow->d[lane] < -0x1000 ? -0x1000 : pow->d[lane],
                            64);
        }
    } else {
        g_assert_not_reached();
    }

    check_msacsr_cause(env, GETPC());

    msa_move_v(dst, &res);
}
7329 
/*
 * MSA_FLOAT_UNOP: evaluate float<BITS>_<OP>(ARG, status) and store the
 * result in DEST.
 *
 * Steps, in this order:
 *  1. clear the exception flags of env->active_tc.msa_fp_status;
 *  2. perform the operation and assign DEST;
 *  3. call update_msacsr() with action argument 0 and the IS_DENORMAL()
 *     verdict for DEST, keeping the returned value in `c`;
 *  4. if get_enabled_exceptions(env, c) is non-zero, overwrite DEST with
 *     FLOAT_SNAN<BITS>(status), low 6 bits cleared, OR-ed with `c`.
 *
 * A variable named `env` must be in scope where the macro is expanded.
 * The block-local names `status` and `c` shadow any same-named identifier
 * appearing in the macro arguments.  DEST is evaluated more than once.
 */
7330 #define MSA_FLOAT_UNOP(DEST, OP, ARG, BITS)                                 \
7331     do {                                                                    \
7332         float_status *status = &env->active_tc.msa_fp_status;               \
7333         int c;                                                              \
7334                                                                             \
7335         set_float_exception_flags(0, status);                               \
7336         DEST = float ## BITS ## _ ## OP(ARG, status);                       \
7337         c = update_msacsr(env, 0, IS_DENORMAL(DEST, BITS));                 \
7338                                                                             \
7339         if (get_enabled_exceptions(env, c)) {                               \
7340             DEST = ((FLOAT_SNAN ## BITS(status) >> 6) << 6) | c;            \
7341         }                                                                   \
7342     } while (0)
7343 
/*
 * Converts the elements of ws and wt to the next narrower floating-point
 * format and stores both sets of converted values into register wd.
 *
 * DF_WORD:   32-bit elements are converted with the `from_float32`
 *            operation into the Lh()/Rh() slots (ws -> Lh, wt -> Rh).
 * DF_DOUBLE: 64-bit elements are converted with the `from_float64`
 *            operation into the Lw()/Rw() slots (ws -> Lw, wt -> Rw).
 * Any other df reaches g_assert_not_reached().
 *
 * The result is assembled in a local vector and copied to wd only after
 * check_msacsr_cause() has returned.
 */
void helper_msa_fexdo_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
                         uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src_l = &env->active_fpu.fpr[ws].wr;
    wr_t *src_r = &env->active_fpu.fpr[wt].wr;
    /* Lh/Rh/Lw/Rw are macros; hand them a plain pointer identifier. */
    wr_t packed, *out = &packed;

    clear_msacsr_cause(env);

    if (df == DF_WORD) {
        /*
         * Half precision exists in a standard IEEE flavour and an "ARM"
         * flavour that trades the NaN/Inf encodings for extra exponent
         * range; the IEEE flavour is the one requested here.
         */
        const bool ieee = true;

        for (uint32_t lane = 0; lane < DF_ELEMENTS(DF_WORD); lane++) {
            MSA_FLOAT_BINOP(Lh(out, lane), from_float32,
                            src_l->w[lane], ieee, 16);
            MSA_FLOAT_BINOP(Rh(out, lane), from_float32,
                            src_r->w[lane], ieee, 16);
        }
    } else if (df == DF_DOUBLE) {
        for (uint32_t lane = 0; lane < DF_ELEMENTS(DF_DOUBLE); lane++) {
            MSA_FLOAT_UNOP(Lw(out, lane), from_float64, src_l->d[lane], 32);
            MSA_FLOAT_UNOP(Rw(out, lane), from_float64, src_r->d[lane], 32);
        }
    } else {
        g_assert_not_reached();
    }

    check_msacsr_cause(env, GETPC());
    msa_move_v(dst, out);
}
7382 
/*
 * MSA_FLOAT_UNOP_XD(DEST, OP, ARG, BITS, XBITS)
 *
 * One-operand operation whose result width differs from its input width:
 * float<BITS>_<OP>() is applied to ARG, while the substitute value written
 * on an enabled exception is derived from FLOAT_SNAN<XBITS>.
 *
 * The softfloat exception flags are zeroed first and update_msacsr() is
 * called with CLEAR_FS_UNDERFLOW and no denormal indication.  When
 * get_enabled_exceptions() reports a hit for the returned value, DEST is
 * replaced by FLOAT_SNAN<XBITS> with its low six bits cleared and OR-ed
 * with that value.
 *
 * A variable named 'env' must be in scope where the macro is expanded.
 */
#define MSA_FLOAT_UNOP_XD(DEST, OP, ARG, BITS, XBITS)                       \
    do {                                                                    \
        float_status *fpst_ = &env->active_tc.msa_fp_status;                \
        int cause_;                                                         \
                                                                            \
        set_float_exception_flags(0, fpst_);                                \
        DEST = float ## BITS ## _ ## OP(ARG, fpst_);                        \
        cause_ = update_msacsr(env, CLEAR_FS_UNDERFLOW, 0);                 \
                                                                            \
        if (get_enabled_exceptions(env, cause_)) {                          \
            DEST = ((FLOAT_SNAN ## XBITS(fpst_) >> 6) << 6) | cause_;       \
        }                                                                   \
    } while (0)
7396 
/*
 * FTQ.df helper.
 *
 * Converts the floating-point elements of ws and wt to fixed-point values
 * of half the element width and packs both sets into wd:
 *   - DF_WORD:   float32 elements go through float32_to_q16();
 *   - DF_DOUBLE: float64 elements go through float64_to_q32().
 * Values derived from ws are stored through Lh()/Lw(), values derived
 * from wt through Rh()/Rw().  Any other df is a caller bug.
 *
 * The result is assembled in a scratch register and copied to wd only
 * after check_msacsr_cause() has returned.
 */
void helper_msa_ftq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
                       uint32_t ws, uint32_t wt)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;
    wr_t scratch, *out = &scratch;
    uint32_t n;

    clear_msacsr_cause(env);

    if (df == DF_WORD) {
        for (n = 0; n < DF_ELEMENTS(DF_WORD); n++) {
            MSA_FLOAT_UNOP_XD(Lh(out, n), to_q16, src_s->w[n], 32, 16);
            MSA_FLOAT_UNOP_XD(Rh(out, n), to_q16, src_t->w[n], 32, 16);
        }
    } else if (df == DF_DOUBLE) {
        for (n = 0; n < DF_ELEMENTS(DF_DOUBLE); n++) {
            MSA_FLOAT_UNOP_XD(Lw(out, n), to_q32, src_s->d[n], 64, 32);
            MSA_FLOAT_UNOP_XD(Rw(out, n), to_q32, src_t->d[n], 64, 32);
        }
    } else {
        g_assert_not_reached();
    }

    check_msacsr_cause(env, GETPC());

    msa_move_v(dst, out);
}
7429 
/*
 * NUMBER_QNAN_PAIR(ARG1, ARG2, BITS, STATUS)
 *
 * Evaluates to true when ARG1 is not a NaN of any kind
 * (float<BITS>_is_any_nan) and ARG2 is a quiet NaN
 * (float<BITS>_is_quiet_nan, queried with STATUS).
 *
 * The whole expansion is parenthesized so that the macro behaves as a
 * single operand when negated or combined with other operators.
 */
#define NUMBER_QNAN_PAIR(ARG1, ARG2, BITS, STATUS)      \
    (!float ## BITS ## _is_any_nan(ARG1)                \
     && float ## BITS ## _is_quiet_nan(ARG2, STATUS))
7433 
/*
 * MSA_FLOAT_MAXOP(DEST, OP, ARG1, ARG2, BITS)
 *
 * Two-operand operation used by the min/max helpers: float<BITS>_<OP>()
 * is applied to ARG1 and ARG2 and the result stored in DEST.  The
 * softfloat exception flags are zeroed first; update_msacsr() is then
 * called with no action and no denormal indication.  When
 * get_enabled_exceptions() reports a hit for the returned value, DEST is
 * replaced by FLOAT_SNAN<BITS> with its low six bits cleared and OR-ed
 * with that value.
 *
 * A variable named 'env' must be in scope where the macro is expanded.
 */
#define MSA_FLOAT_MAXOP(DEST, OP, ARG1, ARG2, BITS)                         \
    do {                                                                    \
        float_status *fpst_ = &env->active_tc.msa_fp_status;                \
        int cause_;                                                         \
                                                                            \
        set_float_exception_flags(0, fpst_);                                \
        DEST = float ## BITS ## _ ## OP(ARG1, ARG2, fpst_);                 \
        cause_ = update_msacsr(env, 0, 0);                                  \
                                                                            \
        if (get_enabled_exceptions(env, cause_)) {                          \
            DEST = ((FLOAT_SNAN ## BITS(fpst_) >> 6) << 6) | cause_;        \
        }                                                                   \
    } while (0)
7447 
/*
 * FMAXMIN_A(F, G, X, S_IN, T_IN, BITS, STATUS)
 *
 * Magnitude-based min/max of one element pair.  F and G are the two
 * complementary operations (min/max or max/min); X receives the result.
 *
 * If exactly one input is a quiet NaN while the other is not a NaN at
 * all (NUMBER_QNAN_PAIR), the non-NaN input is used for both operands.
 * Then F and G are applied to the operands and F is applied to their
 * absolute values.  X is the F result when both magnitudes are equal or
 * when the F result's magnitude matches F of the magnitudes; otherwise X
 * is the G result.
 *
 * The three MSA_FLOAT_MAXOP() expansions are kept in this order because
 * each one resets the exception flags and calls update_msacsr().
 */
#define FMAXMIN_A(F, G, X, S_IN, T_IN, BITS, STATUS)                \
    do {                                                            \
        uint ## BITS ## _t val_s = (S_IN), val_t = (T_IN);          \
        uint ## BITS ## _t mag_s, mag_t, res_f, res_g, res_mag;     \
                                                                    \
        if (NUMBER_QNAN_PAIR(val_s, val_t, BITS, STATUS)) {         \
            val_t = val_s;                                          \
        } else if (NUMBER_QNAN_PAIR(val_t, val_s, BITS, STATUS)) {  \
            val_s = val_t;                                          \
        }                                                           \
                                                                    \
        mag_s = float ## BITS ## _abs(val_s);                       \
        mag_t = float ## BITS ## _abs(val_t);                       \
                                                                    \
        MSA_FLOAT_MAXOP(res_f, F, val_s, val_t, BITS);              \
        MSA_FLOAT_MAXOP(res_g, G, val_s, val_t, BITS);              \
        MSA_FLOAT_MAXOP(res_mag, F, mag_s, mag_t, BITS);            \
                                                                    \
        if (mag_s == mag_t ||                                       \
            res_mag == float ## BITS ## _abs(res_f)) {              \
            X = res_f;                                              \
        } else {                                                    \
            X = res_g;                                              \
        }                                                           \
    } while (0)
7465 
/*
 * FMIN.df helper.
 *
 * Element-wise floating-point minimum of ws and wt, written to wd.
 * For every element pair:
 *   - if the ws element is not a NaN and the wt element is a quiet NaN,
 *     the ws element is supplied as both operands of min;
 *   - otherwise, if the wt element is not a NaN and the ws element is a
 *     quiet NaN, the wt element is supplied as both operands;
 *   - otherwise min is applied to the pair as is.
 * df must be DF_WORD or DF_DOUBLE; anything else is a caller bug.
 *
 * NOTE(review): the loops rely on DF_ELEMENTS(DF_WORD) == 4 and
 * DF_ELEMENTS(DF_DOUBLE) == 2, i.e. on a 128-bit MSA_WRLEN - confirm.
 *
 * The result is assembled in a scratch register and copied to wd only
 * after check_msacsr_cause() has returned.
 */
void helper_msa_fmin_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
        uint32_t ws, uint32_t wt)
{
    float_status *fpst = &env->active_tc.msa_fp_status;
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;
    wr_t scratch, *out = &scratch;
    uint32_t n;

    clear_msacsr_cause(env);

    switch (df) {
    case DF_WORD:
        for (n = 0; n < DF_ELEMENTS(DF_WORD); n++) {
            if (NUMBER_QNAN_PAIR(src_s->w[n], src_t->w[n], 32, fpst)) {
                MSA_FLOAT_MAXOP(out->w[n], min, src_s->w[n], src_s->w[n], 32);
            } else if (NUMBER_QNAN_PAIR(src_t->w[n], src_s->w[n], 32, fpst)) {
                MSA_FLOAT_MAXOP(out->w[n], min, src_t->w[n], src_t->w[n], 32);
            } else {
                MSA_FLOAT_MAXOP(out->w[n], min, src_s->w[n], src_t->w[n], 32);
            }
        }
        break;
    case DF_DOUBLE:
        for (n = 0; n < DF_ELEMENTS(DF_DOUBLE); n++) {
            if (NUMBER_QNAN_PAIR(src_s->d[n], src_t->d[n], 64, fpst)) {
                MSA_FLOAT_MAXOP(out->d[n], min, src_s->d[n], src_s->d[n], 64);
            } else if (NUMBER_QNAN_PAIR(src_t->d[n], src_s->d[n], 64, fpst)) {
                MSA_FLOAT_MAXOP(out->d[n], min, src_t->d[n], src_t->d[n], 64);
            } else {
                MSA_FLOAT_MAXOP(out->d[n], min, src_s->d[n], src_t->d[n], 64);
            }
        }
        break;
    default:
        g_assert_not_reached();
    }

    check_msacsr_cause(env, GETPC());

    msa_move_v(dst, out);
}
7539 
/*
 * FMIN_A.df helper.
 *
 * Element-wise magnitude-based minimum of ws and wt, written to wd.
 * Each element pair is handled by FMAXMIN_A() with min as the primary
 * operation and max as the complementary one.
 * df must be DF_WORD or DF_DOUBLE; anything else is a caller bug.
 *
 * NOTE(review): the loops rely on DF_ELEMENTS(DF_WORD) == 4 and
 * DF_ELEMENTS(DF_DOUBLE) == 2, i.e. on a 128-bit MSA_WRLEN - confirm.
 *
 * The result is assembled in a scratch register and copied to wd only
 * after check_msacsr_cause() has returned.
 */
void helper_msa_fmin_a_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
        uint32_t ws, uint32_t wt)
{
    float_status *fpst = &env->active_tc.msa_fp_status;
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;
    wr_t scratch, *out = &scratch;
    uint32_t n;

    clear_msacsr_cause(env);

    switch (df) {
    case DF_WORD:
        for (n = 0; n < DF_ELEMENTS(DF_WORD); n++) {
            FMAXMIN_A(min, max, out->w[n], src_s->w[n], src_t->w[n], 32, fpst);
        }
        break;
    case DF_DOUBLE:
        for (n = 0; n < DF_ELEMENTS(DF_DOUBLE); n++) {
            FMAXMIN_A(min, max, out->d[n], src_s->d[n], src_t->d[n], 64, fpst);
        }
        break;
    default:
        g_assert_not_reached();
    }

    check_msacsr_cause(env, GETPC());

    msa_move_v(dst, out);
}
7567 
/*
 * FMAX.df helper.
 *
 * Element-wise floating-point maximum of ws and wt, written to wd.
 * For every element pair:
 *   - if the ws element is not a NaN and the wt element is a quiet NaN,
 *     the ws element is supplied as both operands of max;
 *   - otherwise, if the wt element is not a NaN and the ws element is a
 *     quiet NaN, the wt element is supplied as both operands;
 *   - otherwise max is applied to the pair as is.
 * df must be DF_WORD or DF_DOUBLE; anything else is a caller bug.
 *
 * NOTE(review): the loops rely on DF_ELEMENTS(DF_WORD) == 4 and
 * DF_ELEMENTS(DF_DOUBLE) == 2, i.e. on a 128-bit MSA_WRLEN - confirm.
 *
 * The result is assembled in a scratch register and copied to wd only
 * after check_msacsr_cause() has returned.
 */
void helper_msa_fmax_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
        uint32_t ws, uint32_t wt)
{
    float_status *fpst = &env->active_tc.msa_fp_status;
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;
    wr_t scratch, *out = &scratch;
    uint32_t n;

    clear_msacsr_cause(env);

    switch (df) {
    case DF_WORD:
        for (n = 0; n < DF_ELEMENTS(DF_WORD); n++) {
            if (NUMBER_QNAN_PAIR(src_s->w[n], src_t->w[n], 32, fpst)) {
                MSA_FLOAT_MAXOP(out->w[n], max, src_s->w[n], src_s->w[n], 32);
            } else if (NUMBER_QNAN_PAIR(src_t->w[n], src_s->w[n], 32, fpst)) {
                MSA_FLOAT_MAXOP(out->w[n], max, src_t->w[n], src_t->w[n], 32);
            } else {
                MSA_FLOAT_MAXOP(out->w[n], max, src_s->w[n], src_t->w[n], 32);
            }
        }
        break;
    case DF_DOUBLE:
        for (n = 0; n < DF_ELEMENTS(DF_DOUBLE); n++) {
            if (NUMBER_QNAN_PAIR(src_s->d[n], src_t->d[n], 64, fpst)) {
                MSA_FLOAT_MAXOP(out->d[n], max, src_s->d[n], src_s->d[n], 64);
            } else if (NUMBER_QNAN_PAIR(src_t->d[n], src_s->d[n], 64, fpst)) {
                MSA_FLOAT_MAXOP(out->d[n], max, src_t->d[n], src_t->d[n], 64);
            } else {
                MSA_FLOAT_MAXOP(out->d[n], max, src_s->d[n], src_t->d[n], 64);
            }
        }
        break;
    default:
        g_assert_not_reached();
    }

    check_msacsr_cause(env, GETPC());

    msa_move_v(dst, out);
}
7641 
/*
 * FMAX_A.df helper.
 *
 * Element-wise magnitude-based maximum of ws and wt, written to wd.
 * Each element pair is handled by FMAXMIN_A() with max as the primary
 * operation and min as the complementary one.
 * df must be DF_WORD or DF_DOUBLE; anything else is a caller bug.
 *
 * NOTE(review): the loops rely on DF_ELEMENTS(DF_WORD) == 4 and
 * DF_ELEMENTS(DF_DOUBLE) == 2, i.e. on a 128-bit MSA_WRLEN - confirm.
 *
 * The result is assembled in a scratch register and copied to wd only
 * after check_msacsr_cause() has returned.
 */
void helper_msa_fmax_a_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
        uint32_t ws, uint32_t wt)
{
    float_status *fpst = &env->active_tc.msa_fp_status;
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src_s = &env->active_fpu.fpr[ws].wr;
    wr_t *src_t = &env->active_fpu.fpr[wt].wr;
    wr_t scratch, *out = &scratch;
    uint32_t n;

    clear_msacsr_cause(env);

    switch (df) {
    case DF_WORD:
        for (n = 0; n < DF_ELEMENTS(DF_WORD); n++) {
            FMAXMIN_A(max, min, out->w[n], src_s->w[n], src_t->w[n], 32, fpst);
        }
        break;
    case DF_DOUBLE:
        for (n = 0; n < DF_ELEMENTS(DF_DOUBLE); n++) {
            FMAXMIN_A(max, min, out->d[n], src_s->d[n], src_t->d[n], 64, fpst);
        }
        break;
    default:
        g_assert_not_reached();
    }

    check_msacsr_cause(env, GETPC());

    msa_move_v(dst, out);
}
7669 
/*
 * FCLASS.df helper.
 *
 * Stores, for every element of ws, the value returned by float_class_s()
 * (DF_WORD) or float_class_d() (DF_DOUBLE) into the matching element of
 * wd.  The destination register is written directly, element by element,
 * without a scratch copy.  Any other df is a caller bug.
 *
 * NOTE(review): the loops rely on DF_ELEMENTS(DF_WORD) == 4 and
 * DF_ELEMENTS(DF_DOUBLE) == 2, i.e. on a 128-bit MSA_WRLEN - confirm.
 */
void helper_msa_fclass_df(CPUMIPSState *env, uint32_t df,
        uint32_t wd, uint32_t ws)
{
    float_status *fpst = &env->active_tc.msa_fp_status;
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src = &env->active_fpu.fpr[ws].wr;
    uint32_t n;

    switch (df) {
    case DF_WORD:
        for (n = 0; n < DF_ELEMENTS(DF_WORD); n++) {
            dst->w[n] = float_class_s(src->w[n], fpst);
        }
        break;
    case DF_DOUBLE:
        for (n = 0; n < DF_ELEMENTS(DF_DOUBLE); n++) {
            dst->d[n] = float_class_d(src->d[n], fpst);
        }
        break;
    default:
        g_assert_not_reached();
    }
}
7689 
/*
 * MSA_FLOAT_UNOP0(DEST, OP, ARG, BITS)
 *
 * One-operand operation that yields zero for NaN inputs.
 * float<BITS>_<OP>() is applied to ARG after the softfloat exception
 * flags have been zeroed; update_msacsr() is then called with
 * CLEAR_FS_UNDERFLOW and no denormal indication.
 *
 * When get_enabled_exceptions() reports a hit for the returned value,
 * DEST is replaced by FLOAT_SNAN<BITS> with its low six bits cleared and
 * OR-ed with that value.  Otherwise, if ARG is any kind of NaN, DEST is
 * forced to 0.
 *
 * ARG is expanded twice, so it must be free of side effects.
 * A variable named 'env' must be in scope where the macro is expanded.
 */
#define MSA_FLOAT_UNOP0(DEST, OP, ARG, BITS)                                \
    do {                                                                    \
        float_status *fpst_ = &env->active_tc.msa_fp_status;                \
        int cause_;                                                         \
                                                                            \
        set_float_exception_flags(0, fpst_);                                \
        DEST = float ## BITS ## _ ## OP(ARG, fpst_);                        \
        cause_ = update_msacsr(env, CLEAR_FS_UNDERFLOW, 0);                 \
                                                                            \
        if (get_enabled_exceptions(env, cause_)) {                          \
            DEST = ((FLOAT_SNAN ## BITS(fpst_) >> 6) << 6) | cause_;        \
        } else if (float ## BITS ## _is_any_nan(ARG)) {                     \
            DEST = 0;                                                       \
        }                                                                   \
    } while (0)
7705 
/*
 * FTRUNC_S.df helper.
 *
 * Converts every floating-point element of ws to a signed integer of the
 * same width using the round-to-zero conversions
 * (float32_to_int32_round_to_zero / float64_to_int64_round_to_zero) via
 * MSA_FLOAT_UNOP0(), and writes the result to wd.
 * df must be DF_WORD or DF_DOUBLE; anything else is a caller bug.
 *
 * The result is assembled in a scratch register and copied to wd only
 * after check_msacsr_cause() has returned.
 */
void helper_msa_ftrunc_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
                            uint32_t ws)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src = &env->active_fpu.fpr[ws].wr;
    wr_t scratch, *out = &scratch;
    uint32_t n;

    clear_msacsr_cause(env);

    if (df == DF_WORD) {
        for (n = 0; n < DF_ELEMENTS(DF_WORD); n++) {
            MSA_FLOAT_UNOP0(out->w[n], to_int32_round_to_zero, src->w[n], 32);
        }
    } else if (df == DF_DOUBLE) {
        for (n = 0; n < DF_ELEMENTS(DF_DOUBLE); n++) {
            MSA_FLOAT_UNOP0(out->d[n], to_int64_round_to_zero, src->d[n], 64);
        }
    } else {
        g_assert_not_reached();
    }

    check_msacsr_cause(env, GETPC());

    msa_move_v(dst, out);
}
7735 
/*
 * FTRUNC_U.df helper.
 *
 * Converts every floating-point element of ws to an unsigned integer of
 * the same width using the round-to-zero conversions
 * (float32_to_uint32_round_to_zero / float64_to_uint64_round_to_zero) via
 * MSA_FLOAT_UNOP0(), and writes the result to wd.
 * df must be DF_WORD or DF_DOUBLE; anything else is a caller bug.
 *
 * The result is assembled in a scratch register and copied to wd only
 * after check_msacsr_cause() has returned.
 */
void helper_msa_ftrunc_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
                            uint32_t ws)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src = &env->active_fpu.fpr[ws].wr;
    wr_t scratch, *out = &scratch;
    uint32_t n;

    clear_msacsr_cause(env);

    if (df == DF_WORD) {
        for (n = 0; n < DF_ELEMENTS(DF_WORD); n++) {
            MSA_FLOAT_UNOP0(out->w[n], to_uint32_round_to_zero, src->w[n], 32);
        }
    } else if (df == DF_DOUBLE) {
        for (n = 0; n < DF_ELEMENTS(DF_DOUBLE); n++) {
            MSA_FLOAT_UNOP0(out->d[n], to_uint64_round_to_zero, src->d[n], 64);
        }
    } else {
        g_assert_not_reached();
    }

    check_msacsr_cause(env, GETPC());

    msa_move_v(dst, out);
}
7765 
/*
 * FSQRT.df helper.
 *
 * Applies float32_sqrt() (DF_WORD) or float64_sqrt() (DF_DOUBLE) to every
 * element of ws through MSA_FLOAT_UNOP() and writes the result to wd.
 * Any other df is a caller bug.
 *
 * The result is assembled in a scratch register and copied to wd only
 * after check_msacsr_cause() has returned.
 */
void helper_msa_fsqrt_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
                         uint32_t ws)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src = &env->active_fpu.fpr[ws].wr;
    wr_t scratch, *out = &scratch;
    uint32_t n;

    clear_msacsr_cause(env);

    if (df == DF_WORD) {
        for (n = 0; n < DF_ELEMENTS(DF_WORD); n++) {
            MSA_FLOAT_UNOP(out->w[n], sqrt, src->w[n], 32);
        }
    } else if (df == DF_DOUBLE) {
        for (n = 0; n < DF_ELEMENTS(DF_DOUBLE); n++) {
            MSA_FLOAT_UNOP(out->d[n], sqrt, src->d[n], 64);
        }
    } else {
        g_assert_not_reached();
    }

    check_msacsr_cause(env, GETPC());

    msa_move_v(dst, out);
}
7795 
/*
 * MSA_FLOAT_RECIPROCAL(DEST, ARG, BITS)
 *
 * Store FLOAT_ONE<BITS> / ARG in DEST using float<BITS>_div().
 *
 * The softfloat exception flags are zeroed first.  ARG is then evaluated
 * exactly once into a local, so an ARG that is itself a softfloat call
 * (e.g. a square root) runs a single time and still contributes its
 * exception flags after the reset.
 *
 * update_msacsr() is called with RECIPROCAL_INEXACT, unless ARG is an
 * infinity or the quotient is a quiet NaN, in which case 0 is passed;
 * the IS_DENORMAL() state of the quotient is passed as well.  When
 * get_enabled_exceptions() reports a hit for the returned value, DEST is
 * replaced by FLOAT_SNAN<BITS> with its low six bits cleared and OR-ed
 * with that value.
 *
 * A variable named 'env' must be in scope where the macro is expanded.
 */
#define MSA_FLOAT_RECIPROCAL(DEST, ARG, BITS)                               \
    do {                                                                    \
        float_status *status = &env->active_tc.msa_fp_status;               \
        float ## BITS arg_;                                                 \
        int c;                                                              \
                                                                            \
        set_float_exception_flags(0, status);                               \
        /* Must follow the flag reset: ARG may raise flags of its own. */   \
        arg_ = (ARG);                                                       \
        DEST = float ## BITS ## _div(FLOAT_ONE ## BITS, arg_, status);      \
        c = update_msacsr(env, float ## BITS ## _is_infinity(arg_) ||       \
                          float ## BITS ## _is_quiet_nan(DEST, status) ?    \
                          0 : RECIPROCAL_INEXACT,                           \
                          IS_DENORMAL(DEST, BITS));                         \
                                                                            \
        if (get_enabled_exceptions(env, c)) {                               \
            DEST = ((FLOAT_SNAN ## BITS(status) >> 6) << 6) | c;            \
        }                                                                   \
    } while (0)
7812 
/*
 * FRSQRT.df helper.
 *
 * Computes the reciprocal of the square root of every element of ws and
 * writes the result to wd.  The square root (float32_sqrt / float64_sqrt)
 * is handed to MSA_FLOAT_RECIPROCAL() as an expression, so it is
 * evaluated inside the macro, after the macro has reset the exception
 * flags.
 * df must be DF_WORD or DF_DOUBLE; anything else is a caller bug.
 *
 * The result is assembled in a scratch register and copied to wd only
 * after check_msacsr_cause() has returned.
 */
void helper_msa_frsqrt_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
                          uint32_t ws)
{
    float_status *fpst = &env->active_tc.msa_fp_status;
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src = &env->active_fpu.fpr[ws].wr;
    wr_t scratch, *out = &scratch;
    uint32_t n;

    clear_msacsr_cause(env);

    if (df == DF_WORD) {
        for (n = 0; n < DF_ELEMENTS(DF_WORD); n++) {
            MSA_FLOAT_RECIPROCAL(out->w[n], float32_sqrt(src->w[n], fpst), 32);
        }
    } else if (df == DF_DOUBLE) {
        for (n = 0; n < DF_ELEMENTS(DF_DOUBLE); n++) {
            MSA_FLOAT_RECIPROCAL(out->d[n], float64_sqrt(src->d[n], fpst), 64);
        }
    } else {
        g_assert_not_reached();
    }

    check_msacsr_cause(env, GETPC());

    msa_move_v(dst, out);
}
7844 
/*
 * FRCP.df helper.
 *
 * Computes the reciprocal of every element of ws through
 * MSA_FLOAT_RECIPROCAL() and writes the result to wd.
 * df must be DF_WORD or DF_DOUBLE; anything else is a caller bug.
 *
 * The result is assembled in a scratch register and copied to wd only
 * after check_msacsr_cause() has returned.
 */
void helper_msa_frcp_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
                        uint32_t ws)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src = &env->active_fpu.fpr[ws].wr;
    wr_t scratch, *out = &scratch;
    uint32_t n;

    clear_msacsr_cause(env);

    if (df == DF_WORD) {
        for (n = 0; n < DF_ELEMENTS(DF_WORD); n++) {
            MSA_FLOAT_RECIPROCAL(out->w[n], src->w[n], 32);
        }
    } else if (df == DF_DOUBLE) {
        for (n = 0; n < DF_ELEMENTS(DF_DOUBLE); n++) {
            MSA_FLOAT_RECIPROCAL(out->d[n], src->d[n], 64);
        }
    } else {
        g_assert_not_reached();
    }

    check_msacsr_cause(env, GETPC());

    msa_move_v(dst, out);
}
7874 
/*
 * FRINT.df helper.
 *
 * Applies float32_round_to_int() (DF_WORD) or float64_round_to_int()
 * (DF_DOUBLE) to every element of ws through MSA_FLOAT_UNOP() and writes
 * the result to wd.  Any other df is a caller bug.
 *
 * The result is assembled in a scratch register and copied to wd only
 * after check_msacsr_cause() has returned.
 */
void helper_msa_frint_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
                         uint32_t ws)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src = &env->active_fpu.fpr[ws].wr;
    wr_t scratch, *out = &scratch;
    uint32_t n;

    clear_msacsr_cause(env);

    if (df == DF_WORD) {
        for (n = 0; n < DF_ELEMENTS(DF_WORD); n++) {
            MSA_FLOAT_UNOP(out->w[n], round_to_int, src->w[n], 32);
        }
    } else if (df == DF_DOUBLE) {
        for (n = 0; n < DF_ELEMENTS(DF_DOUBLE); n++) {
            MSA_FLOAT_UNOP(out->d[n], round_to_int, src->d[n], 64);
        }
    } else {
        g_assert_not_reached();
    }

    check_msacsr_cause(env, GETPC());

    msa_move_v(dst, out);
}
7904 
/*
 * MSA_FLOAT_LOGB(DEST, ARG, BITS)
 *
 * Store in DEST the base-2 logarithm of ARG rounded to an integral value:
 * float<BITS>_log2() followed by float<BITS>_round_to_int(), both run
 * with the rounding mode temporarily set to float_round_down.  The
 * rounding mode is then restored from the RM field of MSACSR through the
 * ieee_rm[] table.
 *
 * The inexact flag is removed from the accumulated exception flags before
 * update_msacsr() is called with the IS_DENORMAL() state of the result.
 * When get_enabled_exceptions() reports a hit for the returned value,
 * DEST is replaced by FLOAT_SNAN<BITS> with its low six bits cleared and
 * OR-ed with that value.
 *
 * A variable named 'env' must be in scope where the macro is expanded.
 */
#define MSA_FLOAT_LOGB(DEST, ARG, BITS)                                     \
    do {                                                                    \
        float_status *fpst_ = &env->active_tc.msa_fp_status;                \
        int cause_;                                                         \
                                                                            \
        set_float_exception_flags(0, fpst_);                                \
                                                                            \
        set_float_rounding_mode(float_round_down, fpst_);                   \
        DEST = float ## BITS ## _ ## log2(ARG, fpst_);                      \
        DEST = float ## BITS ## _ ## round_to_int(DEST, fpst_);             \
        set_float_rounding_mode(ieee_rm[(env->active_tc.msacsr &            \
                                         MSACSR_RM_MASK) >> MSACSR_RM],     \
                                fpst_);                                     \
                                                                            \
        set_float_exception_flags(get_float_exception_flags(fpst_) &        \
                                  (~float_flag_inexact),                    \
                                  fpst_);                                   \
                                                                            \
        cause_ = update_msacsr(env, 0, IS_DENORMAL(DEST, BITS));            \
                                                                            \
        if (get_enabled_exceptions(env, cause_)) {                          \
            DEST = ((FLOAT_SNAN ## BITS(fpst_) >> 6) << 6) | cause_;        \
        }                                                                   \
    } while (0)
7928 
/*
 * FLOG2.df helper.
 *
 * Runs MSA_FLOAT_LOGB() on every element of ws and writes the result to
 * wd.  df must be DF_WORD or DF_DOUBLE; anything else is a caller bug.
 *
 * The result is assembled in a scratch register and copied to wd only
 * after check_msacsr_cause() has returned.
 */
void helper_msa_flog2_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
                         uint32_t ws)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src = &env->active_fpu.fpr[ws].wr;
    wr_t scratch, *out = &scratch;
    uint32_t n;

    clear_msacsr_cause(env);

    if (df == DF_WORD) {
        for (n = 0; n < DF_ELEMENTS(DF_WORD); n++) {
            MSA_FLOAT_LOGB(out->w[n], src->w[n], 32);
        }
    } else if (df == DF_DOUBLE) {
        for (n = 0; n < DF_ELEMENTS(DF_DOUBLE); n++) {
            MSA_FLOAT_LOGB(out->d[n], src->d[n], 64);
        }
    } else {
        g_assert_not_reached();
    }

    check_msacsr_cause(env, GETPC());

    msa_move_v(dst, out);
}
7958 
/*
 * FEXUPL.df helper.
 *
 * Widens the elements of ws that are selected by the Lh()/Lw() accessors
 * and writes the result to wd:
 *   - DF_WORD:   16-bit elements read through Lh() go through the
 *                from_float16 operation at 32 bits, with the IEEE half
 *                format selected;
 *   - DF_DOUBLE: 32-bit elements read through Lw() go through the
 *                from_float32 operation at 64 bits.
 * Any other df is a caller bug.
 *
 * The result is assembled in a scratch register and copied to wd only
 * after check_msacsr_cause() has returned.
 */
void helper_msa_fexupl_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
                          uint32_t ws)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src = &env->active_fpu.fpr[ws].wr;
    wr_t scratch, *out = &scratch;
    uint32_t n;

    clear_msacsr_cause(env);

    if (df == DF_WORD) {
        /*
         * Two half-precision encodings exist: standard IEEE and the
         * "ARM" variant, which trades the NaN/Inf encodings for extra
         * exponent range.  The IEEE encoding is the one used here.
         */
        const bool ieee = true;

        for (n = 0; n < DF_ELEMENTS(DF_WORD); n++) {
            MSA_FLOAT_BINOP(out->w[n], from_float16, Lh(src, n), ieee, 32);
        }
    } else if (df == DF_DOUBLE) {
        for (n = 0; n < DF_ELEMENTS(DF_DOUBLE); n++) {
            MSA_FLOAT_UNOP(out->d[n], from_float32, Lw(src, n), 64);
        }
    } else {
        g_assert_not_reached();
    }

    check_msacsr_cause(env, GETPC());
    msa_move_v(dst, out);
}
7994 
/*
 * FEXUPR.df helper.
 *
 * Widens the elements of ws that are selected by the Rh()/Rw() accessors
 * and writes the result to wd:
 *   - DF_WORD:   16-bit elements read through Rh() go through the
 *                from_float16 operation at 32 bits, with the IEEE half
 *                format selected;
 *   - DF_DOUBLE: 32-bit elements read through Rw() go through the
 *                from_float32 operation at 64 bits.
 * Any other df is a caller bug.
 *
 * The result is assembled in a scratch register and copied to wd only
 * after check_msacsr_cause() has returned.
 */
void helper_msa_fexupr_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
                          uint32_t ws)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src = &env->active_fpu.fpr[ws].wr;
    wr_t scratch, *out = &scratch;
    uint32_t n;

    clear_msacsr_cause(env);

    if (df == DF_WORD) {
        /*
         * Two half-precision encodings exist: standard IEEE and the
         * "ARM" variant, which trades the NaN/Inf encodings for extra
         * exponent range.  The IEEE encoding is the one used here.
         */
        const bool ieee = true;

        for (n = 0; n < DF_ELEMENTS(DF_WORD); n++) {
            MSA_FLOAT_BINOP(out->w[n], from_float16, Rh(src, n), ieee, 32);
        }
    } else if (df == DF_DOUBLE) {
        for (n = 0; n < DF_ELEMENTS(DF_DOUBLE); n++) {
            MSA_FLOAT_UNOP(out->d[n], from_float32, Rw(src, n), 64);
        }
    } else {
        g_assert_not_reached();
    }

    check_msacsr_cause(env, GETPC());
    msa_move_v(dst, out);
}
8030 
/*
 * FFQL.df helper.
 *
 * Converts the fixed-point elements of ws that are selected by the
 * Lh()/Lw() accessors to floating point and writes the result to wd:
 *   - DF_WORD:   16-bit elements go through float32_from_q16();
 *   - DF_DOUBLE: 32-bit elements go through float64_from_q32().
 * Any other df is a caller bug.
 *
 * NOTE(review): unlike the neighbouring helpers, this one calls neither
 * clear_msacsr_cause() nor check_msacsr_cause(), although
 * MSA_FLOAT_UNOP() still invokes update_msacsr().  Presumably deliberate
 * because the conversion cannot raise an exception - confirm.
 */
void helper_msa_ffql_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
                        uint32_t ws)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src = &env->active_fpu.fpr[ws].wr;
    wr_t scratch, *out = &scratch;
    uint32_t n;

    if (df == DF_WORD) {
        for (n = 0; n < DF_ELEMENTS(DF_WORD); n++) {
            MSA_FLOAT_UNOP(out->w[n], from_q16, Lh(src, n), 32);
        }
    } else if (df == DF_DOUBLE) {
        for (n = 0; n < DF_ELEMENTS(DF_DOUBLE); n++) {
            MSA_FLOAT_UNOP(out->d[n], from_q32, Lw(src, n), 64);
        }
    } else {
        g_assert_not_reached();
    }

    msa_move_v(dst, out);
}
8056 
/*
 * FFQR.df helper.
 *
 * Converts the fixed-point elements of ws that are selected by the
 * Rh()/Rw() accessors to floating point and writes the result to wd:
 *   - DF_WORD:   16-bit elements go through float32_from_q16();
 *   - DF_DOUBLE: 32-bit elements go through float64_from_q32().
 * Any other df is a caller bug.
 *
 * NOTE(review): unlike the neighbouring helpers, this one calls neither
 * clear_msacsr_cause() nor check_msacsr_cause(), although
 * MSA_FLOAT_UNOP() still invokes update_msacsr().  Presumably deliberate
 * because the conversion cannot raise an exception - confirm.
 */
void helper_msa_ffqr_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
                        uint32_t ws)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src = &env->active_fpu.fpr[ws].wr;
    wr_t scratch, *out = &scratch;
    uint32_t n;

    if (df == DF_WORD) {
        for (n = 0; n < DF_ELEMENTS(DF_WORD); n++) {
            MSA_FLOAT_UNOP(out->w[n], from_q16, Rh(src, n), 32);
        }
    } else if (df == DF_DOUBLE) {
        for (n = 0; n < DF_ELEMENTS(DF_DOUBLE); n++) {
            MSA_FLOAT_UNOP(out->d[n], from_q32, Rw(src, n), 64);
        }
    } else {
        g_assert_not_reached();
    }

    msa_move_v(dst, out);
}
8082 
/*
 * FTINT_S.df helper.
 *
 * Converts every floating-point element of ws to a signed integer of the
 * same width with float32_to_int32() / float64_to_int64() via
 * MSA_FLOAT_UNOP0(), and writes the result to wd.
 * df must be DF_WORD or DF_DOUBLE; anything else is a caller bug.
 *
 * The result is assembled in a scratch register and copied to wd only
 * after check_msacsr_cause() has returned.
 */
void helper_msa_ftint_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
                           uint32_t ws)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src = &env->active_fpu.fpr[ws].wr;
    wr_t scratch, *out = &scratch;
    uint32_t n;

    clear_msacsr_cause(env);

    if (df == DF_WORD) {
        for (n = 0; n < DF_ELEMENTS(DF_WORD); n++) {
            MSA_FLOAT_UNOP0(out->w[n], to_int32, src->w[n], 32);
        }
    } else if (df == DF_DOUBLE) {
        for (n = 0; n < DF_ELEMENTS(DF_DOUBLE); n++) {
            MSA_FLOAT_UNOP0(out->d[n], to_int64, src->d[n], 64);
        }
    } else {
        g_assert_not_reached();
    }

    check_msacsr_cause(env, GETPC());

    msa_move_v(dst, out);
}
8112 
/*
 * FTINT_U.df helper.
 *
 * Applies MSA_FLOAT_UNOP0 with the to_uint32 (DF_WORD) or to_uint64
 * (DF_DOUBLE) operation to every element of ws.  Any other df value is
 * treated as unreachable.
 *
 * clear_msacsr_cause() runs before the first element is converted and
 * check_msacsr_cause() runs after the last one; the result is copied into
 * wd only after that check has returned.
 */
void helper_msa_ftint_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
                           uint32_t ws)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src = &env->active_fpu.fpr[ws].wr;
    wr_t scratch;
    wr_t *out = &scratch;
    uint32_t n;

    clear_msacsr_cause(env);

    switch (df) {
    case DF_WORD:
        for (n = 0; n < DF_ELEMENTS(DF_WORD); n++) {
            MSA_FLOAT_UNOP0(out->w[n], to_uint32, src->w[n], 32);
        }
        break;
    case DF_DOUBLE:
        for (n = 0; n < DF_ELEMENTS(DF_DOUBLE); n++) {
            MSA_FLOAT_UNOP0(out->d[n], to_uint64, src->d[n], 64);
        }
        break;
    default:
        g_assert_not_reached();
    }

    check_msacsr_cause(env, GETPC());

    msa_move_v(dst, out);
}
8142 
/*
 * "from" spellings of the integer-to-float conversion routines, used by
 * the FFINT helpers below, which pass from_int32 / from_uint32 /
 * from_int64 / from_uint64 as the operation name to MSA_FLOAT_UNOP.
 * NOTE(review): presumably MSA_FLOAT_UNOP pastes "float<BITS>_<OP>"
 * together, which is why these aliases are needed - confirm against the
 * macro definition.
 */

/* Signed sources */
#define float32_from_int32 int32_to_float32
#define float64_from_int64 int64_to_float64

/* Unsigned sources */
#define float32_from_uint32 uint32_to_float32
#define float64_from_uint64 uint64_to_float64
8148 
/*
 * FFINT_S.df helper.
 *
 * Applies MSA_FLOAT_UNOP with the from_int32 (DF_WORD) or from_int64
 * (DF_DOUBLE) operation to every element of ws.  Any other df value is
 * treated as unreachable.
 *
 * clear_msacsr_cause() runs before the first element is converted and
 * check_msacsr_cause() runs after the last one; the result is copied into
 * wd only after that check has returned.
 */
void helper_msa_ffint_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
                           uint32_t ws)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src = &env->active_fpu.fpr[ws].wr;
    wr_t scratch;
    wr_t *out = &scratch;
    uint32_t n;

    clear_msacsr_cause(env);

    switch (df) {
    case DF_WORD:
        for (n = 0; n < DF_ELEMENTS(DF_WORD); n++) {
            MSA_FLOAT_UNOP(out->w[n], from_int32, src->w[n], 32);
        }
        break;
    case DF_DOUBLE:
        for (n = 0; n < DF_ELEMENTS(DF_DOUBLE); n++) {
            MSA_FLOAT_UNOP(out->d[n], from_int64, src->d[n], 64);
        }
        break;
    default:
        g_assert_not_reached();
    }

    check_msacsr_cause(env, GETPC());

    msa_move_v(dst, out);
}
8178 
/*
 * FFINT_U.df helper.
 *
 * Applies MSA_FLOAT_UNOP with the from_uint32 (DF_WORD) or from_uint64
 * (DF_DOUBLE) operation to every element of ws.  Any other df value is
 * treated as unreachable.
 *
 * clear_msacsr_cause() runs before the first element is converted and
 * check_msacsr_cause() runs after the last one; the result is copied into
 * wd only after that check has returned.
 */
void helper_msa_ffint_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
                           uint32_t ws)
{
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    wr_t *src = &env->active_fpu.fpr[ws].wr;
    wr_t scratch;
    wr_t *out = &scratch;
    uint32_t n;

    clear_msacsr_cause(env);

    switch (df) {
    case DF_WORD:
        for (n = 0; n < DF_ELEMENTS(DF_WORD); n++) {
            MSA_FLOAT_UNOP(out->w[n], from_uint32, src->w[n], 32);
        }
        break;
    case DF_DOUBLE:
        for (n = 0; n < DF_ELEMENTS(DF_DOUBLE); n++) {
            MSA_FLOAT_UNOP(out->d[n], from_uint64, src->d[n], 64);
        }
        break;
    default:
        g_assert_not_reached();
    }

    check_msacsr_cause(env, GETPC());

    msa_move_v(dst, out);
}
8208 
/*
 * Width of one element of data format df, computed as 8 << df.
 *
 * NOTE: DF_BITS and DF_ELEMENTS repeat, token for token, the definitions
 * near the top of this file.  They must stay identical to those, since a
 * macro may only be redefined with the same replacement list.
 */
#define DF_BITS(df) (1 << ((df) + 3))

/*
 * Number of df-sized elements that make up MSA_WRLEN
 * (MSA_WRLEN divided by the element width from DF_BITS).
 */
#define DF_ELEMENTS(df) (MSA_WRLEN / DF_BITS(df))
8214 
/*
 * Swap the two bytes inside each of the four 16-bit lanes of x.
 * The lanes themselves stay in place.
 */
static inline uint64_t bswap16x4(uint64_t x)
{
    const uint64_t upper_bytes = 0xff00ff00ff00ff00ull;
    uint64_t low_to_high = (x << 8) & upper_bytes;
    uint64_t high_to_low = (x & upper_bytes) >> 8;

    return low_to_high | high_to_low;
}
8220 
/*
 * Byte-swap each of the two 32-bit lanes of x independently.
 *
 * bswap64() reverses all eight bytes, which also exchanges the two lanes;
 * rotating by 32 bits puts the lanes back in their original positions.
 */
static inline uint64_t bswap32x2(uint64_t x)
{
    uint64_t reversed = bswap64(x);

    return ror64(reversed, 32);
}
8225 
/*
 * LD.B helper: load 16 bytes from addr into vector register wd.
 *
 * The data is fetched as two little-endian 64-bit loads (addr, addr + 8).
 * Both loads are issued before wd is written, so wd is only modified once
 * both have returned.
 */
void helper_msa_ld_b(CPUMIPSState *env, uint32_t wd,
                     target_ulong addr)
{
    uintptr_t retaddr = GETPC();
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    /* Byte elements: vector element ordering makes this LE. */
    uint64_t q0 = cpu_ldq_le_data_ra(env, addr + 0, retaddr);
    uint64_t q1 = cpu_ldq_le_data_ra(env, addr + 8, retaddr);

    dst->d[0] = q0;
    dst->d[1] = q1;
}
8239 
/*
 * LD.H helper: load 16 bytes from addr into vector register wd as
 * halfword elements.
 *
 * The data is fetched as two little-endian 64-bit loads.  When
 * mips_env_is_bigendian() reports a big-endian target, the bytes of each
 * halfword are swapped with bswap16x4() before the register is written.
 * Both loads are issued before wd is modified.
 */
void helper_msa_ld_h(CPUMIPSState *env, uint32_t wd,
                     target_ulong addr)
{
    uintptr_t retaddr = GETPC();
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    uint64_t q0 = cpu_ldq_le_data_ra(env, addr + 0, retaddr);
    uint64_t q1 = cpu_ldq_le_data_ra(env, addr + 8, retaddr);

    if (!mips_env_is_bigendian(env)) {
        /* Little-endian target: the loaded layout is already right. */
        dst->d[0] = q0;
        dst->d[1] = q1;
        return;
    }

    /* Big-endian target: fix up the four halfwords of each quadword. */
    dst->d[0] = bswap16x4(q0);
    dst->d[1] = bswap16x4(q1);
}
8260 
/*
 * LD.W helper: load 16 bytes from addr into vector register wd as word
 * elements.
 *
 * The data is fetched as two little-endian 64-bit loads.  When
 * mips_env_is_bigendian() reports a big-endian target, the bytes of each
 * word are swapped with bswap32x2() before the register is written.
 * Both loads are issued before wd is modified.
 */
void helper_msa_ld_w(CPUMIPSState *env, uint32_t wd,
                     target_ulong addr)
{
    uintptr_t retaddr = GETPC();
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    uint64_t q0 = cpu_ldq_le_data_ra(env, addr + 0, retaddr);
    uint64_t q1 = cpu_ldq_le_data_ra(env, addr + 8, retaddr);

    if (!mips_env_is_bigendian(env)) {
        /* Little-endian target: the loaded layout is already right. */
        dst->d[0] = q0;
        dst->d[1] = q1;
        return;
    }

    /* Big-endian target: fix up the two words of each quadword. */
    dst->d[0] = bswap32x2(q0);
    dst->d[1] = bswap32x2(q1);
}
8281 
/*
 * LD.D helper: load two doublewords from addr and addr + 8 into vector
 * register wd.
 *
 * Unlike the narrower variants this uses cpu_ldq_data_ra() (no explicit
 * endianness suffix) and needs no per-element swapping.  Both loads are
 * issued before wd is modified.
 */
void helper_msa_ld_d(CPUMIPSState *env, uint32_t wd,
                     target_ulong addr)
{
    uintptr_t retaddr = GETPC();
    wr_t *dst = &env->active_fpu.fpr[wd].wr;
    uint64_t q0 = cpu_ldq_data_ra(env, addr + 0, retaddr);
    uint64_t q1 = cpu_ldq_data_ra(env, addr + 8, retaddr);

    dst->d[0] = q0;
    dst->d[1] = q1;
}
8294 
/*
 * True when an access of MSA_WRLEN / 8 bytes starting at address x does
 * not fit in the target page containing x: the offset of x within its
 * page (x & ~TARGET_PAGE_MASK) plus the offset of the last accessed byte
 * reaches or exceeds TARGET_PAGE_SIZE.
 */
#define MSA_PAGESPAN(x) \
        ((((x) & ~TARGET_PAGE_MASK) + MSA_WRLEN / 8 - 1) >= TARGET_PAGE_SIZE)
8297 
/*
 * Probe for write access before a vector store that crosses a page
 * boundary.
 *
 * When MSA_PAGESPAN(addr) is false nothing is done.  Otherwise
 * probe_write() is called (with size 0) on addr itself and then on the
 * first byte of the following page, so both pages are checked before the
 * caller starts storing.
 *
 * @env:     CPU state
 * @addr:    start address of the store
 * @mmu_idx: MMU index passed through to probe_write()
 * @retaddr: host return address passed through to probe_write()
 */
static inline void ensure_writable_pages(CPUMIPSState *env,
                                         target_ulong addr,
                                         int mmu_idx,
                                         uintptr_t retaddr)
{
    target_ulong next_page;

    /* FIXME: Probe the actual accesses (pass and use a size) */
    if (!unlikely(MSA_PAGESPAN(addr))) {
        return;
    }

    /* Page holding the start of the access. */
    probe_write(env, addr, 0, mmu_idx, retaddr);

    /* Page holding the tail of the access. */
    next_page = (addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
    probe_write(env, next_page, 0, mmu_idx, retaddr);
}
8312 
/*
 * ST.B helper: store the 16 bytes of vector register wd to addr.
 *
 * ensure_writable_pages() is called first, then the register is written
 * out as two little-endian 64-bit stores (addr, addr + 8).
 */
void helper_msa_st_b(CPUMIPSState *env, uint32_t wd,
                     target_ulong addr)
{
    const wr_t *src = &env->active_fpu.fpr[wd].wr;
    int mmu_idx = mips_env_mmu_index(env);
    uintptr_t retaddr = GETPC();

    ensure_writable_pages(env, addr, mmu_idx, retaddr);

    /* Byte elements: vector element ordering makes this LE. */
    cpu_stq_le_data_ra(env, addr + 0, src->d[0], retaddr);
    cpu_stq_le_data_ra(env, addr + 8, src->d[1], retaddr);
}
8326 
/*
 * ST.H helper: store vector register wd to addr as halfword elements.
 *
 * ensure_writable_pages() is called first.  The register is then written
 * out as two little-endian 64-bit stores; when mips_env_is_bigendian()
 * reports a big-endian target the bytes of each halfword are swapped with
 * bswap16x4() beforehand (the mirror image of helper_msa_ld_h).
 */
void helper_msa_st_h(CPUMIPSState *env, uint32_t wd,
                     target_ulong addr)
{
    const wr_t *src = &env->active_fpu.fpr[wd].wr;
    int mmu_idx = mips_env_mmu_index(env);
    uintptr_t retaddr = GETPC();
    uint64_t q0, q1;

    ensure_writable_pages(env, addr, mmu_idx, retaddr);

    q0 = src->d[0];
    q1 = src->d[1];
    if (mips_env_is_bigendian(env)) {
        /* Big-endian target: fix up the four halfwords of each quadword. */
        q0 = bswap16x4(q0);
        q1 = bswap16x4(q1);
    }

    cpu_stq_le_data_ra(env, addr + 0, q0, retaddr);
    cpu_stq_le_data_ra(env, addr + 8, q1, retaddr);
}
8347 
/*
 * ST.W helper: store vector register wd to addr as word elements.
 *
 * ensure_writable_pages() is called first.  The register is then written
 * out as two little-endian 64-bit stores; when mips_env_is_bigendian()
 * reports a big-endian target the bytes of each word are swapped with
 * bswap32x2() beforehand (the mirror image of helper_msa_ld_w).
 */
void helper_msa_st_w(CPUMIPSState *env, uint32_t wd,
                     target_ulong addr)
{
    const wr_t *src = &env->active_fpu.fpr[wd].wr;
    int mmu_idx = mips_env_mmu_index(env);
    uintptr_t retaddr = GETPC();
    uint64_t q0, q1;

    ensure_writable_pages(env, addr, mmu_idx, retaddr);

    q0 = src->d[0];
    q1 = src->d[1];
    if (mips_env_is_bigendian(env)) {
        /* Big-endian target: fix up the two words of each quadword. */
        q0 = bswap32x2(q0);
        q1 = bswap32x2(q1);
    }

    cpu_stq_le_data_ra(env, addr + 0, q0, retaddr);
    cpu_stq_le_data_ra(env, addr + 8, q1, retaddr);
}
8368 
/*
 * ST.D helper: store the two doublewords of vector register wd to addr
 * and addr + 8.
 *
 * ensure_writable_pages() is called first, then each doubleword is
 * written with cpu_stq_data_ra() (no explicit endianness suffix), so no
 * per-element swapping is needed here.
 *
 * The host return address is captured once in ra and that single value is
 * handed to both the page probe and the stores, matching the other
 * helper_msa_st_* functions.
 */
void helper_msa_st_d(CPUMIPSState *env, uint32_t wd,
                     target_ulong addr)
{
    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    int mmu_idx = mips_env_mmu_index(env);
    uintptr_t ra = GETPC();

    ensure_writable_pages(env, addr, mmu_idx, ra);

    cpu_stq_data_ra(env, addr + 0, pwd->d[0], ra);
    cpu_stq_data_ra(env, addr + 8, pwd->d[1], ra);
}
8381