xref: /qemu/target/mips/tcg/msa_helper.c (revision d64db833d6e3cbe9ea5f36342480f920f3675cea)
1  /*
2   * MIPS SIMD Architecture Module Instruction emulation helpers for QEMU.
3   *
4   * Copyright (c) 2014 Imagination Technologies
5   *
6   * This library is free software; you can redistribute it and/or
7   * modify it under the terms of the GNU Lesser General Public
8   * License as published by the Free Software Foundation; either
9   * version 2.1 of the License, or (at your option) any later version.
10   *
11   * This library is distributed in the hope that it will be useful,
12   * but WITHOUT ANY WARRANTY; without even the implied warranty of
13   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14   * Lesser General Public License for more details.
15   *
16   * You should have received a copy of the GNU Lesser General Public
17   * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18   */
19  
20  #include "qemu/osdep.h"
21  #include "cpu.h"
22  #include "internal.h"
23  #include "tcg/tcg.h"
24  #include "exec/exec-all.h"
25  #include "accel/tcg/cpu-ldst.h"
26  #include "exec/helper-proto.h"
27  #include "exec/memop.h"
28  #include "exec/target_page.h"
29  #include "fpu/softfloat.h"
30  #include "fpu_helper.h"
31  
/* Data format min and max values */

/* Element width in bits for data format df: 8 << df. */
#define DF_BITS(df) (1 << ((df) + 3))

/*
 * The signed limits are computed with unsigned shifts and converted to
 * int64_t afterwards, so the 64-bit cases never shift a 1 into the sign
 * bit of a signed operand.
 */
#define DF_MAX_INT(df)  ((int64_t)((1ULL << (DF_BITS(df) - 1)) - 1))
#define M_MAX_INT(m)    ((int64_t)((1ULL << ((m)         - 1)) - 1))

#define DF_MIN_INT(df)  ((int64_t)(-(1ULL << (DF_BITS(df) - 1))))
#define M_MIN_INT(m)    ((int64_t)(-(1ULL << ((m)         - 1))))

#define DF_MAX_UINT(df) ((uint64_t)(-1ULL >> (64 - DF_BITS(df))))
#define M_MAX_UINT(m)   ((uint64_t)(-1ULL >> (64 - (m))))

/* Keep only the low DF_BITS(df) bits of x (zero-extension). */
#define UNSIGNED(x, df) ((x) & DF_MAX_UINT(df))
/*
 * Sign-extend the low DF_BITS(df) bits of x to 64 bits.  The argument is
 * parenthesized so that a compound expression is converted as a whole, and
 * the left shift is done on an unsigned value.
 */
#define SIGNED(x, df)                                                   \
    ((int64_t)((uint64_t)(x) << (64 - DF_BITS(df))) >> (64 - DF_BITS(df)))

/* Element-by-element access macros */
#define DF_ELEMENTS(df) (MSA_WRLEN / DF_BITS(df))
50  
51  
52  
53  /*
54   * Bit Count
55   * ---------
56   *
57   * +---------------+----------------------------------------------------------+
58   * | NLOC.B        | Vector Leading Ones Count (byte)                         |
59   * | NLOC.H        | Vector Leading Ones Count (halfword)                     |
60   * | NLOC.W        | Vector Leading Ones Count (word)                         |
61   * | NLOC.D        | Vector Leading Ones Count (doubleword)                   |
62   * | NLZC.B        | Vector Leading Zeros Count (byte)                        |
63   * | NLZC.H        | Vector Leading Zeros Count (halfword)                    |
64   * | NLZC.W        | Vector Leading Zeros Count (word)                        |
65   * | NLZC.D        | Vector Leading Zeros Count (doubleword)                  |
66   * | PCNT.B        | Vector Population Count (byte)                           |
67   * | PCNT.H        | Vector Population Count (halfword)                       |
68   * | PCNT.W        | Vector Population Count (word)                           |
69   * | PCNT.D        | Vector Population Count (doubleword)                     |
70   * +---------------+----------------------------------------------------------+
71   */
72  
/*
 * Count the leading zero bits of one element.
 *
 * df selects the element width (DF_BITS(df) bits); arg is masked to that
 * width first.  The search halves its step each round: whenever the upper
 * part of the remaining value is non-zero, the candidate count is reduced
 * by the step and the search continues in that upper part.
 *
 * Returns the number of leading zeros, DF_BITS(df) when the element is 0.
 */
static inline int64_t msa_nlzc_df(uint32_t df, int64_t arg)
{
    uint64_t value = UNSIGNED(arg, df);
    int zeros = DF_BITS(df);
    int step;

    for (step = DF_BITS(df) / 2; step != 0; step >>= 1) {
        uint64_t upper = value >> step;

        if (upper != 0) {
            zeros -= step;
            value = upper;
        }
    }

    /* value is now 0 or 1; a remaining 1 accounts for one more used bit. */
    return zeros - value;
}
93  
/*
 * Count the leading one bits of one element: invert the element within its
 * DF_BITS(df)-bit width and count the leading zeros of the result.
 */
static inline int64_t msa_nloc_df(uint32_t df, int64_t arg)
{
    int64_t inverted = UNSIGNED((~arg), df);

    return msa_nlzc_df(df, inverted);
}
98  
99  void helper_msa_nloc_b(CPUMIPSState *env, uint32_t wd, uint32_t ws)
100  {
101      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
102      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
103  
104      pwd->b[0]  = msa_nloc_df(DF_BYTE, pws->b[0]);
105      pwd->b[1]  = msa_nloc_df(DF_BYTE, pws->b[1]);
106      pwd->b[2]  = msa_nloc_df(DF_BYTE, pws->b[2]);
107      pwd->b[3]  = msa_nloc_df(DF_BYTE, pws->b[3]);
108      pwd->b[4]  = msa_nloc_df(DF_BYTE, pws->b[4]);
109      pwd->b[5]  = msa_nloc_df(DF_BYTE, pws->b[5]);
110      pwd->b[6]  = msa_nloc_df(DF_BYTE, pws->b[6]);
111      pwd->b[7]  = msa_nloc_df(DF_BYTE, pws->b[7]);
112      pwd->b[8]  = msa_nloc_df(DF_BYTE, pws->b[8]);
113      pwd->b[9]  = msa_nloc_df(DF_BYTE, pws->b[9]);
114      pwd->b[10] = msa_nloc_df(DF_BYTE, pws->b[10]);
115      pwd->b[11] = msa_nloc_df(DF_BYTE, pws->b[11]);
116      pwd->b[12] = msa_nloc_df(DF_BYTE, pws->b[12]);
117      pwd->b[13] = msa_nloc_df(DF_BYTE, pws->b[13]);
118      pwd->b[14] = msa_nloc_df(DF_BYTE, pws->b[14]);
119      pwd->b[15] = msa_nloc_df(DF_BYTE, pws->b[15]);
120  }
121  
122  void helper_msa_nloc_h(CPUMIPSState *env, uint32_t wd, uint32_t ws)
123  {
124      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
125      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
126  
127      pwd->h[0]  = msa_nloc_df(DF_HALF, pws->h[0]);
128      pwd->h[1]  = msa_nloc_df(DF_HALF, pws->h[1]);
129      pwd->h[2]  = msa_nloc_df(DF_HALF, pws->h[2]);
130      pwd->h[3]  = msa_nloc_df(DF_HALF, pws->h[3]);
131      pwd->h[4]  = msa_nloc_df(DF_HALF, pws->h[4]);
132      pwd->h[5]  = msa_nloc_df(DF_HALF, pws->h[5]);
133      pwd->h[6]  = msa_nloc_df(DF_HALF, pws->h[6]);
134      pwd->h[7]  = msa_nloc_df(DF_HALF, pws->h[7]);
135  }
136  
137  void helper_msa_nloc_w(CPUMIPSState *env, uint32_t wd, uint32_t ws)
138  {
139      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
140      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
141  
142      pwd->w[0]  = msa_nloc_df(DF_WORD, pws->w[0]);
143      pwd->w[1]  = msa_nloc_df(DF_WORD, pws->w[1]);
144      pwd->w[2]  = msa_nloc_df(DF_WORD, pws->w[2]);
145      pwd->w[3]  = msa_nloc_df(DF_WORD, pws->w[3]);
146  }
147  
148  void helper_msa_nloc_d(CPUMIPSState *env, uint32_t wd, uint32_t ws)
149  {
150      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
151      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
152  
153      pwd->d[0]  = msa_nloc_df(DF_DOUBLE, pws->d[0]);
154      pwd->d[1]  = msa_nloc_df(DF_DOUBLE, pws->d[1]);
155  }
156  
157  void helper_msa_nlzc_b(CPUMIPSState *env, uint32_t wd, uint32_t ws)
158  {
159      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
160      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
161  
162      pwd->b[0]  = msa_nlzc_df(DF_BYTE, pws->b[0]);
163      pwd->b[1]  = msa_nlzc_df(DF_BYTE, pws->b[1]);
164      pwd->b[2]  = msa_nlzc_df(DF_BYTE, pws->b[2]);
165      pwd->b[3]  = msa_nlzc_df(DF_BYTE, pws->b[3]);
166      pwd->b[4]  = msa_nlzc_df(DF_BYTE, pws->b[4]);
167      pwd->b[5]  = msa_nlzc_df(DF_BYTE, pws->b[5]);
168      pwd->b[6]  = msa_nlzc_df(DF_BYTE, pws->b[6]);
169      pwd->b[7]  = msa_nlzc_df(DF_BYTE, pws->b[7]);
170      pwd->b[8]  = msa_nlzc_df(DF_BYTE, pws->b[8]);
171      pwd->b[9]  = msa_nlzc_df(DF_BYTE, pws->b[9]);
172      pwd->b[10] = msa_nlzc_df(DF_BYTE, pws->b[10]);
173      pwd->b[11] = msa_nlzc_df(DF_BYTE, pws->b[11]);
174      pwd->b[12] = msa_nlzc_df(DF_BYTE, pws->b[12]);
175      pwd->b[13] = msa_nlzc_df(DF_BYTE, pws->b[13]);
176      pwd->b[14] = msa_nlzc_df(DF_BYTE, pws->b[14]);
177      pwd->b[15] = msa_nlzc_df(DF_BYTE, pws->b[15]);
178  }
179  
180  void helper_msa_nlzc_h(CPUMIPSState *env, uint32_t wd, uint32_t ws)
181  {
182      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
183      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
184  
185      pwd->h[0]  = msa_nlzc_df(DF_HALF, pws->h[0]);
186      pwd->h[1]  = msa_nlzc_df(DF_HALF, pws->h[1]);
187      pwd->h[2]  = msa_nlzc_df(DF_HALF, pws->h[2]);
188      pwd->h[3]  = msa_nlzc_df(DF_HALF, pws->h[3]);
189      pwd->h[4]  = msa_nlzc_df(DF_HALF, pws->h[4]);
190      pwd->h[5]  = msa_nlzc_df(DF_HALF, pws->h[5]);
191      pwd->h[6]  = msa_nlzc_df(DF_HALF, pws->h[6]);
192      pwd->h[7]  = msa_nlzc_df(DF_HALF, pws->h[7]);
193  }
194  
195  void helper_msa_nlzc_w(CPUMIPSState *env, uint32_t wd, uint32_t ws)
196  {
197      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
198      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
199  
200      pwd->w[0]  = msa_nlzc_df(DF_WORD, pws->w[0]);
201      pwd->w[1]  = msa_nlzc_df(DF_WORD, pws->w[1]);
202      pwd->w[2]  = msa_nlzc_df(DF_WORD, pws->w[2]);
203      pwd->w[3]  = msa_nlzc_df(DF_WORD, pws->w[3]);
204  }
205  
206  void helper_msa_nlzc_d(CPUMIPSState *env, uint32_t wd, uint32_t ws)
207  {
208      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
209      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
210  
211      pwd->d[0]  = msa_nlzc_df(DF_DOUBLE, pws->d[0]);
212      pwd->d[1]  = msa_nlzc_df(DF_DOUBLE, pws->d[1]);
213  }
214  
/*
 * Count the set bits of one element.
 *
 * arg is first masked to DF_BITS(df) bits.  The count is then formed by
 * six pairwise-add rounds: round k adds neighbouring 2^k-bit fields, so
 * after the last round the whole 64-bit value holds the total.
 */
static inline int64_t msa_pcnt_df(uint32_t df, int64_t arg)
{
    /* field_mask[k] keeps the low 2^k bits of every 2^(k+1)-bit field. */
    static const uint64_t field_mask[6] = {
        0x5555555555555555ULL,
        0x3333333333333333ULL,
        0x0F0F0F0F0F0F0F0FULL,
        0x00FF00FF00FF00FFULL,
        0x0000FFFF0000FFFFULL,
        0x00000000FFFFFFFFULL,
    };
    uint64_t sum = UNSIGNED(arg, df);
    int round;

    for (round = 0; round < 6; round++) {
        uint64_t low = sum & field_mask[round];
        uint64_t high = (sum >> (1 << round)) & field_mask[round];

        sum = low + high;
    }

    return sum;
}
230  
231  void helper_msa_pcnt_b(CPUMIPSState *env, uint32_t wd, uint32_t ws)
232  {
233      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
234      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
235  
236      pwd->b[0]  = msa_pcnt_df(DF_BYTE, pws->b[0]);
237      pwd->b[1]  = msa_pcnt_df(DF_BYTE, pws->b[1]);
238      pwd->b[2]  = msa_pcnt_df(DF_BYTE, pws->b[2]);
239      pwd->b[3]  = msa_pcnt_df(DF_BYTE, pws->b[3]);
240      pwd->b[4]  = msa_pcnt_df(DF_BYTE, pws->b[4]);
241      pwd->b[5]  = msa_pcnt_df(DF_BYTE, pws->b[5]);
242      pwd->b[6]  = msa_pcnt_df(DF_BYTE, pws->b[6]);
243      pwd->b[7]  = msa_pcnt_df(DF_BYTE, pws->b[7]);
244      pwd->b[8]  = msa_pcnt_df(DF_BYTE, pws->b[8]);
245      pwd->b[9]  = msa_pcnt_df(DF_BYTE, pws->b[9]);
246      pwd->b[10] = msa_pcnt_df(DF_BYTE, pws->b[10]);
247      pwd->b[11] = msa_pcnt_df(DF_BYTE, pws->b[11]);
248      pwd->b[12] = msa_pcnt_df(DF_BYTE, pws->b[12]);
249      pwd->b[13] = msa_pcnt_df(DF_BYTE, pws->b[13]);
250      pwd->b[14] = msa_pcnt_df(DF_BYTE, pws->b[14]);
251      pwd->b[15] = msa_pcnt_df(DF_BYTE, pws->b[15]);
252  }
253  
254  void helper_msa_pcnt_h(CPUMIPSState *env, uint32_t wd, uint32_t ws)
255  {
256      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
257      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
258  
259      pwd->h[0]  = msa_pcnt_df(DF_HALF, pws->h[0]);
260      pwd->h[1]  = msa_pcnt_df(DF_HALF, pws->h[1]);
261      pwd->h[2]  = msa_pcnt_df(DF_HALF, pws->h[2]);
262      pwd->h[3]  = msa_pcnt_df(DF_HALF, pws->h[3]);
263      pwd->h[4]  = msa_pcnt_df(DF_HALF, pws->h[4]);
264      pwd->h[5]  = msa_pcnt_df(DF_HALF, pws->h[5]);
265      pwd->h[6]  = msa_pcnt_df(DF_HALF, pws->h[6]);
266      pwd->h[7]  = msa_pcnt_df(DF_HALF, pws->h[7]);
267  }
268  
269  void helper_msa_pcnt_w(CPUMIPSState *env, uint32_t wd, uint32_t ws)
270  {
271      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
272      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
273  
274      pwd->w[0]  = msa_pcnt_df(DF_WORD, pws->w[0]);
275      pwd->w[1]  = msa_pcnt_df(DF_WORD, pws->w[1]);
276      pwd->w[2]  = msa_pcnt_df(DF_WORD, pws->w[2]);
277      pwd->w[3]  = msa_pcnt_df(DF_WORD, pws->w[3]);
278  }
279  
280  void helper_msa_pcnt_d(CPUMIPSState *env, uint32_t wd, uint32_t ws)
281  {
282      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
283      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
284  
285      pwd->d[0]  = msa_pcnt_df(DF_DOUBLE, pws->d[0]);
286      pwd->d[1]  = msa_pcnt_df(DF_DOUBLE, pws->d[1]);
287  }
288  
289  
290  /*
291   * Bit Move
292   * --------
293   *
294   * +---------------+----------------------------------------------------------+
295   * | BINSL.B       | Vector Bit Insert Left (byte)                            |
296   * | BINSL.H       | Vector Bit Insert Left (halfword)                        |
297   * | BINSL.W       | Vector Bit Insert Left (word)                            |
298   * | BINSL.D       | Vector Bit Insert Left (doubleword)                      |
299   * | BINSR.B       | Vector Bit Insert Right (byte)                           |
300   * | BINSR.H       | Vector Bit Insert Right (halfword)                       |
301   * | BINSR.W       | Vector Bit Insert Right (word)                           |
302   * | BINSR.D       | Vector Bit Insert Right (doubleword)                     |
303   * | BMNZ.V        | Vector Bit Move If Not Zero                              |
304   * | BMZ.V         | Vector Bit Move If Zero                                  |
305   * | BSEL.V        | Vector Bit Select                                        |
306   * +---------------+----------------------------------------------------------+
307   */
308  
/*
 * Reduce x to a bit index inside an element of data format df, i.e. to the
 * range 0 .. DF_BITS(df) - 1.  DF_BITS(df) is a power of two, so the
 * reduction is a mask with DF_BITS(df) - 1.
 */
#define BIT_POSITION(x, df) ((uint64_t)(x) & (DF_BITS(df) - 1))
311  
/*
 * Bit Insert Left for one element.
 *
 * Let n = BIT_POSITION(arg2, df) + 1.  The result takes its n most
 * significant bits from arg1 and its remaining low bits from dest, all
 * within the DF_BITS(df)-bit element.  When n equals the element width the
 * result is simply arg1 (masked to the element width).
 */
static inline int64_t msa_binsl_df(uint32_t df,
                                   int64_t dest, int64_t arg1, int64_t arg2)
{
    const int32_t width = DF_BITS(df);
    const int32_t taken = BIT_POSITION(arg2, df) + 1;
    const int32_t kept = width - taken;
    uint64_t src = UNSIGNED(arg1, df);
    uint64_t dst = UNSIGNED(dest, df);
    uint64_t dest_low, src_high;

    if (taken == width) {
        return src;
    }

    /* Drop the top 'taken' bits of dest, keeping its low 'kept' bits. */
    dest_low = UNSIGNED(UNSIGNED(dst << taken, df) >> taken, df);
    /* Drop the low 'kept' bits of arg1, keeping its top 'taken' bits. */
    src_high = UNSIGNED(UNSIGNED(src >> kept, df) << kept, df);

    return dest_low | src_high;
}
326  
327  void helper_msa_binsl_b(CPUMIPSState *env,
328                          uint32_t wd, uint32_t ws, uint32_t wt)
329  {
330      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
331      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
332      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
333  
334      pwd->b[0]  = msa_binsl_df(DF_BYTE, pwd->b[0],  pws->b[0],  pwt->b[0]);
335      pwd->b[1]  = msa_binsl_df(DF_BYTE, pwd->b[1],  pws->b[1],  pwt->b[1]);
336      pwd->b[2]  = msa_binsl_df(DF_BYTE, pwd->b[2],  pws->b[2],  pwt->b[2]);
337      pwd->b[3]  = msa_binsl_df(DF_BYTE, pwd->b[3],  pws->b[3],  pwt->b[3]);
338      pwd->b[4]  = msa_binsl_df(DF_BYTE, pwd->b[4],  pws->b[4],  pwt->b[4]);
339      pwd->b[5]  = msa_binsl_df(DF_BYTE, pwd->b[5],  pws->b[5],  pwt->b[5]);
340      pwd->b[6]  = msa_binsl_df(DF_BYTE, pwd->b[6],  pws->b[6],  pwt->b[6]);
341      pwd->b[7]  = msa_binsl_df(DF_BYTE, pwd->b[7],  pws->b[7],  pwt->b[7]);
342      pwd->b[8]  = msa_binsl_df(DF_BYTE, pwd->b[8],  pws->b[8],  pwt->b[8]);
343      pwd->b[9]  = msa_binsl_df(DF_BYTE, pwd->b[9],  pws->b[9],  pwt->b[9]);
344      pwd->b[10] = msa_binsl_df(DF_BYTE, pwd->b[10], pws->b[10], pwt->b[10]);
345      pwd->b[11] = msa_binsl_df(DF_BYTE, pwd->b[11], pws->b[11], pwt->b[11]);
346      pwd->b[12] = msa_binsl_df(DF_BYTE, pwd->b[12], pws->b[12], pwt->b[12]);
347      pwd->b[13] = msa_binsl_df(DF_BYTE, pwd->b[13], pws->b[13], pwt->b[13]);
348      pwd->b[14] = msa_binsl_df(DF_BYTE, pwd->b[14], pws->b[14], pwt->b[14]);
349      pwd->b[15] = msa_binsl_df(DF_BYTE, pwd->b[15], pws->b[15], pwt->b[15]);
350  }
351  
352  void helper_msa_binsl_h(CPUMIPSState *env,
353                          uint32_t wd, uint32_t ws, uint32_t wt)
354  {
355      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
356      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
357      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
358  
359      pwd->h[0]  = msa_binsl_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
360      pwd->h[1]  = msa_binsl_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
361      pwd->h[2]  = msa_binsl_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
362      pwd->h[3]  = msa_binsl_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
363      pwd->h[4]  = msa_binsl_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
364      pwd->h[5]  = msa_binsl_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
365      pwd->h[6]  = msa_binsl_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
366      pwd->h[7]  = msa_binsl_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
367  }
368  
369  void helper_msa_binsl_w(CPUMIPSState *env,
370                          uint32_t wd, uint32_t ws, uint32_t wt)
371  {
372      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
373      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
374      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
375  
376      pwd->w[0]  = msa_binsl_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
377      pwd->w[1]  = msa_binsl_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
378      pwd->w[2]  = msa_binsl_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
379      pwd->w[3]  = msa_binsl_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
380  }
381  
382  void helper_msa_binsl_d(CPUMIPSState *env,
383                          uint32_t wd, uint32_t ws, uint32_t wt)
384  {
385      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
386      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
387      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
388  
389      pwd->d[0]  = msa_binsl_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
390      pwd->d[1]  = msa_binsl_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
391  }
392  
/*
 * Bit Insert Right for one element.
 *
 * Let n = BIT_POSITION(arg2, df) + 1.  The result takes its n least
 * significant bits from arg1 and its remaining high bits from dest, all
 * within the DF_BITS(df)-bit element.  When n equals the element width the
 * result is simply arg1 (masked to the element width).
 */
static inline int64_t msa_binsr_df(uint32_t df,
                                   int64_t dest, int64_t arg1, int64_t arg2)
{
    const int32_t width = DF_BITS(df);
    const int32_t taken = BIT_POSITION(arg2, df) + 1;
    const int32_t kept = width - taken;
    uint64_t src = UNSIGNED(arg1, df);
    uint64_t dst = UNSIGNED(dest, df);
    uint64_t dest_high, src_low;

    if (taken == width) {
        return src;
    }

    /* Drop the low 'taken' bits of dest, keeping its top 'kept' bits. */
    dest_high = UNSIGNED(UNSIGNED(dst >> taken, df) << taken, df);
    /* Drop the top 'kept' bits of arg1, keeping its low 'taken' bits. */
    src_low = UNSIGNED(UNSIGNED(src << kept, df) >> kept, df);

    return dest_high | src_low;
}
407  
408  void helper_msa_binsr_b(CPUMIPSState *env,
409                          uint32_t wd, uint32_t ws, uint32_t wt)
410  {
411      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
412      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
413      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
414  
415      pwd->b[0]  = msa_binsr_df(DF_BYTE, pwd->b[0],  pws->b[0],  pwt->b[0]);
416      pwd->b[1]  = msa_binsr_df(DF_BYTE, pwd->b[1],  pws->b[1],  pwt->b[1]);
417      pwd->b[2]  = msa_binsr_df(DF_BYTE, pwd->b[2],  pws->b[2],  pwt->b[2]);
418      pwd->b[3]  = msa_binsr_df(DF_BYTE, pwd->b[3],  pws->b[3],  pwt->b[3]);
419      pwd->b[4]  = msa_binsr_df(DF_BYTE, pwd->b[4],  pws->b[4],  pwt->b[4]);
420      pwd->b[5]  = msa_binsr_df(DF_BYTE, pwd->b[5],  pws->b[5],  pwt->b[5]);
421      pwd->b[6]  = msa_binsr_df(DF_BYTE, pwd->b[6],  pws->b[6],  pwt->b[6]);
422      pwd->b[7]  = msa_binsr_df(DF_BYTE, pwd->b[7],  pws->b[7],  pwt->b[7]);
423      pwd->b[8]  = msa_binsr_df(DF_BYTE, pwd->b[8],  pws->b[8],  pwt->b[8]);
424      pwd->b[9]  = msa_binsr_df(DF_BYTE, pwd->b[9],  pws->b[9],  pwt->b[9]);
425      pwd->b[10] = msa_binsr_df(DF_BYTE, pwd->b[10], pws->b[10], pwt->b[10]);
426      pwd->b[11] = msa_binsr_df(DF_BYTE, pwd->b[11], pws->b[11], pwt->b[11]);
427      pwd->b[12] = msa_binsr_df(DF_BYTE, pwd->b[12], pws->b[12], pwt->b[12]);
428      pwd->b[13] = msa_binsr_df(DF_BYTE, pwd->b[13], pws->b[13], pwt->b[13]);
429      pwd->b[14] = msa_binsr_df(DF_BYTE, pwd->b[14], pws->b[14], pwt->b[14]);
430      pwd->b[15] = msa_binsr_df(DF_BYTE, pwd->b[15], pws->b[15], pwt->b[15]);
431  }
432  
433  void helper_msa_binsr_h(CPUMIPSState *env,
434                          uint32_t wd, uint32_t ws, uint32_t wt)
435  {
436      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
437      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
438      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
439  
440      pwd->h[0]  = msa_binsr_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
441      pwd->h[1]  = msa_binsr_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
442      pwd->h[2]  = msa_binsr_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
443      pwd->h[3]  = msa_binsr_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
444      pwd->h[4]  = msa_binsr_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
445      pwd->h[5]  = msa_binsr_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
446      pwd->h[6]  = msa_binsr_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
447      pwd->h[7]  = msa_binsr_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
448  }
449  
450  void helper_msa_binsr_w(CPUMIPSState *env,
451                          uint32_t wd, uint32_t ws, uint32_t wt)
452  {
453      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
454      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
455      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
456  
457      pwd->w[0]  = msa_binsr_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
458      pwd->w[1]  = msa_binsr_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
459      pwd->w[2]  = msa_binsr_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
460      pwd->w[3]  = msa_binsr_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
461  }
462  
463  void helper_msa_binsr_d(CPUMIPSState *env,
464                          uint32_t wd, uint32_t ws, uint32_t wt)
465  {
466      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
467      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
468      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
469  
470      pwd->d[0]  = msa_binsr_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
471      pwd->d[1]  = msa_binsr_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
472  }
473  
474  void helper_msa_bmnz_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
475  {
476      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
477      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
478      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
479  
480      pwd->d[0] = UNSIGNED(                                                     \
481          ((pwd->d[0] & (~pwt->d[0])) | (pws->d[0] & pwt->d[0])), DF_DOUBLE);
482      pwd->d[1] = UNSIGNED(                                                     \
483          ((pwd->d[1] & (~pwt->d[1])) | (pws->d[1] & pwt->d[1])), DF_DOUBLE);
484  }
485  
486  void helper_msa_bmz_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
487  {
488      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
489      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
490      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
491  
492      pwd->d[0] = UNSIGNED(                                                     \
493          ((pwd->d[0] & pwt->d[0]) | (pws->d[0] & (~pwt->d[0]))), DF_DOUBLE);
494      pwd->d[1] = UNSIGNED(                                                     \
495          ((pwd->d[1] & pwt->d[1]) | (pws->d[1] & (~pwt->d[1]))), DF_DOUBLE);
496  }
497  
498  void helper_msa_bsel_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
499  {
500      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
501      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
502      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
503  
504      pwd->d[0] = UNSIGNED(                                                     \
505          (pws->d[0] & (~pwd->d[0])) | (pwt->d[0] & pwd->d[0]), DF_DOUBLE);
506      pwd->d[1] = UNSIGNED(                                                     \
507          (pws->d[1] & (~pwd->d[1])) | (pwt->d[1] & pwd->d[1]), DF_DOUBLE);
508  }
509  
510  
511  /*
512   * Bit Set
513   * -------
514   *
515   * +---------------+----------------------------------------------------------+
516   * | BCLR.B        | Vector Bit Clear (byte)                                  |
517   * | BCLR.H        | Vector Bit Clear (halfword)                              |
518   * | BCLR.W        | Vector Bit Clear (word)                                  |
519   * | BCLR.D        | Vector Bit Clear (doubleword)                            |
520   * | BNEG.B        | Vector Bit Negate (byte)                                 |
521   * | BNEG.H        | Vector Bit Negate (halfword)                             |
522   * | BNEG.W        | Vector Bit Negate (word)                                 |
523   * | BNEG.D        | Vector Bit Negate (doubleword)                           |
524   * | BSET.B        | Vector Bit Set (byte)                                    |
525   * | BSET.H        | Vector Bit Set (halfword)                                |
526   * | BSET.W        | Vector Bit Set (word)                                    |
527   * | BSET.D        | Vector Bit Set (doubleword)                              |
528   * +---------------+----------------------------------------------------------+
529   */
530  
/*
 * Bit Clear for one element.
 *
 * Returns arg1, masked to DF_BITS(df) bits, with bit number
 * BIT_POSITION(arg2, df) cleared.
 *
 * The single-bit mask is built from an unsigned constant: the bit index
 * can reach 63 for the widest element, and shifting a signed 1 into the
 * sign bit is not defined by the C standard.
 */
static inline int64_t msa_bclr_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    int32_t b_arg2 = BIT_POSITION(arg2, df);
    return UNSIGNED(arg1 & (~(1ULL << b_arg2)), df);
}
536  
537  void helper_msa_bclr_b(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
538  {
539      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
540      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
541      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
542  
543      pwd->b[0]  = msa_bclr_df(DF_BYTE, pws->b[0],  pwt->b[0]);
544      pwd->b[1]  = msa_bclr_df(DF_BYTE, pws->b[1],  pwt->b[1]);
545      pwd->b[2]  = msa_bclr_df(DF_BYTE, pws->b[2],  pwt->b[2]);
546      pwd->b[3]  = msa_bclr_df(DF_BYTE, pws->b[3],  pwt->b[3]);
547      pwd->b[4]  = msa_bclr_df(DF_BYTE, pws->b[4],  pwt->b[4]);
548      pwd->b[5]  = msa_bclr_df(DF_BYTE, pws->b[5],  pwt->b[5]);
549      pwd->b[6]  = msa_bclr_df(DF_BYTE, pws->b[6],  pwt->b[6]);
550      pwd->b[7]  = msa_bclr_df(DF_BYTE, pws->b[7],  pwt->b[7]);
551      pwd->b[8]  = msa_bclr_df(DF_BYTE, pws->b[8],  pwt->b[8]);
552      pwd->b[9]  = msa_bclr_df(DF_BYTE, pws->b[9],  pwt->b[9]);
553      pwd->b[10] = msa_bclr_df(DF_BYTE, pws->b[10], pwt->b[10]);
554      pwd->b[11] = msa_bclr_df(DF_BYTE, pws->b[11], pwt->b[11]);
555      pwd->b[12] = msa_bclr_df(DF_BYTE, pws->b[12], pwt->b[12]);
556      pwd->b[13] = msa_bclr_df(DF_BYTE, pws->b[13], pwt->b[13]);
557      pwd->b[14] = msa_bclr_df(DF_BYTE, pws->b[14], pwt->b[14]);
558      pwd->b[15] = msa_bclr_df(DF_BYTE, pws->b[15], pwt->b[15]);
559  }
560  
561  void helper_msa_bclr_h(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
562  {
563      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
564      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
565      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
566  
567      pwd->h[0]  = msa_bclr_df(DF_HALF, pws->h[0],  pwt->h[0]);
568      pwd->h[1]  = msa_bclr_df(DF_HALF, pws->h[1],  pwt->h[1]);
569      pwd->h[2]  = msa_bclr_df(DF_HALF, pws->h[2],  pwt->h[2]);
570      pwd->h[3]  = msa_bclr_df(DF_HALF, pws->h[3],  pwt->h[3]);
571      pwd->h[4]  = msa_bclr_df(DF_HALF, pws->h[4],  pwt->h[4]);
572      pwd->h[5]  = msa_bclr_df(DF_HALF, pws->h[5],  pwt->h[5]);
573      pwd->h[6]  = msa_bclr_df(DF_HALF, pws->h[6],  pwt->h[6]);
574      pwd->h[7]  = msa_bclr_df(DF_HALF, pws->h[7],  pwt->h[7]);
575  }
576  
577  void helper_msa_bclr_w(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
578  {
579      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
580      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
581      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
582  
583      pwd->w[0]  = msa_bclr_df(DF_WORD, pws->w[0],  pwt->w[0]);
584      pwd->w[1]  = msa_bclr_df(DF_WORD, pws->w[1],  pwt->w[1]);
585      pwd->w[2]  = msa_bclr_df(DF_WORD, pws->w[2],  pwt->w[2]);
586      pwd->w[3]  = msa_bclr_df(DF_WORD, pws->w[3],  pwt->w[3]);
587  }
588  
589  void helper_msa_bclr_d(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
590  {
591      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
592      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
593      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
594  
595      pwd->d[0]  = msa_bclr_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
596      pwd->d[1]  = msa_bclr_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
597  }
598  
/*
 * Bit Negate for one element.
 *
 * Returns arg1, masked to DF_BITS(df) bits, with bit number
 * BIT_POSITION(arg2, df) inverted.
 *
 * The single-bit mask is built from an unsigned constant: the bit index
 * can reach 63 for the widest element, and shifting a signed 1 into the
 * sign bit is not defined by the C standard.
 */
static inline int64_t msa_bneg_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    int32_t b_arg2 = BIT_POSITION(arg2, df);
    return UNSIGNED(arg1 ^ (1ULL << b_arg2), df);
}
604  
605  void helper_msa_bneg_b(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
606  {
607      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
608      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
609      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
610  
611      pwd->b[0]  = msa_bneg_df(DF_BYTE, pws->b[0],  pwt->b[0]);
612      pwd->b[1]  = msa_bneg_df(DF_BYTE, pws->b[1],  pwt->b[1]);
613      pwd->b[2]  = msa_bneg_df(DF_BYTE, pws->b[2],  pwt->b[2]);
614      pwd->b[3]  = msa_bneg_df(DF_BYTE, pws->b[3],  pwt->b[3]);
615      pwd->b[4]  = msa_bneg_df(DF_BYTE, pws->b[4],  pwt->b[4]);
616      pwd->b[5]  = msa_bneg_df(DF_BYTE, pws->b[5],  pwt->b[5]);
617      pwd->b[6]  = msa_bneg_df(DF_BYTE, pws->b[6],  pwt->b[6]);
618      pwd->b[7]  = msa_bneg_df(DF_BYTE, pws->b[7],  pwt->b[7]);
619      pwd->b[8]  = msa_bneg_df(DF_BYTE, pws->b[8],  pwt->b[8]);
620      pwd->b[9]  = msa_bneg_df(DF_BYTE, pws->b[9],  pwt->b[9]);
621      pwd->b[10] = msa_bneg_df(DF_BYTE, pws->b[10], pwt->b[10]);
622      pwd->b[11] = msa_bneg_df(DF_BYTE, pws->b[11], pwt->b[11]);
623      pwd->b[12] = msa_bneg_df(DF_BYTE, pws->b[12], pwt->b[12]);
624      pwd->b[13] = msa_bneg_df(DF_BYTE, pws->b[13], pwt->b[13]);
625      pwd->b[14] = msa_bneg_df(DF_BYTE, pws->b[14], pwt->b[14]);
626      pwd->b[15] = msa_bneg_df(DF_BYTE, pws->b[15], pwt->b[15]);
627  }
628  
629  void helper_msa_bneg_h(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
630  {
631      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
632      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
633      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
634  
635      pwd->h[0]  = msa_bneg_df(DF_HALF, pws->h[0],  pwt->h[0]);
636      pwd->h[1]  = msa_bneg_df(DF_HALF, pws->h[1],  pwt->h[1]);
637      pwd->h[2]  = msa_bneg_df(DF_HALF, pws->h[2],  pwt->h[2]);
638      pwd->h[3]  = msa_bneg_df(DF_HALF, pws->h[3],  pwt->h[3]);
639      pwd->h[4]  = msa_bneg_df(DF_HALF, pws->h[4],  pwt->h[4]);
640      pwd->h[5]  = msa_bneg_df(DF_HALF, pws->h[5],  pwt->h[5]);
641      pwd->h[6]  = msa_bneg_df(DF_HALF, pws->h[6],  pwt->h[6]);
642      pwd->h[7]  = msa_bneg_df(DF_HALF, pws->h[7],  pwt->h[7]);
643  }
644  
645  void helper_msa_bneg_w(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
646  {
647      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
648      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
649      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
650  
651      pwd->w[0]  = msa_bneg_df(DF_WORD, pws->w[0],  pwt->w[0]);
652      pwd->w[1]  = msa_bneg_df(DF_WORD, pws->w[1],  pwt->w[1]);
653      pwd->w[2]  = msa_bneg_df(DF_WORD, pws->w[2],  pwt->w[2]);
654      pwd->w[3]  = msa_bneg_df(DF_WORD, pws->w[3],  pwt->w[3]);
655  }
656  
657  void helper_msa_bneg_d(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
658  {
659      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
660      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
661      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
662  
663      pwd->d[0]  = msa_bneg_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
664      pwd->d[1]  = msa_bneg_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
665  }
666  
/*
 * Bit Set for one element.
 *
 * Returns arg1, masked to DF_BITS(df) bits, with bit number
 * BIT_POSITION(arg2, df) set.
 *
 * The single-bit mask is built from an unsigned constant: the bit index
 * can reach 63 for the widest element, and shifting a signed 1 into the
 * sign bit is not defined by the C standard.
 */
static inline int64_t msa_bset_df(uint32_t df, int64_t arg1,
        int64_t arg2)
{
    int32_t b_arg2 = BIT_POSITION(arg2, df);
    return UNSIGNED(arg1 | (1ULL << b_arg2), df);
}
673  
674  void helper_msa_bset_b(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
675  {
676      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
677      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
678      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
679  
680      pwd->b[0]  = msa_bset_df(DF_BYTE, pws->b[0],  pwt->b[0]);
681      pwd->b[1]  = msa_bset_df(DF_BYTE, pws->b[1],  pwt->b[1]);
682      pwd->b[2]  = msa_bset_df(DF_BYTE, pws->b[2],  pwt->b[2]);
683      pwd->b[3]  = msa_bset_df(DF_BYTE, pws->b[3],  pwt->b[3]);
684      pwd->b[4]  = msa_bset_df(DF_BYTE, pws->b[4],  pwt->b[4]);
685      pwd->b[5]  = msa_bset_df(DF_BYTE, pws->b[5],  pwt->b[5]);
686      pwd->b[6]  = msa_bset_df(DF_BYTE, pws->b[6],  pwt->b[6]);
687      pwd->b[7]  = msa_bset_df(DF_BYTE, pws->b[7],  pwt->b[7]);
688      pwd->b[8]  = msa_bset_df(DF_BYTE, pws->b[8],  pwt->b[8]);
689      pwd->b[9]  = msa_bset_df(DF_BYTE, pws->b[9],  pwt->b[9]);
690      pwd->b[10] = msa_bset_df(DF_BYTE, pws->b[10], pwt->b[10]);
691      pwd->b[11] = msa_bset_df(DF_BYTE, pws->b[11], pwt->b[11]);
692      pwd->b[12] = msa_bset_df(DF_BYTE, pws->b[12], pwt->b[12]);
693      pwd->b[13] = msa_bset_df(DF_BYTE, pws->b[13], pwt->b[13]);
694      pwd->b[14] = msa_bset_df(DF_BYTE, pws->b[14], pwt->b[14]);
695      pwd->b[15] = msa_bset_df(DF_BYTE, pws->b[15], pwt->b[15]);
696  }
697  
698  void helper_msa_bset_h(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
699  {
700      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
701      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
702      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
703  
704      pwd->h[0]  = msa_bset_df(DF_HALF, pws->h[0],  pwt->h[0]);
705      pwd->h[1]  = msa_bset_df(DF_HALF, pws->h[1],  pwt->h[1]);
706      pwd->h[2]  = msa_bset_df(DF_HALF, pws->h[2],  pwt->h[2]);
707      pwd->h[3]  = msa_bset_df(DF_HALF, pws->h[3],  pwt->h[3]);
708      pwd->h[4]  = msa_bset_df(DF_HALF, pws->h[4],  pwt->h[4]);
709      pwd->h[5]  = msa_bset_df(DF_HALF, pws->h[5],  pwt->h[5]);
710      pwd->h[6]  = msa_bset_df(DF_HALF, pws->h[6],  pwt->h[6]);
711      pwd->h[7]  = msa_bset_df(DF_HALF, pws->h[7],  pwt->h[7]);
712  }
713  
714  void helper_msa_bset_w(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
715  {
716      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
717      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
718      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
719  
720      pwd->w[0]  = msa_bset_df(DF_WORD, pws->w[0],  pwt->w[0]);
721      pwd->w[1]  = msa_bset_df(DF_WORD, pws->w[1],  pwt->w[1]);
722      pwd->w[2]  = msa_bset_df(DF_WORD, pws->w[2],  pwt->w[2]);
723      pwd->w[3]  = msa_bset_df(DF_WORD, pws->w[3],  pwt->w[3]);
724  }
725  
726  void helper_msa_bset_d(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
727  {
728      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
729      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
730      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
731  
732      pwd->d[0]  = msa_bset_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
733      pwd->d[1]  = msa_bset_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
734  }
735  
736  
737  /*
738   * Fixed Multiply
739   * --------------
740   *
741   * +---------------+----------------------------------------------------------+
742   * | MADD_Q.H      | Vector Fixed-Point Multiply and Add (halfword)           |
743   * | MADD_Q.W      | Vector Fixed-Point Multiply and Add (word)               |
744   * | MADDR_Q.H     | Vector Fixed-Point Multiply and Add Rounded (halfword)   |
745   * | MADDR_Q.W     | Vector Fixed-Point Multiply and Add Rounded (word)       |
746   * | MSUB_Q.H      | Vector Fixed-Point Multiply and Subtr. (halfword)        |
747   * | MSUB_Q.W      | Vector Fixed-Point Multiply and Subtr. (word)            |
748   * | MSUBR_Q.H     | Vector Fixed-Point Multiply and Subtr. Rounded (halfword)|
749   * | MSUBR_Q.W     | Vector Fixed-Point Multiply and Subtr. Rounded (word)    |
750   * | MUL_Q.H       | Vector Fixed-Point Multiply (halfword)                   |
751   * | MUL_Q.W       | Vector Fixed-Point Multiply (word)                       |
752   * | MULR_Q.H      | Vector Fixed-Point Multiply Rounded (halfword)           |
753   * | MULR_Q.W      | Vector Fixed-Point Multiply Rounded (word)               |
754   * +---------------+----------------------------------------------------------+
755   */
756  
757  /* TODO: insert Fixed Multiply group helpers here */
758  
759  
760  /*
761   * Float Max Min
762   * -------------
763   *
764   * +---------------+----------------------------------------------------------+
765   * | FMAX_A.W      | Vector Floating-Point Maximum (Absolute) (word)          |
766   * | FMAX_A.D      | Vector Floating-Point Maximum (Absolute) (doubleword)    |
767   * | FMAX.W        | Vector Floating-Point Maximum (word)                     |
768   * | FMAX.D        | Vector Floating-Point Maximum (doubleword)               |
769   * | FMIN_A.W      | Vector Floating-Point Minimum (Absolute) (word)          |
770   * | FMIN_A.D      | Vector Floating-Point Minimum (Absolute) (doubleword)    |
771   * | FMIN.W        | Vector Floating-Point Minimum (word)                     |
772   * | FMIN.D        | Vector Floating-Point Minimum (doubleword)               |
773   * +---------------+----------------------------------------------------------+
774   */
775  
776  /* TODO: insert Float Max Min group helpers here */
777  
778  
779  /*
780   * Int Add
781   * -------
782   *
783   * +---------------+----------------------------------------------------------+
784   * | ADD_A.B       | Vector Add Absolute Values (byte)                        |
785   * | ADD_A.H       | Vector Add Absolute Values (halfword)                    |
786   * | ADD_A.W       | Vector Add Absolute Values (word)                        |
787   * | ADD_A.D       | Vector Add Absolute Values (doubleword)                  |
788   * | ADDS_A.B      | Vector Signed Saturated Add (of Absolute) (byte)         |
789   * | ADDS_A.H      | Vector Signed Saturated Add (of Absolute) (halfword)     |
790   * | ADDS_A.W      | Vector Signed Saturated Add (of Absolute) (word)         |
791   * | ADDS_A.D      | Vector Signed Saturated Add (of Absolute) (doubleword)   |
792   * | ADDS_S.B      | Vector Signed Saturated Add (of Signed) (byte)           |
793   * | ADDS_S.H      | Vector Signed Saturated Add (of Signed) (halfword)       |
794   * | ADDS_S.W      | Vector Signed Saturated Add (of Signed) (word)           |
795   * | ADDS_S.D      | Vector Signed Saturated Add (of Signed) (doubleword)     |
796   * | ADDS_U.B      | Vector Unsigned Saturated Add (of Unsigned) (byte)       |
797   * | ADDS_U.H      | Vector Unsigned Saturated Add (of Unsigned) (halfword)   |
798   * | ADDS_U.W      | Vector Unsigned Saturated Add (of Unsigned) (word)       |
799   * | ADDS_U.D      | Vector Unsigned Saturated Add (of Unsigned) (doubleword) |
800   * | ADDV.B        | Vector Add (byte)                                        |
801   * | ADDV.H        | Vector Add (halfword)                                    |
802   * | ADDV.W        | Vector Add (word)                                        |
803   * | ADDV.D        | Vector Add (doubleword)                                  |
804   * | HADD_S.H      | Vector Signed Horizontal Add (halfword)                  |
805   * | HADD_S.W      | Vector Signed Horizontal Add (word)                      |
806   * | HADD_S.D      | Vector Signed Horizontal Add (doubleword)                |
807   * | HADD_U.H      | Vector Unsigned Horizontal Add (halfword)                |
808   * | HADD_U.W      | Vector Unsigned Horizontal Add (word)                    |
809   * | HADD_U.D      | Vector Unsigned Horizontal Add (doubleword)              |
810   * +---------------+----------------------------------------------------------+
811   */
812  
813  
/*
 * Add the absolute values of two elements (ADD_A).
 *
 * @df:   data format; unused here, kept for a uniform helper signature
 * @arg1: first signed element
 * @arg2: second signed element
 *
 * Returns |arg1| + |arg2|, computed modulo 2^64; callers truncate the
 * result to the lane width when storing it.
 */
static inline int64_t msa_add_a_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    /*
     * Negate in the unsigned domain: "-arg" overflows for INT64_MIN, which
     * is undefined for a signed type, whereas 0 - (uint64_t)arg is well
     * defined and yields the magnitude 2^63.
     */
    uint64_t abs_arg1 = arg1 >= 0 ? (uint64_t)arg1 : 0 - (uint64_t)arg1;
    uint64_t abs_arg2 = arg2 >= 0 ? (uint64_t)arg2 : 0 - (uint64_t)arg2;
    return abs_arg1 + abs_arg2;
}
820  
821  void helper_msa_add_a_b(CPUMIPSState *env,
822                          uint32_t wd, uint32_t ws, uint32_t wt)
823  {
824      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
825      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
826      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
827  
828      pwd->b[0]  = msa_add_a_df(DF_BYTE, pws->b[0],  pwt->b[0]);
829      pwd->b[1]  = msa_add_a_df(DF_BYTE, pws->b[1],  pwt->b[1]);
830      pwd->b[2]  = msa_add_a_df(DF_BYTE, pws->b[2],  pwt->b[2]);
831      pwd->b[3]  = msa_add_a_df(DF_BYTE, pws->b[3],  pwt->b[3]);
832      pwd->b[4]  = msa_add_a_df(DF_BYTE, pws->b[4],  pwt->b[4]);
833      pwd->b[5]  = msa_add_a_df(DF_BYTE, pws->b[5],  pwt->b[5]);
834      pwd->b[6]  = msa_add_a_df(DF_BYTE, pws->b[6],  pwt->b[6]);
835      pwd->b[7]  = msa_add_a_df(DF_BYTE, pws->b[7],  pwt->b[7]);
836      pwd->b[8]  = msa_add_a_df(DF_BYTE, pws->b[8],  pwt->b[8]);
837      pwd->b[9]  = msa_add_a_df(DF_BYTE, pws->b[9],  pwt->b[9]);
838      pwd->b[10] = msa_add_a_df(DF_BYTE, pws->b[10], pwt->b[10]);
839      pwd->b[11] = msa_add_a_df(DF_BYTE, pws->b[11], pwt->b[11]);
840      pwd->b[12] = msa_add_a_df(DF_BYTE, pws->b[12], pwt->b[12]);
841      pwd->b[13] = msa_add_a_df(DF_BYTE, pws->b[13], pwt->b[13]);
842      pwd->b[14] = msa_add_a_df(DF_BYTE, pws->b[14], pwt->b[14]);
843      pwd->b[15] = msa_add_a_df(DF_BYTE, pws->b[15], pwt->b[15]);
844  }
845  
846  void helper_msa_add_a_h(CPUMIPSState *env,
847                          uint32_t wd, uint32_t ws, uint32_t wt)
848  {
849      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
850      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
851      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
852  
853      pwd->h[0]  = msa_add_a_df(DF_HALF, pws->h[0],  pwt->h[0]);
854      pwd->h[1]  = msa_add_a_df(DF_HALF, pws->h[1],  pwt->h[1]);
855      pwd->h[2]  = msa_add_a_df(DF_HALF, pws->h[2],  pwt->h[2]);
856      pwd->h[3]  = msa_add_a_df(DF_HALF, pws->h[3],  pwt->h[3]);
857      pwd->h[4]  = msa_add_a_df(DF_HALF, pws->h[4],  pwt->h[4]);
858      pwd->h[5]  = msa_add_a_df(DF_HALF, pws->h[5],  pwt->h[5]);
859      pwd->h[6]  = msa_add_a_df(DF_HALF, pws->h[6],  pwt->h[6]);
860      pwd->h[7]  = msa_add_a_df(DF_HALF, pws->h[7],  pwt->h[7]);
861  }
862  
863  void helper_msa_add_a_w(CPUMIPSState *env,
864                          uint32_t wd, uint32_t ws, uint32_t wt)
865  {
866      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
867      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
868      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
869  
870      pwd->w[0]  = msa_add_a_df(DF_WORD, pws->w[0],  pwt->w[0]);
871      pwd->w[1]  = msa_add_a_df(DF_WORD, pws->w[1],  pwt->w[1]);
872      pwd->w[2]  = msa_add_a_df(DF_WORD, pws->w[2],  pwt->w[2]);
873      pwd->w[3]  = msa_add_a_df(DF_WORD, pws->w[3],  pwt->w[3]);
874  }
875  
876  void helper_msa_add_a_d(CPUMIPSState *env,
877                          uint32_t wd, uint32_t ws, uint32_t wt)
878  {
879      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
880      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
881      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
882  
883      pwd->d[0]  = msa_add_a_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
884      pwd->d[1]  = msa_add_a_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
885  }
886  
887  
/*
 * Saturating add of the absolute values of two elements (ADDS_A).
 *
 * @df:   data format; selects the signed maximum via DF_MAX_INT(df)
 * @arg1: first signed element
 * @arg2: second signed element
 *
 * Returns |arg1| + |arg2|, clamped to DF_MAX_INT(df).
 */
static inline int64_t msa_adds_a_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    uint64_t max_int = (uint64_t)DF_MAX_INT(df);
    /*
     * Negate in the unsigned domain: "-arg" overflows for INT64_MIN, which
     * is undefined for a signed type, whereas 0 - (uint64_t)arg is well
     * defined and yields the magnitude 2^63.
     */
    uint64_t abs_arg1 = arg1 >= 0 ? (uint64_t)arg1 : 0 - (uint64_t)arg1;
    uint64_t abs_arg2 = arg2 >= 0 ? (uint64_t)arg2 : 0 - (uint64_t)arg2;

    /* A magnitude already above the maximum saturates on its own. */
    if (abs_arg1 > max_int || abs_arg2 > max_int) {
        return (int64_t)max_int;
    }
    /* Compare against the remaining headroom so the sum cannot wrap. */
    return (abs_arg1 < max_int - abs_arg2) ? abs_arg1 + abs_arg2 : max_int;
}
899  
900  void helper_msa_adds_a_b(CPUMIPSState *env,
901                           uint32_t wd, uint32_t ws, uint32_t wt)
902  {
903      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
904      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
905      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
906  
907      pwd->b[0]  = msa_adds_a_df(DF_BYTE, pws->b[0],  pwt->b[0]);
908      pwd->b[1]  = msa_adds_a_df(DF_BYTE, pws->b[1],  pwt->b[1]);
909      pwd->b[2]  = msa_adds_a_df(DF_BYTE, pws->b[2],  pwt->b[2]);
910      pwd->b[3]  = msa_adds_a_df(DF_BYTE, pws->b[3],  pwt->b[3]);
911      pwd->b[4]  = msa_adds_a_df(DF_BYTE, pws->b[4],  pwt->b[4]);
912      pwd->b[5]  = msa_adds_a_df(DF_BYTE, pws->b[5],  pwt->b[5]);
913      pwd->b[6]  = msa_adds_a_df(DF_BYTE, pws->b[6],  pwt->b[6]);
914      pwd->b[7]  = msa_adds_a_df(DF_BYTE, pws->b[7],  pwt->b[7]);
915      pwd->b[8]  = msa_adds_a_df(DF_BYTE, pws->b[8],  pwt->b[8]);
916      pwd->b[9]  = msa_adds_a_df(DF_BYTE, pws->b[9],  pwt->b[9]);
917      pwd->b[10] = msa_adds_a_df(DF_BYTE, pws->b[10], pwt->b[10]);
918      pwd->b[11] = msa_adds_a_df(DF_BYTE, pws->b[11], pwt->b[11]);
919      pwd->b[12] = msa_adds_a_df(DF_BYTE, pws->b[12], pwt->b[12]);
920      pwd->b[13] = msa_adds_a_df(DF_BYTE, pws->b[13], pwt->b[13]);
921      pwd->b[14] = msa_adds_a_df(DF_BYTE, pws->b[14], pwt->b[14]);
922      pwd->b[15] = msa_adds_a_df(DF_BYTE, pws->b[15], pwt->b[15]);
923  }
924  
925  void helper_msa_adds_a_h(CPUMIPSState *env,
926                           uint32_t wd, uint32_t ws, uint32_t wt)
927  {
928      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
929      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
930      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
931  
932      pwd->h[0]  = msa_adds_a_df(DF_HALF, pws->h[0],  pwt->h[0]);
933      pwd->h[1]  = msa_adds_a_df(DF_HALF, pws->h[1],  pwt->h[1]);
934      pwd->h[2]  = msa_adds_a_df(DF_HALF, pws->h[2],  pwt->h[2]);
935      pwd->h[3]  = msa_adds_a_df(DF_HALF, pws->h[3],  pwt->h[3]);
936      pwd->h[4]  = msa_adds_a_df(DF_HALF, pws->h[4],  pwt->h[4]);
937      pwd->h[5]  = msa_adds_a_df(DF_HALF, pws->h[5],  pwt->h[5]);
938      pwd->h[6]  = msa_adds_a_df(DF_HALF, pws->h[6],  pwt->h[6]);
939      pwd->h[7]  = msa_adds_a_df(DF_HALF, pws->h[7],  pwt->h[7]);
940  }
941  
942  void helper_msa_adds_a_w(CPUMIPSState *env,
943                           uint32_t wd, uint32_t ws, uint32_t wt)
944  {
945      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
946      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
947      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
948  
949      pwd->w[0]  = msa_adds_a_df(DF_WORD, pws->w[0],  pwt->w[0]);
950      pwd->w[1]  = msa_adds_a_df(DF_WORD, pws->w[1],  pwt->w[1]);
951      pwd->w[2]  = msa_adds_a_df(DF_WORD, pws->w[2],  pwt->w[2]);
952      pwd->w[3]  = msa_adds_a_df(DF_WORD, pws->w[3],  pwt->w[3]);
953  }
954  
955  void helper_msa_adds_a_d(CPUMIPSState *env,
956                           uint32_t wd, uint32_t ws, uint32_t wt)
957  {
958      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
959      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
960      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
961  
962      pwd->d[0]  = msa_adds_a_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
963      pwd->d[1]  = msa_adds_a_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
964  }
965  
966  
/*
 * Signed saturating add of two elements (ADDS_S).
 *
 * @df:   data format; selects the bounds DF_MIN_INT(df) / DF_MAX_INT(df)
 * @arg1: first signed element
 * @arg2: second signed element
 *
 * Returns arg1 + arg2, clamped to the lower bound when arg1 is negative
 * and to the upper bound otherwise.  The range check is done on the
 * remaining headroom before the addition is performed.
 */
static inline int64_t msa_adds_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const int64_t upper = DF_MAX_INT(df);
    const int64_t lower = DF_MIN_INT(df);

    if (arg1 < 0) {
        /* Negative first operand: clamp at the lower bound. */
        if (lower - arg1 < arg2) {
            return arg1 + arg2;
        }
        return lower;
    }

    /* Non-negative first operand: clamp at the upper bound. */
    if (arg2 < upper - arg1) {
        return arg1 + arg2;
    }
    return upper;
}
977  
978  void helper_msa_adds_s_b(CPUMIPSState *env,
979                           uint32_t wd, uint32_t ws, uint32_t wt)
980  {
981      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
982      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
983      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
984  
985      pwd->b[0]  = msa_adds_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
986      pwd->b[1]  = msa_adds_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
987      pwd->b[2]  = msa_adds_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
988      pwd->b[3]  = msa_adds_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
989      pwd->b[4]  = msa_adds_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
990      pwd->b[5]  = msa_adds_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
991      pwd->b[6]  = msa_adds_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
992      pwd->b[7]  = msa_adds_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
993      pwd->b[8]  = msa_adds_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
994      pwd->b[9]  = msa_adds_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
995      pwd->b[10] = msa_adds_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
996      pwd->b[11] = msa_adds_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
997      pwd->b[12] = msa_adds_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
998      pwd->b[13] = msa_adds_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
999      pwd->b[14] = msa_adds_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
1000      pwd->b[15] = msa_adds_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
1001  }
1002  
1003  void helper_msa_adds_s_h(CPUMIPSState *env,
1004                           uint32_t wd, uint32_t ws, uint32_t wt)
1005  {
1006      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1007      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1008      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1009  
1010      pwd->h[0]  = msa_adds_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
1011      pwd->h[1]  = msa_adds_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
1012      pwd->h[2]  = msa_adds_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
1013      pwd->h[3]  = msa_adds_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
1014      pwd->h[4]  = msa_adds_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
1015      pwd->h[5]  = msa_adds_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
1016      pwd->h[6]  = msa_adds_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
1017      pwd->h[7]  = msa_adds_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
1018  }
1019  
1020  void helper_msa_adds_s_w(CPUMIPSState *env,
1021                           uint32_t wd, uint32_t ws, uint32_t wt)
1022  {
1023      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1024      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1025      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1026  
1027      pwd->w[0]  = msa_adds_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
1028      pwd->w[1]  = msa_adds_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
1029      pwd->w[2]  = msa_adds_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
1030      pwd->w[3]  = msa_adds_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
1031  }
1032  
1033  void helper_msa_adds_s_d(CPUMIPSState *env,
1034                           uint32_t wd, uint32_t ws, uint32_t wt)
1035  {
1036      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1037      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1038      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1039  
1040      pwd->d[0]  = msa_adds_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1041      pwd->d[1]  = msa_adds_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1042  }
1043  
1044  
/*
 * Unsigned saturating add of two elements (ADDS_U).
 *
 * @df:   data format; selects the ceiling DF_MAX_UINT(df)
 * @arg1: first element; masked to the element width with UNSIGNED()
 * @arg2: second element; masked to the element width with UNSIGNED()
 *
 * Returns the sum of the masked operands, clamped to DF_MAX_UINT(df).
 */
static inline uint64_t msa_adds_u_df(uint32_t df, uint64_t arg1, uint64_t arg2)
{
    const uint64_t ceiling = DF_MAX_UINT(df);
    const uint64_t lhs = UNSIGNED(arg1, df);
    const uint64_t rhs = UNSIGNED(arg2, df);

    /* Compare against the remaining headroom so the sum cannot wrap. */
    if (lhs < ceiling - rhs) {
        return lhs + rhs;
    }
    return ceiling;
}
1052  
1053  void helper_msa_adds_u_b(CPUMIPSState *env,
1054                           uint32_t wd, uint32_t ws, uint32_t wt)
1055  {
1056      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1057      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1058      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1059  
1060      pwd->b[0]  = msa_adds_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
1061      pwd->b[1]  = msa_adds_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
1062      pwd->b[2]  = msa_adds_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
1063      pwd->b[3]  = msa_adds_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
1064      pwd->b[4]  = msa_adds_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
1065      pwd->b[5]  = msa_adds_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
1066      pwd->b[6]  = msa_adds_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
1067      pwd->b[7]  = msa_adds_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
1068      pwd->b[8]  = msa_adds_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
1069      pwd->b[9]  = msa_adds_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
1070      pwd->b[10] = msa_adds_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
1071      pwd->b[11] = msa_adds_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
1072      pwd->b[12] = msa_adds_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
1073      pwd->b[13] = msa_adds_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
1074      pwd->b[14] = msa_adds_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
1075      pwd->b[15] = msa_adds_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
1076  }
1077  
1078  void helper_msa_adds_u_h(CPUMIPSState *env,
1079                           uint32_t wd, uint32_t ws, uint32_t wt)
1080  {
1081      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1082      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1083      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1084  
1085      pwd->h[0]  = msa_adds_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
1086      pwd->h[1]  = msa_adds_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
1087      pwd->h[2]  = msa_adds_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
1088      pwd->h[3]  = msa_adds_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
1089      pwd->h[4]  = msa_adds_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
1090      pwd->h[5]  = msa_adds_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
1091      pwd->h[6]  = msa_adds_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
1092      pwd->h[7]  = msa_adds_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
1093  }
1094  
1095  void helper_msa_adds_u_w(CPUMIPSState *env,
1096                           uint32_t wd, uint32_t ws, uint32_t wt)
1097  {
1098      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1099      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1100      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1101  
1102      pwd->w[0]  = msa_adds_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
1103      pwd->w[1]  = msa_adds_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
1104      pwd->w[2]  = msa_adds_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
1105      pwd->w[3]  = msa_adds_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
1106  }
1107  
1108  void helper_msa_adds_u_d(CPUMIPSState *env,
1109                           uint32_t wd, uint32_t ws, uint32_t wt)
1110  {
1111      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1112      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1113      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1114  
1115      pwd->d[0]  = msa_adds_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1116      pwd->d[1]  = msa_adds_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1117  }
1118  
1119  
/*
 * Plain (non-saturating) add of two elements (ADDV).
 *
 * @df:   data format; unused here, kept for a uniform helper signature
 * @arg1: first element
 * @arg2: second element
 *
 * Returns arg1 + arg2 with two's-complement wrap-around; callers truncate
 * the result to the lane width when storing it.
 */
static inline int64_t msa_addv_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    /*
     * Add in the unsigned domain: a signed 64-bit sum that overflows is
     * undefined in ISO C, while unsigned addition wraps modulo 2^64 and
     * produces the same bit pattern.
     */
    return (int64_t)((uint64_t)arg1 + (uint64_t)arg2);
}
1124  
1125  void helper_msa_addv_b(CPUMIPSState *env,
1126                         uint32_t wd, uint32_t ws, uint32_t wt)
1127  {
1128      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1129      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1130      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1131  
1132      pwd->b[0]  = msa_addv_df(DF_BYTE, pws->b[0],  pwt->b[0]);
1133      pwd->b[1]  = msa_addv_df(DF_BYTE, pws->b[1],  pwt->b[1]);
1134      pwd->b[2]  = msa_addv_df(DF_BYTE, pws->b[2],  pwt->b[2]);
1135      pwd->b[3]  = msa_addv_df(DF_BYTE, pws->b[3],  pwt->b[3]);
1136      pwd->b[4]  = msa_addv_df(DF_BYTE, pws->b[4],  pwt->b[4]);
1137      pwd->b[5]  = msa_addv_df(DF_BYTE, pws->b[5],  pwt->b[5]);
1138      pwd->b[6]  = msa_addv_df(DF_BYTE, pws->b[6],  pwt->b[6]);
1139      pwd->b[7]  = msa_addv_df(DF_BYTE, pws->b[7],  pwt->b[7]);
1140      pwd->b[8]  = msa_addv_df(DF_BYTE, pws->b[8],  pwt->b[8]);
1141      pwd->b[9]  = msa_addv_df(DF_BYTE, pws->b[9],  pwt->b[9]);
1142      pwd->b[10] = msa_addv_df(DF_BYTE, pws->b[10], pwt->b[10]);
1143      pwd->b[11] = msa_addv_df(DF_BYTE, pws->b[11], pwt->b[11]);
1144      pwd->b[12] = msa_addv_df(DF_BYTE, pws->b[12], pwt->b[12]);
1145      pwd->b[13] = msa_addv_df(DF_BYTE, pws->b[13], pwt->b[13]);
1146      pwd->b[14] = msa_addv_df(DF_BYTE, pws->b[14], pwt->b[14]);
1147      pwd->b[15] = msa_addv_df(DF_BYTE, pws->b[15], pwt->b[15]);
1148  }
1149  
1150  void helper_msa_addv_h(CPUMIPSState *env,
1151                         uint32_t wd, uint32_t ws, uint32_t wt)
1152  {
1153      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1154      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1155      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1156  
1157      pwd->h[0]  = msa_addv_df(DF_HALF, pws->h[0],  pwt->h[0]);
1158      pwd->h[1]  = msa_addv_df(DF_HALF, pws->h[1],  pwt->h[1]);
1159      pwd->h[2]  = msa_addv_df(DF_HALF, pws->h[2],  pwt->h[2]);
1160      pwd->h[3]  = msa_addv_df(DF_HALF, pws->h[3],  pwt->h[3]);
1161      pwd->h[4]  = msa_addv_df(DF_HALF, pws->h[4],  pwt->h[4]);
1162      pwd->h[5]  = msa_addv_df(DF_HALF, pws->h[5],  pwt->h[5]);
1163      pwd->h[6]  = msa_addv_df(DF_HALF, pws->h[6],  pwt->h[6]);
1164      pwd->h[7]  = msa_addv_df(DF_HALF, pws->h[7],  pwt->h[7]);
1165  }
1166  
1167  void helper_msa_addv_w(CPUMIPSState *env,
1168                         uint32_t wd, uint32_t ws, uint32_t wt)
1169  {
1170      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1171      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1172      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1173  
1174      pwd->w[0]  = msa_addv_df(DF_WORD, pws->w[0],  pwt->w[0]);
1175      pwd->w[1]  = msa_addv_df(DF_WORD, pws->w[1],  pwt->w[1]);
1176      pwd->w[2]  = msa_addv_df(DF_WORD, pws->w[2],  pwt->w[2]);
1177      pwd->w[3]  = msa_addv_df(DF_WORD, pws->w[3],  pwt->w[3]);
1178  }
1179  
1180  void helper_msa_addv_d(CPUMIPSState *env,
1181                         uint32_t wd, uint32_t ws, uint32_t wt)
1182  {
1183      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1184      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1185      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1186  
1187      pwd->d[0]  = msa_addv_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1188      pwd->d[1]  = msa_addv_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1189  }
1190  
1191  
/*
 * Half-element extraction.  Each macro takes an element value a of
 * DF_BITS(df) bits, selects one half of it by shifting within a 64-bit
 * value, and widens that half to 64 bits.  The SIGNED_* forms depend on
 * ">>" applied to a negative int64_t being an arithmetic shift.
 */

/* Low half, bits [0, DF_BITS(df) / 2), sign-extended. */
#define SIGNED_EVEN(a, df) \
        ((((int64_t)(a)) << (64 - DF_BITS(df) / 2)) >> (64 - DF_BITS(df) / 2))

/* Low half, bits [0, DF_BITS(df) / 2), zero-extended. */
#define UNSIGNED_EVEN(a, df) \
        ((((uint64_t)(a)) << (64 - DF_BITS(df) / 2)) >> (64 - DF_BITS(df) / 2))

/* High half, bits [DF_BITS(df) / 2, DF_BITS(df)), sign-extended. */
#define SIGNED_ODD(a, df) \
        ((((int64_t)(a)) << (64 - DF_BITS(df))) >> (64 - DF_BITS(df) / 2))

/* High half, bits [DF_BITS(df) / 2, DF_BITS(df)), zero-extended. */
#define UNSIGNED_ODD(a, df) \
        ((((uint64_t)(a)) << (64 - DF_BITS(df))) >> (64 - DF_BITS(df) / 2))
1203  
1204  
/*
 * Signed horizontal add of two elements (HADD_S).
 *
 * @df:   data format of the result element
 * @arg1: element supplying the SIGNED_ODD() half (upper half, sign-extended)
 * @arg2: element supplying the SIGNED_EVEN() half (lower half, sign-extended)
 *
 * Returns the sum of the two extracted halves.
 */
static inline int64_t msa_hadd_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    int64_t odd_half = SIGNED_ODD(arg1, df);
    int64_t even_half = SIGNED_EVEN(arg2, df);

    return odd_half + even_half;
}
1209  
1210  void helper_msa_hadd_s_h(CPUMIPSState *env,
1211                           uint32_t wd, uint32_t ws, uint32_t wt)
1212  {
1213      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1214      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1215      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1216  
1217      pwd->h[0]  = msa_hadd_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
1218      pwd->h[1]  = msa_hadd_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
1219      pwd->h[2]  = msa_hadd_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
1220      pwd->h[3]  = msa_hadd_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
1221      pwd->h[4]  = msa_hadd_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
1222      pwd->h[5]  = msa_hadd_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
1223      pwd->h[6]  = msa_hadd_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
1224      pwd->h[7]  = msa_hadd_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
1225  }
1226  
1227  void helper_msa_hadd_s_w(CPUMIPSState *env,
1228                           uint32_t wd, uint32_t ws, uint32_t wt)
1229  {
1230      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1231      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1232      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1233  
1234      pwd->w[0]  = msa_hadd_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
1235      pwd->w[1]  = msa_hadd_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
1236      pwd->w[2]  = msa_hadd_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
1237      pwd->w[3]  = msa_hadd_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
1238  }
1239  
1240  void helper_msa_hadd_s_d(CPUMIPSState *env,
1241                           uint32_t wd, uint32_t ws, uint32_t wt)
1242  {
1243      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1244      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1245      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1246  
1247      pwd->d[0]  = msa_hadd_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1248      pwd->d[1]  = msa_hadd_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1249  }
1250  
1251  
/*
 * Unsigned horizontal add of two elements (HADD_U).
 *
 * @df:   data format of the result element
 * @arg1: element supplying the UNSIGNED_ODD() half (upper half, zero-extended)
 * @arg2: element supplying the UNSIGNED_EVEN() half (lower half, zero-extended)
 *
 * Returns the sum of the two extracted halves.
 */
static inline int64_t msa_hadd_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    uint64_t odd_half = UNSIGNED_ODD(arg1, df);
    uint64_t even_half = UNSIGNED_EVEN(arg2, df);

    return odd_half + even_half;
}
1256  
1257  void helper_msa_hadd_u_h(CPUMIPSState *env,
1258                           uint32_t wd, uint32_t ws, uint32_t wt)
1259  {
1260      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1261      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1262      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1263  
1264      pwd->h[0]  = msa_hadd_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
1265      pwd->h[1]  = msa_hadd_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
1266      pwd->h[2]  = msa_hadd_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
1267      pwd->h[3]  = msa_hadd_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
1268      pwd->h[4]  = msa_hadd_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
1269      pwd->h[5]  = msa_hadd_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
1270      pwd->h[6]  = msa_hadd_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
1271      pwd->h[7]  = msa_hadd_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
1272  }
1273  
1274  void helper_msa_hadd_u_w(CPUMIPSState *env,
1275                           uint32_t wd, uint32_t ws, uint32_t wt)
1276  {
1277      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1278      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1279      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1280  
1281      pwd->w[0]  = msa_hadd_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
1282      pwd->w[1]  = msa_hadd_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
1283      pwd->w[2]  = msa_hadd_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
1284      pwd->w[3]  = msa_hadd_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
1285  }
1286  
1287  void helper_msa_hadd_u_d(CPUMIPSState *env,
1288                           uint32_t wd, uint32_t ws, uint32_t wt)
1289  {
1290      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1291      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1292      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1293  
1294      pwd->d[0]  = msa_hadd_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1295      pwd->d[1]  = msa_hadd_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1296  }
1297  
1298  
1299  /*
1300   * Int Average
1301   * -----------
1302   *
1303   * +---------------+----------------------------------------------------------+
1304   * | AVE_S.B       | Vector Signed Average (byte)                             |
1305   * | AVE_S.H       | Vector Signed Average (halfword)                         |
1306   * | AVE_S.W       | Vector Signed Average (word)                             |
1307   * | AVE_S.D       | Vector Signed Average (doubleword)                       |
1308   * | AVE_U.B       | Vector Unsigned Average (byte)                           |
1309   * | AVE_U.H       | Vector Unsigned Average (halfword)                       |
1310   * | AVE_U.W       | Vector Unsigned Average (word)                           |
1311   * | AVE_U.D       | Vector Unsigned Average (doubleword)                     |
1312   * | AVER_S.B      | Vector Signed Average Rounded (byte)                     |
1313   * | AVER_S.H      | Vector Signed Average Rounded (halfword)                 |
1314   * | AVER_S.W      | Vector Signed Average Rounded (word)                     |
1315   * | AVER_S.D      | Vector Signed Average Rounded (doubleword)               |
1316   * | AVER_U.B      | Vector Unsigned Average Rounded (byte)                   |
1317   * | AVER_U.H      | Vector Unsigned Average Rounded (halfword)               |
1318   * | AVER_U.W      | Vector Unsigned Average Rounded (word)                   |
1319   * | AVER_U.D      | Vector Unsigned Average Rounded (doubleword)             |
1320   * +---------------+----------------------------------------------------------+
1321   */
1322  
/*
 * Signed average of two elements (AVE_S).
 *
 * @df:   data format; unused here, kept for a uniform helper signature
 * @arg1: first signed element
 * @arg2: second signed element
 *
 * Each operand is halved before the addition, so the sum of the halves
 * plus the carry stays within int64_t.  The carry restores the unit lost
 * when both operands are odd.
 */
static inline int64_t msa_ave_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    /* Signed right shifts: relies on ">>" being arithmetic for int64_t. */
    int64_t half1 = arg1 >> 1;
    int64_t half2 = arg2 >> 1;
    /* 1 only when the low bit is set in both operands. */
    int64_t carry = arg1 & arg2 & 1;

    return half1 + half2 + carry;
}
1328  
1329  void helper_msa_ave_s_b(CPUMIPSState *env,
1330                          uint32_t wd, uint32_t ws, uint32_t wt)
1331  {
1332      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1333      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1334      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1335  
1336      pwd->b[0]  = msa_ave_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
1337      pwd->b[1]  = msa_ave_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
1338      pwd->b[2]  = msa_ave_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
1339      pwd->b[3]  = msa_ave_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
1340      pwd->b[4]  = msa_ave_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
1341      pwd->b[5]  = msa_ave_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
1342      pwd->b[6]  = msa_ave_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
1343      pwd->b[7]  = msa_ave_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
1344      pwd->b[8]  = msa_ave_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
1345      pwd->b[9]  = msa_ave_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
1346      pwd->b[10] = msa_ave_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
1347      pwd->b[11] = msa_ave_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
1348      pwd->b[12] = msa_ave_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
1349      pwd->b[13] = msa_ave_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
1350      pwd->b[14] = msa_ave_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
1351      pwd->b[15] = msa_ave_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
1352  }
1353  
1354  void helper_msa_ave_s_h(CPUMIPSState *env,
1355                          uint32_t wd, uint32_t ws, uint32_t wt)
1356  {
1357      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1358      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1359      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1360  
1361      pwd->h[0]  = msa_ave_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
1362      pwd->h[1]  = msa_ave_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
1363      pwd->h[2]  = msa_ave_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
1364      pwd->h[3]  = msa_ave_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
1365      pwd->h[4]  = msa_ave_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
1366      pwd->h[5]  = msa_ave_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
1367      pwd->h[6]  = msa_ave_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
1368      pwd->h[7]  = msa_ave_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
1369  }
1370  
1371  void helper_msa_ave_s_w(CPUMIPSState *env,
1372                          uint32_t wd, uint32_t ws, uint32_t wt)
1373  {
1374      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1375      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1376      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1377  
1378      pwd->w[0]  = msa_ave_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
1379      pwd->w[1]  = msa_ave_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
1380      pwd->w[2]  = msa_ave_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
1381      pwd->w[3]  = msa_ave_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
1382  }
1383  
1384  void helper_msa_ave_s_d(CPUMIPSState *env,
1385                          uint32_t wd, uint32_t ws, uint32_t wt)
1386  {
1387      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1388      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1389      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1390  
1391      pwd->d[0]  = msa_ave_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1392      pwd->d[1]  = msa_ave_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1393  }
1394  
/*
 * Average of two unsigned elements of data format df, computed without
 * forming the full sum.
 *
 * Both operands are first masked with UNSIGNED() for the given df, then
 * halved; the bit discarded by the two shifts is added back only when both
 * masked operands are odd.
 */
static inline uint64_t msa_ave_u_df(uint32_t df, uint64_t arg1, uint64_t arg2)
{
    uint64_t lhs = UNSIGNED(arg1, df);
    uint64_t rhs = UNSIGNED(arg2, df);
    uint64_t carry = lhs & rhs & 1;

    /* unsigned shifts */
    return (lhs >> 1) + (rhs >> 1) + carry;
}
1402  
1403  void helper_msa_ave_u_b(CPUMIPSState *env,
1404                          uint32_t wd, uint32_t ws, uint32_t wt)
1405  {
1406      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1407      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1408      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1409  
1410      pwd->b[0]  = msa_ave_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
1411      pwd->b[1]  = msa_ave_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
1412      pwd->b[2]  = msa_ave_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
1413      pwd->b[3]  = msa_ave_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
1414      pwd->b[4]  = msa_ave_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
1415      pwd->b[5]  = msa_ave_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
1416      pwd->b[6]  = msa_ave_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
1417      pwd->b[7]  = msa_ave_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
1418      pwd->b[8]  = msa_ave_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
1419      pwd->b[9]  = msa_ave_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
1420      pwd->b[10] = msa_ave_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
1421      pwd->b[11] = msa_ave_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
1422      pwd->b[12] = msa_ave_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
1423      pwd->b[13] = msa_ave_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
1424      pwd->b[14] = msa_ave_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
1425      pwd->b[15] = msa_ave_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
1426  }
1427  
1428  void helper_msa_ave_u_h(CPUMIPSState *env,
1429                          uint32_t wd, uint32_t ws, uint32_t wt)
1430  {
1431      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1432      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1433      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1434  
1435      pwd->h[0]  = msa_ave_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
1436      pwd->h[1]  = msa_ave_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
1437      pwd->h[2]  = msa_ave_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
1438      pwd->h[3]  = msa_ave_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
1439      pwd->h[4]  = msa_ave_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
1440      pwd->h[5]  = msa_ave_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
1441      pwd->h[6]  = msa_ave_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
1442      pwd->h[7]  = msa_ave_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
1443  }
1444  
1445  void helper_msa_ave_u_w(CPUMIPSState *env,
1446                          uint32_t wd, uint32_t ws, uint32_t wt)
1447  {
1448      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1449      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1450      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1451  
1452      pwd->w[0]  = msa_ave_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
1453      pwd->w[1]  = msa_ave_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
1454      pwd->w[2]  = msa_ave_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
1455      pwd->w[3]  = msa_ave_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
1456  }
1457  
1458  void helper_msa_ave_u_d(CPUMIPSState *env,
1459                          uint32_t wd, uint32_t ws, uint32_t wt)
1460  {
1461      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1462      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1463      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1464  
1465      pwd->d[0]  = msa_ave_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1466      pwd->d[1]  = msa_ave_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1467  }
1468  
/*
 * Rounded average of two signed elements, computed without forming the
 * full sum.
 *
 * Each operand is halved first; one is added back when at least one of the
 * operands is odd.  The df argument is unused.
 */
static inline int64_t msa_aver_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    /* signed shifts */
    int64_t half1 = arg1 >> 1;
    int64_t half2 = arg2 >> 1;
    int64_t round = (arg1 | arg2) & 1;

    return half1 + half2 + round;
}
1474  
1475  void helper_msa_aver_s_b(CPUMIPSState *env,
1476                           uint32_t wd, uint32_t ws, uint32_t wt)
1477  {
1478      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1479      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1480      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1481  
1482      pwd->b[0]  = msa_aver_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
1483      pwd->b[1]  = msa_aver_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
1484      pwd->b[2]  = msa_aver_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
1485      pwd->b[3]  = msa_aver_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
1486      pwd->b[4]  = msa_aver_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
1487      pwd->b[5]  = msa_aver_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
1488      pwd->b[6]  = msa_aver_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
1489      pwd->b[7]  = msa_aver_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
1490      pwd->b[8]  = msa_aver_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
1491      pwd->b[9]  = msa_aver_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
1492      pwd->b[10] = msa_aver_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
1493      pwd->b[11] = msa_aver_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
1494      pwd->b[12] = msa_aver_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
1495      pwd->b[13] = msa_aver_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
1496      pwd->b[14] = msa_aver_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
1497      pwd->b[15] = msa_aver_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
1498  }
1499  
1500  void helper_msa_aver_s_h(CPUMIPSState *env,
1501                           uint32_t wd, uint32_t ws, uint32_t wt)
1502  {
1503      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1504      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1505      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1506  
1507      pwd->h[0]  = msa_aver_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
1508      pwd->h[1]  = msa_aver_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
1509      pwd->h[2]  = msa_aver_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
1510      pwd->h[3]  = msa_aver_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
1511      pwd->h[4]  = msa_aver_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
1512      pwd->h[5]  = msa_aver_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
1513      pwd->h[6]  = msa_aver_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
1514      pwd->h[7]  = msa_aver_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
1515  }
1516  
1517  void helper_msa_aver_s_w(CPUMIPSState *env,
1518                           uint32_t wd, uint32_t ws, uint32_t wt)
1519  {
1520      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1521      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1522      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1523  
1524      pwd->w[0]  = msa_aver_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
1525      pwd->w[1]  = msa_aver_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
1526      pwd->w[2]  = msa_aver_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
1527      pwd->w[3]  = msa_aver_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
1528  }
1529  
1530  void helper_msa_aver_s_d(CPUMIPSState *env,
1531                           uint32_t wd, uint32_t ws, uint32_t wt)
1532  {
1533      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1534      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1535      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1536  
1537      pwd->d[0]  = msa_aver_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1538      pwd->d[1]  = msa_aver_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1539  }
1540  
/*
 * Rounded average of two unsigned elements of data format df, computed
 * without forming the full sum.
 *
 * Both operands are first masked with UNSIGNED() for the given df, then
 * halved; one is added back when at least one masked operand is odd.
 */
static inline uint64_t msa_aver_u_df(uint32_t df, uint64_t arg1, uint64_t arg2)
{
    uint64_t lhs = UNSIGNED(arg1, df);
    uint64_t rhs = UNSIGNED(arg2, df);
    uint64_t round = (lhs | rhs) & 1;

    /* unsigned shifts */
    return (lhs >> 1) + (rhs >> 1) + round;
}
1548  
1549  void helper_msa_aver_u_b(CPUMIPSState *env,
1550                           uint32_t wd, uint32_t ws, uint32_t wt)
1551  {
1552      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1553      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1554      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1555  
1556      pwd->b[0]  = msa_aver_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
1557      pwd->b[1]  = msa_aver_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
1558      pwd->b[2]  = msa_aver_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
1559      pwd->b[3]  = msa_aver_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
1560      pwd->b[4]  = msa_aver_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
1561      pwd->b[5]  = msa_aver_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
1562      pwd->b[6]  = msa_aver_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
1563      pwd->b[7]  = msa_aver_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
1564      pwd->b[8]  = msa_aver_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
1565      pwd->b[9]  = msa_aver_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
1566      pwd->b[10] = msa_aver_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
1567      pwd->b[11] = msa_aver_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
1568      pwd->b[12] = msa_aver_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
1569      pwd->b[13] = msa_aver_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
1570      pwd->b[14] = msa_aver_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
1571      pwd->b[15] = msa_aver_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
1572  }
1573  
1574  void helper_msa_aver_u_h(CPUMIPSState *env,
1575                           uint32_t wd, uint32_t ws, uint32_t wt)
1576  {
1577      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1578      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1579      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1580  
1581      pwd->h[0]  = msa_aver_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
1582      pwd->h[1]  = msa_aver_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
1583      pwd->h[2]  = msa_aver_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
1584      pwd->h[3]  = msa_aver_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
1585      pwd->h[4]  = msa_aver_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
1586      pwd->h[5]  = msa_aver_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
1587      pwd->h[6]  = msa_aver_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
1588      pwd->h[7]  = msa_aver_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
1589  }
1590  
1591  void helper_msa_aver_u_w(CPUMIPSState *env,
1592                           uint32_t wd, uint32_t ws, uint32_t wt)
1593  {
1594      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1595      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1596      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1597  
1598      pwd->w[0]  = msa_aver_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
1599      pwd->w[1]  = msa_aver_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
1600      pwd->w[2]  = msa_aver_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
1601      pwd->w[3]  = msa_aver_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
1602  }
1603  
1604  void helper_msa_aver_u_d(CPUMIPSState *env,
1605                           uint32_t wd, uint32_t ws, uint32_t wt)
1606  {
1607      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1608      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1609      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1610  
1611      pwd->d[0]  = msa_aver_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1612      pwd->d[1]  = msa_aver_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1613  }
1614  
1615  
1616  /*
1617   * Int Compare
1618   * -----------
1619   *
1620   * +---------------+----------------------------------------------------------+
1621   * | CEQ.B         | Vector Compare Equal (byte)                              |
1622   * | CEQ.H         | Vector Compare Equal (halfword)                          |
1623   * | CEQ.W         | Vector Compare Equal (word)                              |
1624   * | CEQ.D         | Vector Compare Equal (doubleword)                        |
1625   * | CLE_S.B       | Vector Compare Signed Less Than or Equal (byte)          |
1626   * | CLE_S.H       | Vector Compare Signed Less Than or Equal (halfword)      |
1627   * | CLE_S.W       | Vector Compare Signed Less Than or Equal (word)          |
1628   * | CLE_S.D       | Vector Compare Signed Less Than or Equal (doubleword)    |
1629   * | CLE_U.B       | Vector Compare Unsigned Less Than or Equal (byte)        |
1630   * | CLE_U.H       | Vector Compare Unsigned Less Than or Equal (halfword)    |
1631   * | CLE_U.W       | Vector Compare Unsigned Less Than or Equal (word)        |
1632   * | CLE_U.D       | Vector Compare Unsigned Less Than or Equal (doubleword)  |
1633   * | CLT_S.B       | Vector Compare Signed Less Than (byte)                   |
1634   * | CLT_S.H       | Vector Compare Signed Less Than (halfword)               |
1635   * | CLT_S.W       | Vector Compare Signed Less Than (word)                   |
1636   * | CLT_S.D       | Vector Compare Signed Less Than (doubleword)             |
1637   * | CLT_U.B       | Vector Compare Unsigned Less Than (byte)                 |
1638   * | CLT_U.H       | Vector Compare Unsigned Less Than (halfword)             |
1639   * | CLT_U.W       | Vector Compare Unsigned Less Than (word)                 |
1640   * | CLT_U.D       | Vector Compare Unsigned Less Than (doubleword)           |
1641   * +---------------+----------------------------------------------------------+
1642   */
1643  
/*
 * Compare two elements for equality: returns -1 when arg1 equals arg2 and
 * 0 otherwise.  The df argument is unused.
 */
static inline int64_t msa_ceq_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    if (arg1 != arg2) {
        return 0;
    }
    return -1;
}
1648  
/* Byte equality compare: returns -1 when arg1 equals arg2, 0 otherwise. */
static inline int8_t msa_ceq_b(int8_t arg1, int8_t arg2)
{
    if (arg1 != arg2) {
        return 0;
    }
    return -1;
}
1653  
1654  void helper_msa_ceq_b(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
1655  {
1656      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1657      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1658      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1659  
1660      pwd->b[0]  = msa_ceq_b(pws->b[0],  pwt->b[0]);
1661      pwd->b[1]  = msa_ceq_b(pws->b[1],  pwt->b[1]);
1662      pwd->b[2]  = msa_ceq_b(pws->b[2],  pwt->b[2]);
1663      pwd->b[3]  = msa_ceq_b(pws->b[3],  pwt->b[3]);
1664      pwd->b[4]  = msa_ceq_b(pws->b[4],  pwt->b[4]);
1665      pwd->b[5]  = msa_ceq_b(pws->b[5],  pwt->b[5]);
1666      pwd->b[6]  = msa_ceq_b(pws->b[6],  pwt->b[6]);
1667      pwd->b[7]  = msa_ceq_b(pws->b[7],  pwt->b[7]);
1668      pwd->b[8]  = msa_ceq_b(pws->b[8],  pwt->b[8]);
1669      pwd->b[9]  = msa_ceq_b(pws->b[9],  pwt->b[9]);
1670      pwd->b[10] = msa_ceq_b(pws->b[10], pwt->b[10]);
1671      pwd->b[11] = msa_ceq_b(pws->b[11], pwt->b[11]);
1672      pwd->b[12] = msa_ceq_b(pws->b[12], pwt->b[12]);
1673      pwd->b[13] = msa_ceq_b(pws->b[13], pwt->b[13]);
1674      pwd->b[14] = msa_ceq_b(pws->b[14], pwt->b[14]);
1675      pwd->b[15] = msa_ceq_b(pws->b[15], pwt->b[15]);
1676  }
1677  
/* Halfword equality compare: returns -1 when arg1 equals arg2, 0 otherwise. */
static inline int16_t msa_ceq_h(int16_t arg1, int16_t arg2)
{
    if (arg1 != arg2) {
        return 0;
    }
    return -1;
}
1682  
1683  void helper_msa_ceq_h(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
1684  {
1685      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1686      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1687      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1688  
1689      pwd->h[0]  = msa_ceq_h(pws->h[0],  pwt->h[0]);
1690      pwd->h[1]  = msa_ceq_h(pws->h[1],  pwt->h[1]);
1691      pwd->h[2]  = msa_ceq_h(pws->h[2],  pwt->h[2]);
1692      pwd->h[3]  = msa_ceq_h(pws->h[3],  pwt->h[3]);
1693      pwd->h[4]  = msa_ceq_h(pws->h[4],  pwt->h[4]);
1694      pwd->h[5]  = msa_ceq_h(pws->h[5],  pwt->h[5]);
1695      pwd->h[6]  = msa_ceq_h(pws->h[6],  pwt->h[6]);
1696      pwd->h[7]  = msa_ceq_h(pws->h[7],  pwt->h[7]);
1697  }
1698  
/* Word equality compare: returns -1 when arg1 equals arg2, 0 otherwise. */
static inline int32_t msa_ceq_w(int32_t arg1, int32_t arg2)
{
    if (arg1 != arg2) {
        return 0;
    }
    return -1;
}
1703  
1704  void helper_msa_ceq_w(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
1705  {
1706      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1707      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1708      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1709  
1710      pwd->w[0]  = msa_ceq_w(pws->w[0],  pwt->w[0]);
1711      pwd->w[1]  = msa_ceq_w(pws->w[1],  pwt->w[1]);
1712      pwd->w[2]  = msa_ceq_w(pws->w[2],  pwt->w[2]);
1713      pwd->w[3]  = msa_ceq_w(pws->w[3],  pwt->w[3]);
1714  }
1715  
/*
 * Doubleword equality compare: returns -1 when arg1 equals arg2, 0
 * otherwise.
 */
static inline int64_t msa_ceq_d(int64_t arg1, int64_t arg2)
{
    if (arg1 != arg2) {
        return 0;
    }
    return -1;
}
1720  
1721  void helper_msa_ceq_d(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
1722  {
1723      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1724      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1725      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1726  
1727      pwd->d[0]  = msa_ceq_d(pws->d[0],  pwt->d[0]);
1728      pwd->d[1]  = msa_ceq_d(pws->d[1],  pwt->d[1]);
1729  }
1730  
/*
 * Signed less-than-or-equal compare: returns -1 when arg1 <= arg2 and 0
 * otherwise.  The df argument is unused.
 */
static inline int64_t msa_cle_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    if (arg1 <= arg2) {
        return -1;
    }
    return 0;
}
1735  
1736  void helper_msa_cle_s_b(CPUMIPSState *env,
1737                          uint32_t wd, uint32_t ws, uint32_t wt)
1738  {
1739      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1740      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1741      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1742  
1743      pwd->b[0]  = msa_cle_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
1744      pwd->b[1]  = msa_cle_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
1745      pwd->b[2]  = msa_cle_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
1746      pwd->b[3]  = msa_cle_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
1747      pwd->b[4]  = msa_cle_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
1748      pwd->b[5]  = msa_cle_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
1749      pwd->b[6]  = msa_cle_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
1750      pwd->b[7]  = msa_cle_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
1751      pwd->b[8]  = msa_cle_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
1752      pwd->b[9]  = msa_cle_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
1753      pwd->b[10] = msa_cle_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
1754      pwd->b[11] = msa_cle_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
1755      pwd->b[12] = msa_cle_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
1756      pwd->b[13] = msa_cle_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
1757      pwd->b[14] = msa_cle_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
1758      pwd->b[15] = msa_cle_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
1759  }
1760  
1761  void helper_msa_cle_s_h(CPUMIPSState *env,
1762                          uint32_t wd, uint32_t ws, uint32_t wt)
1763  {
1764      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1765      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1766      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1767  
1768      pwd->h[0]  = msa_cle_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
1769      pwd->h[1]  = msa_cle_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
1770      pwd->h[2]  = msa_cle_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
1771      pwd->h[3]  = msa_cle_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
1772      pwd->h[4]  = msa_cle_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
1773      pwd->h[5]  = msa_cle_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
1774      pwd->h[6]  = msa_cle_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
1775      pwd->h[7]  = msa_cle_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
1776  }
1777  
1778  void helper_msa_cle_s_w(CPUMIPSState *env,
1779                          uint32_t wd, uint32_t ws, uint32_t wt)
1780  {
1781      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1782      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1783      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1784  
1785      pwd->w[0]  = msa_cle_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
1786      pwd->w[1]  = msa_cle_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
1787      pwd->w[2]  = msa_cle_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
1788      pwd->w[3]  = msa_cle_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
1789  }
1790  
1791  void helper_msa_cle_s_d(CPUMIPSState *env,
1792                          uint32_t wd, uint32_t ws, uint32_t wt)
1793  {
1794      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1795      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1796      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1797  
1798      pwd->d[0]  = msa_cle_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1799      pwd->d[1]  = msa_cle_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1800  }
1801  
/*
 * Unsigned less-than-or-equal compare for data format df.
 *
 * Both operands are masked with UNSIGNED() for the given df before being
 * compared; returns -1 when the masked arg1 <= the masked arg2, else 0.
 */
static inline int64_t msa_cle_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    uint64_t lhs = UNSIGNED(arg1, df);
    uint64_t rhs = UNSIGNED(arg2, df);

    if (lhs <= rhs) {
        return -1;
    }
    return 0;
}
1808  
1809  void helper_msa_cle_u_b(CPUMIPSState *env,
1810                          uint32_t wd, uint32_t ws, uint32_t wt)
1811  {
1812      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1813      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1814      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1815  
1816      pwd->b[0]  = msa_cle_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
1817      pwd->b[1]  = msa_cle_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
1818      pwd->b[2]  = msa_cle_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
1819      pwd->b[3]  = msa_cle_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
1820      pwd->b[4]  = msa_cle_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
1821      pwd->b[5]  = msa_cle_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
1822      pwd->b[6]  = msa_cle_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
1823      pwd->b[7]  = msa_cle_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
1824      pwd->b[8]  = msa_cle_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
1825      pwd->b[9]  = msa_cle_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
1826      pwd->b[10] = msa_cle_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
1827      pwd->b[11] = msa_cle_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
1828      pwd->b[12] = msa_cle_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
1829      pwd->b[13] = msa_cle_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
1830      pwd->b[14] = msa_cle_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
1831      pwd->b[15] = msa_cle_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
1832  }
1833  
1834  void helper_msa_cle_u_h(CPUMIPSState *env,
1835                          uint32_t wd, uint32_t ws, uint32_t wt)
1836  {
1837      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1838      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1839      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1840  
1841      pwd->h[0]  = msa_cle_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
1842      pwd->h[1]  = msa_cle_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
1843      pwd->h[2]  = msa_cle_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
1844      pwd->h[3]  = msa_cle_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
1845      pwd->h[4]  = msa_cle_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
1846      pwd->h[5]  = msa_cle_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
1847      pwd->h[6]  = msa_cle_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
1848      pwd->h[7]  = msa_cle_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
1849  }
1850  
1851  void helper_msa_cle_u_w(CPUMIPSState *env,
1852                          uint32_t wd, uint32_t ws, uint32_t wt)
1853  {
1854      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1855      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1856      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1857  
1858      pwd->w[0]  = msa_cle_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
1859      pwd->w[1]  = msa_cle_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
1860      pwd->w[2]  = msa_cle_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
1861      pwd->w[3]  = msa_cle_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
1862  }
1863  
1864  void helper_msa_cle_u_d(CPUMIPSState *env,
1865                          uint32_t wd, uint32_t ws, uint32_t wt)
1866  {
1867      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1868      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1869      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1870  
1871      pwd->d[0]  = msa_cle_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1872      pwd->d[1]  = msa_cle_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1873  }
1874  
/*
 * Signed less-than compare: returns -1 when arg1 < arg2 and 0 otherwise.
 * The df argument is unused.
 */
static inline int64_t msa_clt_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    if (arg1 < arg2) {
        return -1;
    }
    return 0;
}
1879  
/* Signed byte less-than compare: returns -1 when arg1 < arg2, 0 otherwise. */
static inline int8_t msa_clt_s_b(int8_t arg1, int8_t arg2)
{
    if (arg1 < arg2) {
        return -1;
    }
    return 0;
}
1884  
1885  void helper_msa_clt_s_b(CPUMIPSState *env,
1886                          uint32_t wd, uint32_t ws, uint32_t wt)
1887  {
1888      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1889      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1890      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1891  
1892      pwd->b[0]  = msa_clt_s_b(pws->b[0],  pwt->b[0]);
1893      pwd->b[1]  = msa_clt_s_b(pws->b[1],  pwt->b[1]);
1894      pwd->b[2]  = msa_clt_s_b(pws->b[2],  pwt->b[2]);
1895      pwd->b[3]  = msa_clt_s_b(pws->b[3],  pwt->b[3]);
1896      pwd->b[4]  = msa_clt_s_b(pws->b[4],  pwt->b[4]);
1897      pwd->b[5]  = msa_clt_s_b(pws->b[5],  pwt->b[5]);
1898      pwd->b[6]  = msa_clt_s_b(pws->b[6],  pwt->b[6]);
1899      pwd->b[7]  = msa_clt_s_b(pws->b[7],  pwt->b[7]);
1900      pwd->b[8]  = msa_clt_s_b(pws->b[8],  pwt->b[8]);
1901      pwd->b[9]  = msa_clt_s_b(pws->b[9],  pwt->b[9]);
1902      pwd->b[10] = msa_clt_s_b(pws->b[10], pwt->b[10]);
1903      pwd->b[11] = msa_clt_s_b(pws->b[11], pwt->b[11]);
1904      pwd->b[12] = msa_clt_s_b(pws->b[12], pwt->b[12]);
1905      pwd->b[13] = msa_clt_s_b(pws->b[13], pwt->b[13]);
1906      pwd->b[14] = msa_clt_s_b(pws->b[14], pwt->b[14]);
1907      pwd->b[15] = msa_clt_s_b(pws->b[15], pwt->b[15]);
1908  }
1909  
/*
 * Signed halfword less-than compare: returns -1 when arg1 < arg2, 0
 * otherwise.
 */
static inline int16_t msa_clt_s_h(int16_t arg1, int16_t arg2)
{
    if (arg1 < arg2) {
        return -1;
    }
    return 0;
}
1914  
1915  void helper_msa_clt_s_h(CPUMIPSState *env,
1916                          uint32_t wd, uint32_t ws, uint32_t wt)
1917  {
1918      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1919      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1920      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1921  
1922      pwd->h[0]  = msa_clt_s_h(pws->h[0],  pwt->h[0]);
1923      pwd->h[1]  = msa_clt_s_h(pws->h[1],  pwt->h[1]);
1924      pwd->h[2]  = msa_clt_s_h(pws->h[2],  pwt->h[2]);
1925      pwd->h[3]  = msa_clt_s_h(pws->h[3],  pwt->h[3]);
1926      pwd->h[4]  = msa_clt_s_h(pws->h[4],  pwt->h[4]);
1927      pwd->h[5]  = msa_clt_s_h(pws->h[5],  pwt->h[5]);
1928      pwd->h[6]  = msa_clt_s_h(pws->h[6],  pwt->h[6]);
1929      pwd->h[7]  = msa_clt_s_h(pws->h[7],  pwt->h[7]);
1930  }
1931  
/* Signed word less-than compare: returns -1 when arg1 < arg2, 0 otherwise. */
static inline int32_t msa_clt_s_w(int32_t arg1, int32_t arg2)
{
    if (arg1 < arg2) {
        return -1;
    }
    return 0;
}
1936  
1937  void helper_msa_clt_s_w(CPUMIPSState *env,
1938                          uint32_t wd, uint32_t ws, uint32_t wt)
1939  {
1940      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1941      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1942      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1943  
1944      pwd->w[0]  = msa_clt_s_w(pws->w[0],  pwt->w[0]);
1945      pwd->w[1]  = msa_clt_s_w(pws->w[1],  pwt->w[1]);
1946      pwd->w[2]  = msa_clt_s_w(pws->w[2],  pwt->w[2]);
1947      pwd->w[3]  = msa_clt_s_w(pws->w[3],  pwt->w[3]);
1948  }
1949  
/*
 * Signed doubleword less-than compare: returns -1 when arg1 < arg2, 0
 * otherwise.
 */
static inline int64_t msa_clt_s_d(int64_t arg1, int64_t arg2)
{
    if (arg1 < arg2) {
        return -1;
    }
    return 0;
}
1954  
1955  void helper_msa_clt_s_d(CPUMIPSState *env,
1956                          uint32_t wd, uint32_t ws, uint32_t wt)
1957  {
1958      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1959      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1960      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1961  
1962      pwd->d[0]  = msa_clt_s_d(pws->d[0],  pwt->d[0]);
1963      pwd->d[1]  = msa_clt_s_d(pws->d[1],  pwt->d[1]);
1964  }
1965  
/*
 * Unsigned less-than compare for data format df.
 *
 * Both operands are masked with UNSIGNED() for the given df before being
 * compared; returns -1 when the masked arg1 < the masked arg2, else 0.
 */
static inline int64_t msa_clt_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    uint64_t lhs = UNSIGNED(arg1, df);
    uint64_t rhs = UNSIGNED(arg2, df);

    if (lhs < rhs) {
        return -1;
    }
    return 0;
}
1972  
1973  void helper_msa_clt_u_b(CPUMIPSState *env,
1974                          uint32_t wd, uint32_t ws, uint32_t wt)
1975  {
1976      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1977      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1978      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1979  
1980      pwd->b[0]  = msa_clt_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
1981      pwd->b[1]  = msa_clt_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
1982      pwd->b[2]  = msa_clt_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
1983      pwd->b[3]  = msa_clt_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
1984      pwd->b[4]  = msa_clt_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
1985      pwd->b[5]  = msa_clt_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
1986      pwd->b[6]  = msa_clt_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
1987      pwd->b[7]  = msa_clt_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
1988      pwd->b[8]  = msa_clt_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
1989      pwd->b[9]  = msa_clt_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
1990      pwd->b[10] = msa_clt_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
1991      pwd->b[11] = msa_clt_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
1992      pwd->b[12] = msa_clt_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
1993      pwd->b[13] = msa_clt_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
1994      pwd->b[14] = msa_clt_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
1995      pwd->b[15] = msa_clt_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
1996  }
1997  
1998  void helper_msa_clt_u_h(CPUMIPSState *env,
1999                          uint32_t wd, uint32_t ws, uint32_t wt)
2000  {
2001      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2002      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2003      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2004  
2005      pwd->h[0]  = msa_clt_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
2006      pwd->h[1]  = msa_clt_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
2007      pwd->h[2]  = msa_clt_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
2008      pwd->h[3]  = msa_clt_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
2009      pwd->h[4]  = msa_clt_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
2010      pwd->h[5]  = msa_clt_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
2011      pwd->h[6]  = msa_clt_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
2012      pwd->h[7]  = msa_clt_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
2013  }
2014  
2015  void helper_msa_clt_u_w(CPUMIPSState *env,
2016                          uint32_t wd, uint32_t ws, uint32_t wt)
2017  {
2018      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2019      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2020      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2021  
2022      pwd->w[0]  = msa_clt_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
2023      pwd->w[1]  = msa_clt_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
2024      pwd->w[2]  = msa_clt_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
2025      pwd->w[3]  = msa_clt_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
2026  }
2027  
2028  void helper_msa_clt_u_d(CPUMIPSState *env,
2029                          uint32_t wd, uint32_t ws, uint32_t wt)
2030  {
2031      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2032      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2033      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2034  
2035      pwd->d[0]  = msa_clt_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2036      pwd->d[1]  = msa_clt_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2037  }
2038  
2039  
2040  /*
2041   * Int Divide
2042   * ----------
2043   *
2044   * +---------------+----------------------------------------------------------+
2045   * | DIV_S.B       | Vector Signed Divide (byte)                              |
2046   * | DIV_S.H       | Vector Signed Divide (halfword)                          |
2047   * | DIV_S.W       | Vector Signed Divide (word)                              |
2048   * | DIV_S.D       | Vector Signed Divide (doubleword)                        |
2049   * | DIV_U.B       | Vector Unsigned Divide (byte)                            |
2050   * | DIV_U.H       | Vector Unsigned Divide (halfword)                        |
2051   * | DIV_U.W       | Vector Unsigned Divide (word)                            |
2052   * | DIV_U.D       | Vector Unsigned Divide (doubleword)                      |
2053   * +---------------+----------------------------------------------------------+
2054   */
2055  
2056  
/*
 * Signed division of one element of data format df.
 *
 * Special cases:
 *  - a zero divisor yields -1 for a non-negative dividend and 1 for a
 *    negative one;
 *  - DF_MIN_INT(df) divided by -1 yields DF_MIN_INT(df), so the
 *    overflowing quotient is never computed.
 * Every other input returns arg1 / arg2.
 */
static inline int64_t msa_div_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const int64_t min_int = DF_MIN_INT(df);

    if (arg2 == 0) {
        return arg1 < 0 ? 1 : -1;
    }
    if (arg2 == -1 && arg1 == min_int) {
        return min_int;
    }
    return arg1 / arg2;
}
2065  
2066  void helper_msa_div_s_b(CPUMIPSState *env,
2067                          uint32_t wd, uint32_t ws, uint32_t wt)
2068  {
2069      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2070      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2071      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2072  
2073      pwd->b[0]  = msa_div_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
2074      pwd->b[1]  = msa_div_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
2075      pwd->b[2]  = msa_div_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
2076      pwd->b[3]  = msa_div_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
2077      pwd->b[4]  = msa_div_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
2078      pwd->b[5]  = msa_div_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
2079      pwd->b[6]  = msa_div_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
2080      pwd->b[7]  = msa_div_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
2081      pwd->b[8]  = msa_div_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
2082      pwd->b[9]  = msa_div_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
2083      pwd->b[10] = msa_div_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
2084      pwd->b[11] = msa_div_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
2085      pwd->b[12] = msa_div_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
2086      pwd->b[13] = msa_div_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
2087      pwd->b[14] = msa_div_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
2088      pwd->b[15] = msa_div_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
2089  }
2090  
2091  void helper_msa_div_s_h(CPUMIPSState *env,
2092                          uint32_t wd, uint32_t ws, uint32_t wt)
2093  {
2094      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2095      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2096      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2097  
2098      pwd->h[0]  = msa_div_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
2099      pwd->h[1]  = msa_div_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
2100      pwd->h[2]  = msa_div_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
2101      pwd->h[3]  = msa_div_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
2102      pwd->h[4]  = msa_div_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
2103      pwd->h[5]  = msa_div_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
2104      pwd->h[6]  = msa_div_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
2105      pwd->h[7]  = msa_div_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
2106  }
2107  
2108  void helper_msa_div_s_w(CPUMIPSState *env,
2109                          uint32_t wd, uint32_t ws, uint32_t wt)
2110  {
2111      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2112      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2113      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2114  
2115      pwd->w[0]  = msa_div_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
2116      pwd->w[1]  = msa_div_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
2117      pwd->w[2]  = msa_div_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
2118      pwd->w[3]  = msa_div_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
2119  }
2120  
2121  void helper_msa_div_s_d(CPUMIPSState *env,
2122                          uint32_t wd, uint32_t ws, uint32_t wt)
2123  {
2124      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2125      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2126      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2127  
2128      pwd->d[0]  = msa_div_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2129      pwd->d[1]  = msa_div_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2130  }
2131  
/*
 * Unsigned division of one element of data format df.
 *
 * Both operands are masked to their df-wide unsigned value with
 * UNSIGNED() before dividing. A zero divisor yields -1 (all bits set).
 */
static inline int64_t msa_div_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    uint64_t u_arg1 = UNSIGNED(arg1, df);
    uint64_t u_arg2 = UNSIGNED(arg2, df);

    /*
     * Guard on the masked divisor, since that is the value actually
     * divided by: an arg2 whose only set bits lie above the element
     * width is non-zero itself but masks to zero.
     */
    return u_arg2 ? u_arg1 / u_arg2 : -1;
}
2138  
2139  void helper_msa_div_u_b(CPUMIPSState *env,
2140                          uint32_t wd, uint32_t ws, uint32_t wt)
2141  {
2142      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2143      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2144      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2145  
2146      pwd->b[0]  = msa_div_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
2147      pwd->b[1]  = msa_div_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
2148      pwd->b[2]  = msa_div_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
2149      pwd->b[3]  = msa_div_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
2150      pwd->b[4]  = msa_div_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
2151      pwd->b[5]  = msa_div_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
2152      pwd->b[6]  = msa_div_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
2153      pwd->b[7]  = msa_div_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
2154      pwd->b[8]  = msa_div_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
2155      pwd->b[9]  = msa_div_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
2156      pwd->b[10] = msa_div_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
2157      pwd->b[11] = msa_div_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
2158      pwd->b[12] = msa_div_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
2159      pwd->b[13] = msa_div_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
2160      pwd->b[14] = msa_div_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
2161      pwd->b[15] = msa_div_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
2162  }
2163  
2164  void helper_msa_div_u_h(CPUMIPSState *env,
2165                          uint32_t wd, uint32_t ws, uint32_t wt)
2166  {
2167      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2168      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2169      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2170  
2171      pwd->h[0]  = msa_div_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
2172      pwd->h[1]  = msa_div_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
2173      pwd->h[2]  = msa_div_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
2174      pwd->h[3]  = msa_div_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
2175      pwd->h[4]  = msa_div_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
2176      pwd->h[5]  = msa_div_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
2177      pwd->h[6]  = msa_div_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
2178      pwd->h[7]  = msa_div_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
2179  }
2180  
2181  void helper_msa_div_u_w(CPUMIPSState *env,
2182                          uint32_t wd, uint32_t ws, uint32_t wt)
2183  {
2184      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2185      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2186      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2187  
2188      pwd->w[0]  = msa_div_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
2189      pwd->w[1]  = msa_div_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
2190      pwd->w[2]  = msa_div_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
2191      pwd->w[3]  = msa_div_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
2192  }
2193  
2194  void helper_msa_div_u_d(CPUMIPSState *env,
2195                          uint32_t wd, uint32_t ws, uint32_t wt)
2196  {
2197      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2198      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2199      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2200  
2201      pwd->d[0]  = msa_div_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2202      pwd->d[1]  = msa_div_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2203  }
2204  
2205  
2206  /*
2207   * Int Dot Product
2208   * ---------------
2209   *
2210   * +---------------+----------------------------------------------------------+
2211   * | DOTP_S.H      | Vector Signed Dot Product (halfword)                     |
2212   * | DOTP_S.W      | Vector Signed Dot Product (word)                         |
2213   * | DOTP_S.D      | Vector Signed Dot Product (doubleword)                   |
2214   * | DOTP_U.H      | Vector Unsigned Dot Product (halfword)                   |
2215   * | DOTP_U.W      | Vector Unsigned Dot Product (word)                       |
2216   * | DOTP_U.D      | Vector Unsigned Dot Product (doubleword)                 |
 * | DPADD_S.H     | Vector Signed Dot Product and Add (halfword)             |
 * | DPADD_S.W     | Vector Signed Dot Product and Add (word)                 |
 * | DPADD_S.D     | Vector Signed Dot Product and Add (doubleword)           |
 * | DPADD_U.H     | Vector Unsigned Dot Product and Add (halfword)           |
 * | DPADD_U.W     | Vector Unsigned Dot Product and Add (word)               |
 * | DPADD_U.D     | Vector Unsigned Dot Product and Add (doubleword)         |
 * | DPSUB_S.H     | Vector Signed Dot Product and Subtract (halfword)        |
 * | DPSUB_S.W     | Vector Signed Dot Product and Subtract (word)            |
 * | DPSUB_S.D     | Vector Signed Dot Product and Subtract (doubleword)      |
 * | DPSUB_U.H     | Vector Unsigned Dot Product and Subtract (halfword)      |
 * | DPSUB_U.W     | Vector Unsigned Dot Product and Subtract (word)          |
 * | DPSUB_U.D     | Vector Unsigned Dot Product and Subtract (doubleword)    |
2229   * +---------------+----------------------------------------------------------+
2230   */
2231  
/*
 * Store SIGNED_EVEN(a, df) in e and SIGNED_ODD(a, df) in o.
 *
 * e and o must be modifiable lvalues; they are parenthesized so that any
 * lvalue expression can be passed safely. a and df are handed to both
 * inner macros, so they should be free of side effects.
 */
#define SIGNED_EXTRACT(e, o, a, df)     \
    do {                                \
        (e) = SIGNED_EVEN(a, df);       \
        (o) = SIGNED_ODD(a, df);        \
    } while (0)
2237  
/*
 * Store UNSIGNED_EVEN(a, df) in e and UNSIGNED_ODD(a, df) in o.
 *
 * e and o must be modifiable lvalues; they are parenthesized so that any
 * lvalue expression can be passed safely. a and df are handed to both
 * inner macros, so they should be free of side effects.
 */
#define UNSIGNED_EXTRACT(e, o, a, df)   \
    do {                                \
        (e) = UNSIGNED_EVEN(a, df);     \
        (o) = UNSIGNED_ODD(a, df);      \
    } while (0)
2243  
2244  
/*
 * Signed dot product of one element pair of data format df.
 *
 * SIGNED_EXTRACT() yields the SIGNED_EVEN()/SIGNED_ODD() parts of each
 * operand; the result is even1 * even2 + odd1 * odd2.
 *
 * The products and their sum are formed in uint64_t, i.e. modulo 2^64.
 * This gives the same bit pattern as wrapping signed arithmetic without
 * relying on signed overflow, which is undefined behaviour in ISO C.
 */
static inline int64_t msa_dotp_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    int64_t even_arg1;
    int64_t even_arg2;
    int64_t odd_arg1;
    int64_t odd_arg2;
    uint64_t sum;

    SIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df);
    SIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df);

    sum = ((uint64_t)even_arg1 * (uint64_t)even_arg2)
        + ((uint64_t)odd_arg1 * (uint64_t)odd_arg2);
    return (int64_t)sum;
}
2255  
2256  void helper_msa_dotp_s_h(CPUMIPSState *env,
2257                           uint32_t wd, uint32_t ws, uint32_t wt)
2258  {
2259      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2260      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2261      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2262  
2263      pwd->h[0]  = msa_dotp_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
2264      pwd->h[1]  = msa_dotp_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
2265      pwd->h[2]  = msa_dotp_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
2266      pwd->h[3]  = msa_dotp_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
2267      pwd->h[4]  = msa_dotp_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
2268      pwd->h[5]  = msa_dotp_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
2269      pwd->h[6]  = msa_dotp_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
2270      pwd->h[7]  = msa_dotp_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
2271  }
2272  
2273  void helper_msa_dotp_s_w(CPUMIPSState *env,
2274                           uint32_t wd, uint32_t ws, uint32_t wt)
2275  {
2276      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2277      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2278      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2279  
2280      pwd->w[0]  = msa_dotp_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
2281      pwd->w[1]  = msa_dotp_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
2282      pwd->w[2]  = msa_dotp_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
2283      pwd->w[3]  = msa_dotp_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
2284  }
2285  
2286  void helper_msa_dotp_s_d(CPUMIPSState *env,
2287                           uint32_t wd, uint32_t ws, uint32_t wt)
2288  {
2289      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2290      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2291      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2292  
2293      pwd->d[0]  = msa_dotp_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2294      pwd->d[1]  = msa_dotp_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2295  }
2296  
2297  
/*
 * Unsigned dot product of one element pair of data format df.
 *
 * UNSIGNED_EXTRACT() yields the UNSIGNED_EVEN()/UNSIGNED_ODD() parts of
 * each operand; the result is even1 * even2 + odd1 * odd2.
 *
 * The parts are held in uint64_t so the products and their sum wrap
 * modulo 2^64 with defined behaviour. Doing the same arithmetic in
 * int64_t could overflow, which is undefined behaviour in ISO C.
 */
static inline int64_t msa_dotp_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    uint64_t even_arg1;
    uint64_t even_arg2;
    uint64_t odd_arg1;
    uint64_t odd_arg2;

    UNSIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df);
    UNSIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df);

    return (int64_t)((even_arg1 * even_arg2) + (odd_arg1 * odd_arg2));
}
2308  
2309  void helper_msa_dotp_u_h(CPUMIPSState *env,
2310                           uint32_t wd, uint32_t ws, uint32_t wt)
2311  {
2312      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2313      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2314      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2315  
2316      pwd->h[0]  = msa_dotp_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
2317      pwd->h[1]  = msa_dotp_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
2318      pwd->h[2]  = msa_dotp_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
2319      pwd->h[3]  = msa_dotp_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
2320      pwd->h[4]  = msa_dotp_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
2321      pwd->h[5]  = msa_dotp_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
2322      pwd->h[6]  = msa_dotp_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
2323      pwd->h[7]  = msa_dotp_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
2324  }
2325  
2326  void helper_msa_dotp_u_w(CPUMIPSState *env,
2327                           uint32_t wd, uint32_t ws, uint32_t wt)
2328  {
2329      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2330      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2331      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2332  
2333      pwd->w[0]  = msa_dotp_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
2334      pwd->w[1]  = msa_dotp_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
2335      pwd->w[2]  = msa_dotp_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
2336      pwd->w[3]  = msa_dotp_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
2337  }
2338  
2339  void helper_msa_dotp_u_d(CPUMIPSState *env,
2340                           uint32_t wd, uint32_t ws, uint32_t wt)
2341  {
2342      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2343      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2344      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2345  
2346      pwd->d[0]  = msa_dotp_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2347      pwd->d[1]  = msa_dotp_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2348  }
2349  
2350  
/*
 * Signed dot product with accumulation for one element of format df.
 *
 * SIGNED_EXTRACT() yields the SIGNED_EVEN()/SIGNED_ODD() parts of arg1
 * and arg2; the result is dest + even1 * even2 + odd1 * odd2.
 *
 * The whole expression is evaluated in uint64_t, i.e. modulo 2^64. This
 * gives the same bit pattern as wrapping signed arithmetic without
 * relying on signed overflow, which is undefined behaviour in ISO C.
 */
static inline int64_t msa_dpadd_s_df(uint32_t df, int64_t dest, int64_t arg1,
                                     int64_t arg2)
{
    int64_t even_arg1;
    int64_t even_arg2;
    int64_t odd_arg1;
    int64_t odd_arg2;
    uint64_t acc;

    SIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df);
    SIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df);

    acc = (uint64_t)dest
        + ((uint64_t)even_arg1 * (uint64_t)even_arg2)
        + ((uint64_t)odd_arg1 * (uint64_t)odd_arg2);
    return (int64_t)acc;
}
2362  
2363  void helper_msa_dpadd_s_h(CPUMIPSState *env,
2364                            uint32_t wd, uint32_t ws, uint32_t wt)
2365  {
2366      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2367      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2368      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2369  
2370      pwd->h[0]  = msa_dpadd_s_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
2371      pwd->h[1]  = msa_dpadd_s_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
2372      pwd->h[2]  = msa_dpadd_s_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
2373      pwd->h[3]  = msa_dpadd_s_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
2374      pwd->h[4]  = msa_dpadd_s_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
2375      pwd->h[5]  = msa_dpadd_s_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
2376      pwd->h[6]  = msa_dpadd_s_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
2377      pwd->h[7]  = msa_dpadd_s_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
2378  }
2379  
2380  void helper_msa_dpadd_s_w(CPUMIPSState *env,
2381                            uint32_t wd, uint32_t ws, uint32_t wt)
2382  {
2383      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2384      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2385      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2386  
2387      pwd->w[0]  = msa_dpadd_s_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
2388      pwd->w[1]  = msa_dpadd_s_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
2389      pwd->w[2]  = msa_dpadd_s_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
2390      pwd->w[3]  = msa_dpadd_s_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
2391  }
2392  
2393  void helper_msa_dpadd_s_d(CPUMIPSState *env,
2394                            uint32_t wd, uint32_t ws, uint32_t wt)
2395  {
2396      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2397      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2398      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2399  
2400      pwd->d[0]  = msa_dpadd_s_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
2401      pwd->d[1]  = msa_dpadd_s_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
2402  }
2403  
2404  
/*
 * Unsigned dot product with accumulation for one element of format df.
 *
 * UNSIGNED_EXTRACT() yields the UNSIGNED_EVEN()/UNSIGNED_ODD() parts of
 * arg1 and arg2; the result is dest + even1 * even2 + odd1 * odd2.
 *
 * The parts are held in uint64_t and dest is converted to uint64_t, so
 * the whole expression wraps modulo 2^64 with defined behaviour. Doing
 * the same arithmetic in int64_t could overflow, which is undefined
 * behaviour in ISO C.
 */
static inline int64_t msa_dpadd_u_df(uint32_t df, int64_t dest, int64_t arg1,
                                     int64_t arg2)
{
    uint64_t even_arg1;
    uint64_t even_arg2;
    uint64_t odd_arg1;
    uint64_t odd_arg2;

    UNSIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df);
    UNSIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df);

    return (int64_t)((uint64_t)dest + (even_arg1 * even_arg2)
                     + (odd_arg1 * odd_arg2));
}
2416  
2417  void helper_msa_dpadd_u_h(CPUMIPSState *env,
2418                            uint32_t wd, uint32_t ws, uint32_t wt)
2419  {
2420      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2421      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2422      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2423  
2424      pwd->h[0]  = msa_dpadd_u_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
2425      pwd->h[1]  = msa_dpadd_u_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
2426      pwd->h[2]  = msa_dpadd_u_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
2427      pwd->h[3]  = msa_dpadd_u_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
2428      pwd->h[4]  = msa_dpadd_u_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
2429      pwd->h[5]  = msa_dpadd_u_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
2430      pwd->h[6]  = msa_dpadd_u_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
2431      pwd->h[7]  = msa_dpadd_u_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
2432  }
2433  
2434  void helper_msa_dpadd_u_w(CPUMIPSState *env,
2435                            uint32_t wd, uint32_t ws, uint32_t wt)
2436  {
2437      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2438      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2439      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2440  
2441      pwd->w[0]  = msa_dpadd_u_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
2442      pwd->w[1]  = msa_dpadd_u_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
2443      pwd->w[2]  = msa_dpadd_u_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
2444      pwd->w[3]  = msa_dpadd_u_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
2445  }
2446  
2447  void helper_msa_dpadd_u_d(CPUMIPSState *env,
2448                            uint32_t wd, uint32_t ws, uint32_t wt)
2449  {
2450      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2451      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2452      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2453  
2454      pwd->d[0]  = msa_dpadd_u_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
2455      pwd->d[1]  = msa_dpadd_u_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
2456  }
2457  
2458  
/*
 * Signed dot product with subtraction for one element of format df.
 *
 * SIGNED_EXTRACT() yields the SIGNED_EVEN()/SIGNED_ODD() parts of arg1
 * and arg2; the result is dest - (even1 * even2 + odd1 * odd2).
 *
 * The whole expression is evaluated in uint64_t, i.e. modulo 2^64. This
 * gives the same bit pattern as wrapping signed arithmetic without
 * relying on signed overflow, which is undefined behaviour in ISO C.
 */
static inline int64_t msa_dpsub_s_df(uint32_t df, int64_t dest, int64_t arg1,
                                     int64_t arg2)
{
    int64_t even_arg1;
    int64_t even_arg2;
    int64_t odd_arg1;
    int64_t odd_arg2;
    uint64_t dotp;

    SIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df);
    SIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df);

    dotp = ((uint64_t)even_arg1 * (uint64_t)even_arg2)
         + ((uint64_t)odd_arg1 * (uint64_t)odd_arg2);
    return (int64_t)((uint64_t)dest - dotp);
}
2470  
2471  void helper_msa_dpsub_s_h(CPUMIPSState *env,
2472                            uint32_t wd, uint32_t ws, uint32_t wt)
2473  {
2474      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2475      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2476      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2477  
2478      pwd->h[0]  = msa_dpsub_s_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
2479      pwd->h[1]  = msa_dpsub_s_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
2480      pwd->h[2]  = msa_dpsub_s_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
2481      pwd->h[3]  = msa_dpsub_s_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
2482      pwd->h[4]  = msa_dpsub_s_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
2483      pwd->h[5]  = msa_dpsub_s_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
2484      pwd->h[6]  = msa_dpsub_s_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
2485      pwd->h[7]  = msa_dpsub_s_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
2486  }
2487  
2488  void helper_msa_dpsub_s_w(CPUMIPSState *env,
2489                            uint32_t wd, uint32_t ws, uint32_t wt)
2490  {
2491      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2492      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2493      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2494  
2495      pwd->w[0]  = msa_dpsub_s_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
2496      pwd->w[1]  = msa_dpsub_s_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
2497      pwd->w[2]  = msa_dpsub_s_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
2498      pwd->w[3]  = msa_dpsub_s_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
2499  }
2500  
2501  void helper_msa_dpsub_s_d(CPUMIPSState *env,
2502                            uint32_t wd, uint32_t ws, uint32_t wt)
2503  {
2504      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2505      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2506      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2507  
2508      pwd->d[0]  = msa_dpsub_s_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
2509      pwd->d[1]  = msa_dpsub_s_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
2510  }
2511  
2512  
/*
 * Unsigned dot product with subtraction for one element of format df.
 *
 * UNSIGNED_EXTRACT() yields the UNSIGNED_EVEN()/UNSIGNED_ODD() parts of
 * arg1 and arg2; the result is dest - (even1 * even2 + odd1 * odd2).
 *
 * The parts are held in uint64_t and dest is converted to uint64_t, so
 * the whole expression wraps modulo 2^64 with defined behaviour. Doing
 * the same arithmetic in int64_t could overflow, which is undefined
 * behaviour in ISO C.
 */
static inline int64_t msa_dpsub_u_df(uint32_t df, int64_t dest, int64_t arg1,
                                     int64_t arg2)
{
    uint64_t even_arg1;
    uint64_t even_arg2;
    uint64_t odd_arg1;
    uint64_t odd_arg2;

    UNSIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df);
    UNSIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df);

    return (int64_t)((uint64_t)dest
                     - ((even_arg1 * even_arg2) + (odd_arg1 * odd_arg2)));
}
2524  
2525  void helper_msa_dpsub_u_h(CPUMIPSState *env,
2526                            uint32_t wd, uint32_t ws, uint32_t wt)
2527  {
2528      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2529      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2530      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2531  
2532      pwd->h[0]  = msa_dpsub_u_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
2533      pwd->h[1]  = msa_dpsub_u_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
2534      pwd->h[2]  = msa_dpsub_u_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
2535      pwd->h[3]  = msa_dpsub_u_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
2536      pwd->h[4]  = msa_dpsub_u_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
2537      pwd->h[5]  = msa_dpsub_u_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
2538      pwd->h[6]  = msa_dpsub_u_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
2539      pwd->h[7]  = msa_dpsub_u_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
2540  }
2541  
2542  void helper_msa_dpsub_u_w(CPUMIPSState *env,
2543                            uint32_t wd, uint32_t ws, uint32_t wt)
2544  {
2545      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2546      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2547      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2548  
2549      pwd->w[0]  = msa_dpsub_u_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
2550      pwd->w[1]  = msa_dpsub_u_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
2551      pwd->w[2]  = msa_dpsub_u_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
2552      pwd->w[3]  = msa_dpsub_u_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
2553  }
2554  
2555  void helper_msa_dpsub_u_d(CPUMIPSState *env,
2556                            uint32_t wd, uint32_t ws, uint32_t wt)
2557  {
2558      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2559      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2560      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2561  
2562      pwd->d[0]  = msa_dpsub_u_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
2563      pwd->d[1]  = msa_dpsub_u_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
2564  }
2565  
2566  
2567  /*
2568   * Int Max Min
2569   * -----------
2570   *
2571   * +---------------+----------------------------------------------------------+
2572   * | MAX_A.B       | Vector Maximum Based on Absolute Value (byte)            |
2573   * | MAX_A.H       | Vector Maximum Based on Absolute Value (halfword)        |
2574   * | MAX_A.W       | Vector Maximum Based on Absolute Value (word)            |
2575   * | MAX_A.D       | Vector Maximum Based on Absolute Value (doubleword)      |
2576   * | MAX_S.B       | Vector Signed Maximum (byte)                             |
2577   * | MAX_S.H       | Vector Signed Maximum (halfword)                         |
2578   * | MAX_S.W       | Vector Signed Maximum (word)                             |
2579   * | MAX_S.D       | Vector Signed Maximum (doubleword)                       |
2580   * | MAX_U.B       | Vector Unsigned Maximum (byte)                           |
2581   * | MAX_U.H       | Vector Unsigned Maximum (halfword)                       |
2582   * | MAX_U.W       | Vector Unsigned Maximum (word)                           |
2583   * | MAX_U.D       | Vector Unsigned Maximum (doubleword)                     |
2584   * | MIN_A.B       | Vector Minimum Based on Absolute Value (byte)            |
2585   * | MIN_A.H       | Vector Minimum Based on Absolute Value (halfword)        |
2586   * | MIN_A.W       | Vector Minimum Based on Absolute Value (word)            |
2587   * | MIN_A.D       | Vector Minimum Based on Absolute Value (doubleword)      |
2588   * | MIN_S.B       | Vector Signed Minimum (byte)                             |
2589   * | MIN_S.H       | Vector Signed Minimum (halfword)                         |
2590   * | MIN_S.W       | Vector Signed Minimum (word)                             |
2591   * | MIN_S.D       | Vector Signed Minimum (doubleword)                       |
2592   * | MIN_U.B       | Vector Unsigned Minimum (byte)                           |
2593   * | MIN_U.H       | Vector Unsigned Minimum (halfword)                       |
2594   * | MIN_U.W       | Vector Unsigned Minimum (word)                           |
2595   * | MIN_U.D       | Vector Unsigned Minimum (doubleword)                     |
2596   * +---------------+----------------------------------------------------------+
2597   */
2598  
/*
 * Maximum based on absolute value.
 *
 * Returns arg1 when |arg1| is strictly greater than |arg2|, otherwise
 * arg2 (so arg2 wins ties). The df parameter is not used.
 *
 * The magnitudes are computed by negating in uint64_t: negating the most
 * negative int64_t as a signed value would overflow, which is undefined
 * behaviour in ISO C, whereas the unsigned negation yields 2^63.
 */
static inline int64_t msa_max_a_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    uint64_t abs_arg1 = arg1 >= 0 ? (uint64_t)arg1 : -(uint64_t)arg1;
    uint64_t abs_arg2 = arg2 >= 0 ? (uint64_t)arg2 : -(uint64_t)arg2;

    return abs_arg1 > abs_arg2 ? arg1 : arg2;
}
2605  
2606  void helper_msa_max_a_b(CPUMIPSState *env,
2607                          uint32_t wd, uint32_t ws, uint32_t wt)
2608  {
2609      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2610      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2611      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2612  
2613      pwd->b[0]  = msa_max_a_df(DF_BYTE, pws->b[0],  pwt->b[0]);
2614      pwd->b[1]  = msa_max_a_df(DF_BYTE, pws->b[1],  pwt->b[1]);
2615      pwd->b[2]  = msa_max_a_df(DF_BYTE, pws->b[2],  pwt->b[2]);
2616      pwd->b[3]  = msa_max_a_df(DF_BYTE, pws->b[3],  pwt->b[3]);
2617      pwd->b[4]  = msa_max_a_df(DF_BYTE, pws->b[4],  pwt->b[4]);
2618      pwd->b[5]  = msa_max_a_df(DF_BYTE, pws->b[5],  pwt->b[5]);
2619      pwd->b[6]  = msa_max_a_df(DF_BYTE, pws->b[6],  pwt->b[6]);
2620      pwd->b[7]  = msa_max_a_df(DF_BYTE, pws->b[7],  pwt->b[7]);
2621      pwd->b[8]  = msa_max_a_df(DF_BYTE, pws->b[8],  pwt->b[8]);
2622      pwd->b[9]  = msa_max_a_df(DF_BYTE, pws->b[9],  pwt->b[9]);
2623      pwd->b[10] = msa_max_a_df(DF_BYTE, pws->b[10], pwt->b[10]);
2624      pwd->b[11] = msa_max_a_df(DF_BYTE, pws->b[11], pwt->b[11]);
2625      pwd->b[12] = msa_max_a_df(DF_BYTE, pws->b[12], pwt->b[12]);
2626      pwd->b[13] = msa_max_a_df(DF_BYTE, pws->b[13], pwt->b[13]);
2627      pwd->b[14] = msa_max_a_df(DF_BYTE, pws->b[14], pwt->b[14]);
2628      pwd->b[15] = msa_max_a_df(DF_BYTE, pws->b[15], pwt->b[15]);
2629  }
2630  
2631  void helper_msa_max_a_h(CPUMIPSState *env,
2632                          uint32_t wd, uint32_t ws, uint32_t wt)
2633  {
2634      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2635      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2636      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2637  
2638      pwd->h[0]  = msa_max_a_df(DF_HALF, pws->h[0],  pwt->h[0]);
2639      pwd->h[1]  = msa_max_a_df(DF_HALF, pws->h[1],  pwt->h[1]);
2640      pwd->h[2]  = msa_max_a_df(DF_HALF, pws->h[2],  pwt->h[2]);
2641      pwd->h[3]  = msa_max_a_df(DF_HALF, pws->h[3],  pwt->h[3]);
2642      pwd->h[4]  = msa_max_a_df(DF_HALF, pws->h[4],  pwt->h[4]);
2643      pwd->h[5]  = msa_max_a_df(DF_HALF, pws->h[5],  pwt->h[5]);
2644      pwd->h[6]  = msa_max_a_df(DF_HALF, pws->h[6],  pwt->h[6]);
2645      pwd->h[7]  = msa_max_a_df(DF_HALF, pws->h[7],  pwt->h[7]);
2646  }
2647  
2648  void helper_msa_max_a_w(CPUMIPSState *env,
2649                          uint32_t wd, uint32_t ws, uint32_t wt)
2650  {
2651      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2652      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2653      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2654  
2655      pwd->w[0]  = msa_max_a_df(DF_WORD, pws->w[0],  pwt->w[0]);
2656      pwd->w[1]  = msa_max_a_df(DF_WORD, pws->w[1],  pwt->w[1]);
2657      pwd->w[2]  = msa_max_a_df(DF_WORD, pws->w[2],  pwt->w[2]);
2658      pwd->w[3]  = msa_max_a_df(DF_WORD, pws->w[3],  pwt->w[3]);
2659  }
2660  
2661  void helper_msa_max_a_d(CPUMIPSState *env,
2662                          uint32_t wd, uint32_t ws, uint32_t wt)
2663  {
2664      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2665      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2666      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2667  
2668      pwd->d[0]  = msa_max_a_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2669      pwd->d[1]  = msa_max_a_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2670  }
2671  
2672  
/*
 * Signed maximum: return the larger of arg1 and arg2 (arg2 when they are
 * equal).  The df argument is not used by the comparison.
 */
static inline int64_t msa_max_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    if (arg1 > arg2) {
        return arg1;
    }
    return arg2;
}
2677  
2678  void helper_msa_max_s_b(CPUMIPSState *env,
2679                          uint32_t wd, uint32_t ws, uint32_t wt)
2680  {
2681      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2682      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2683      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2684  
2685      pwd->b[0]  = msa_max_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
2686      pwd->b[1]  = msa_max_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
2687      pwd->b[2]  = msa_max_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
2688      pwd->b[3]  = msa_max_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
2689      pwd->b[4]  = msa_max_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
2690      pwd->b[5]  = msa_max_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
2691      pwd->b[6]  = msa_max_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
2692      pwd->b[7]  = msa_max_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
2693      pwd->b[8]  = msa_max_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
2694      pwd->b[9]  = msa_max_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
2695      pwd->b[10] = msa_max_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
2696      pwd->b[11] = msa_max_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
2697      pwd->b[12] = msa_max_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
2698      pwd->b[13] = msa_max_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
2699      pwd->b[14] = msa_max_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
2700      pwd->b[15] = msa_max_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
2701  }
2702  
2703  void helper_msa_max_s_h(CPUMIPSState *env,
2704                          uint32_t wd, uint32_t ws, uint32_t wt)
2705  {
2706      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2707      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2708      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2709  
2710      pwd->h[0]  = msa_max_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
2711      pwd->h[1]  = msa_max_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
2712      pwd->h[2]  = msa_max_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
2713      pwd->h[3]  = msa_max_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
2714      pwd->h[4]  = msa_max_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
2715      pwd->h[5]  = msa_max_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
2716      pwd->h[6]  = msa_max_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
2717      pwd->h[7]  = msa_max_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
2718  }
2719  
2720  void helper_msa_max_s_w(CPUMIPSState *env,
2721                          uint32_t wd, uint32_t ws, uint32_t wt)
2722  {
2723      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2724      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2725      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2726  
2727      pwd->w[0]  = msa_max_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
2728      pwd->w[1]  = msa_max_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
2729      pwd->w[2]  = msa_max_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
2730      pwd->w[3]  = msa_max_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
2731  }
2732  
2733  void helper_msa_max_s_d(CPUMIPSState *env,
2734                          uint32_t wd, uint32_t ws, uint32_t wt)
2735  {
2736      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2737      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2738      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2739  
2740      pwd->d[0]  = msa_max_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2741      pwd->d[1]  = msa_max_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2742  }
2743  
2744  
/*
 * Unsigned maximum: both operands are masked to the element width selected
 * by df (via UNSIGNED) before being compared.  The value returned is the
 * original, unmasked operand; arg2 is returned when the masked values are
 * equal.
 */
static inline int64_t msa_max_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    if (UNSIGNED(arg1, df) > UNSIGNED(arg2, df)) {
        return arg1;
    }
    return arg2;
}
2751  
2752  void helper_msa_max_u_b(CPUMIPSState *env,
2753                          uint32_t wd, uint32_t ws, uint32_t wt)
2754  {
2755      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2756      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2757      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2758  
2759      pwd->b[0]  = msa_max_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
2760      pwd->b[1]  = msa_max_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
2761      pwd->b[2]  = msa_max_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
2762      pwd->b[3]  = msa_max_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
2763      pwd->b[4]  = msa_max_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
2764      pwd->b[5]  = msa_max_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
2765      pwd->b[6]  = msa_max_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
2766      pwd->b[7]  = msa_max_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
2767      pwd->b[8]  = msa_max_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
2768      pwd->b[9]  = msa_max_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
2769      pwd->b[10] = msa_max_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
2770      pwd->b[11] = msa_max_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
2771      pwd->b[12] = msa_max_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
2772      pwd->b[13] = msa_max_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
2773      pwd->b[14] = msa_max_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
2774      pwd->b[15] = msa_max_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
2775  }
2776  
2777  void helper_msa_max_u_h(CPUMIPSState *env,
2778                          uint32_t wd, uint32_t ws, uint32_t wt)
2779  {
2780      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2781      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2782      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2783  
2784      pwd->h[0]  = msa_max_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
2785      pwd->h[1]  = msa_max_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
2786      pwd->h[2]  = msa_max_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
2787      pwd->h[3]  = msa_max_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
2788      pwd->h[4]  = msa_max_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
2789      pwd->h[5]  = msa_max_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
2790      pwd->h[6]  = msa_max_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
2791      pwd->h[7]  = msa_max_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
2792  }
2793  
2794  void helper_msa_max_u_w(CPUMIPSState *env,
2795                          uint32_t wd, uint32_t ws, uint32_t wt)
2796  {
2797      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2798      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2799      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2800  
2801      pwd->w[0]  = msa_max_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
2802      pwd->w[1]  = msa_max_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
2803      pwd->w[2]  = msa_max_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
2804      pwd->w[3]  = msa_max_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
2805  }
2806  
2807  void helper_msa_max_u_d(CPUMIPSState *env,
2808                          uint32_t wd, uint32_t ws, uint32_t wt)
2809  {
2810      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2811      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2812      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2813  
2814      pwd->d[0]  = msa_max_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2815      pwd->d[1]  = msa_max_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2816  }
2817  
2818  
/*
 * Minimum by absolute value: return whichever of arg1/arg2 has the smaller
 * magnitude.  When both magnitudes are equal, arg2 is returned.  The df
 * argument is not used.
 *
 * The magnitude is formed with unsigned negation.  Negating INT64_MIN as a
 * signed value overflows (undefined behaviour in ISO C), whereas
 * -(uint64_t)INT64_MIN is well defined and yields 2^63.
 */
static inline int64_t msa_min_a_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    uint64_t abs_arg1 = arg1 >= 0 ? (uint64_t)arg1 : -(uint64_t)arg1;
    uint64_t abs_arg2 = arg2 >= 0 ? (uint64_t)arg2 : -(uint64_t)arg2;
    return abs_arg1 < abs_arg2 ? arg1 : arg2;
}
2825  
2826  void helper_msa_min_a_b(CPUMIPSState *env,
2827                          uint32_t wd, uint32_t ws, uint32_t wt)
2828  {
2829      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2830      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2831      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2832  
2833      pwd->b[0]  = msa_min_a_df(DF_BYTE, pws->b[0],  pwt->b[0]);
2834      pwd->b[1]  = msa_min_a_df(DF_BYTE, pws->b[1],  pwt->b[1]);
2835      pwd->b[2]  = msa_min_a_df(DF_BYTE, pws->b[2],  pwt->b[2]);
2836      pwd->b[3]  = msa_min_a_df(DF_BYTE, pws->b[3],  pwt->b[3]);
2837      pwd->b[4]  = msa_min_a_df(DF_BYTE, pws->b[4],  pwt->b[4]);
2838      pwd->b[5]  = msa_min_a_df(DF_BYTE, pws->b[5],  pwt->b[5]);
2839      pwd->b[6]  = msa_min_a_df(DF_BYTE, pws->b[6],  pwt->b[6]);
2840      pwd->b[7]  = msa_min_a_df(DF_BYTE, pws->b[7],  pwt->b[7]);
2841      pwd->b[8]  = msa_min_a_df(DF_BYTE, pws->b[8],  pwt->b[8]);
2842      pwd->b[9]  = msa_min_a_df(DF_BYTE, pws->b[9],  pwt->b[9]);
2843      pwd->b[10] = msa_min_a_df(DF_BYTE, pws->b[10], pwt->b[10]);
2844      pwd->b[11] = msa_min_a_df(DF_BYTE, pws->b[11], pwt->b[11]);
2845      pwd->b[12] = msa_min_a_df(DF_BYTE, pws->b[12], pwt->b[12]);
2846      pwd->b[13] = msa_min_a_df(DF_BYTE, pws->b[13], pwt->b[13]);
2847      pwd->b[14] = msa_min_a_df(DF_BYTE, pws->b[14], pwt->b[14]);
2848      pwd->b[15] = msa_min_a_df(DF_BYTE, pws->b[15], pwt->b[15]);
2849  }
2850  
2851  void helper_msa_min_a_h(CPUMIPSState *env,
2852                          uint32_t wd, uint32_t ws, uint32_t wt)
2853  {
2854      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2855      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2856      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2857  
2858      pwd->h[0]  = msa_min_a_df(DF_HALF, pws->h[0],  pwt->h[0]);
2859      pwd->h[1]  = msa_min_a_df(DF_HALF, pws->h[1],  pwt->h[1]);
2860      pwd->h[2]  = msa_min_a_df(DF_HALF, pws->h[2],  pwt->h[2]);
2861      pwd->h[3]  = msa_min_a_df(DF_HALF, pws->h[3],  pwt->h[3]);
2862      pwd->h[4]  = msa_min_a_df(DF_HALF, pws->h[4],  pwt->h[4]);
2863      pwd->h[5]  = msa_min_a_df(DF_HALF, pws->h[5],  pwt->h[5]);
2864      pwd->h[6]  = msa_min_a_df(DF_HALF, pws->h[6],  pwt->h[6]);
2865      pwd->h[7]  = msa_min_a_df(DF_HALF, pws->h[7],  pwt->h[7]);
2866  }
2867  
2868  void helper_msa_min_a_w(CPUMIPSState *env,
2869                          uint32_t wd, uint32_t ws, uint32_t wt)
2870  {
2871      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2872      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2873      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2874  
2875      pwd->w[0]  = msa_min_a_df(DF_WORD, pws->w[0],  pwt->w[0]);
2876      pwd->w[1]  = msa_min_a_df(DF_WORD, pws->w[1],  pwt->w[1]);
2877      pwd->w[2]  = msa_min_a_df(DF_WORD, pws->w[2],  pwt->w[2]);
2878      pwd->w[3]  = msa_min_a_df(DF_WORD, pws->w[3],  pwt->w[3]);
2879  }
2880  
2881  void helper_msa_min_a_d(CPUMIPSState *env,
2882                          uint32_t wd, uint32_t ws, uint32_t wt)
2883  {
2884      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2885      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2886      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2887  
2888      pwd->d[0]  = msa_min_a_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2889      pwd->d[1]  = msa_min_a_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2890  }
2891  
2892  
/*
 * Signed minimum: return the smaller of arg1 and arg2 (arg2 when they are
 * equal).  The df argument is not used by the comparison.
 */
static inline int64_t msa_min_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    if (arg1 < arg2) {
        return arg1;
    }
    return arg2;
}
2897  
2898  void helper_msa_min_s_b(CPUMIPSState *env,
2899                          uint32_t wd, uint32_t ws, uint32_t wt)
2900  {
2901      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2902      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2903      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2904  
2905      pwd->b[0]  = msa_min_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
2906      pwd->b[1]  = msa_min_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
2907      pwd->b[2]  = msa_min_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
2908      pwd->b[3]  = msa_min_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
2909      pwd->b[4]  = msa_min_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
2910      pwd->b[5]  = msa_min_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
2911      pwd->b[6]  = msa_min_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
2912      pwd->b[7]  = msa_min_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
2913      pwd->b[8]  = msa_min_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
2914      pwd->b[9]  = msa_min_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
2915      pwd->b[10] = msa_min_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
2916      pwd->b[11] = msa_min_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
2917      pwd->b[12] = msa_min_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
2918      pwd->b[13] = msa_min_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
2919      pwd->b[14] = msa_min_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
2920      pwd->b[15] = msa_min_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
2921  }
2922  
2923  void helper_msa_min_s_h(CPUMIPSState *env,
2924                          uint32_t wd, uint32_t ws, uint32_t wt)
2925  {
2926      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2927      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2928      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2929  
2930      pwd->h[0]  = msa_min_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
2931      pwd->h[1]  = msa_min_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
2932      pwd->h[2]  = msa_min_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
2933      pwd->h[3]  = msa_min_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
2934      pwd->h[4]  = msa_min_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
2935      pwd->h[5]  = msa_min_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
2936      pwd->h[6]  = msa_min_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
2937      pwd->h[7]  = msa_min_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
2938  }
2939  
2940  void helper_msa_min_s_w(CPUMIPSState *env,
2941                          uint32_t wd, uint32_t ws, uint32_t wt)
2942  {
2943      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2944      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2945      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2946  
2947      pwd->w[0]  = msa_min_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
2948      pwd->w[1]  = msa_min_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
2949      pwd->w[2]  = msa_min_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
2950      pwd->w[3]  = msa_min_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
2951  }
2952  
2953  void helper_msa_min_s_d(CPUMIPSState *env,
2954                          uint32_t wd, uint32_t ws, uint32_t wt)
2955  {
2956      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2957      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2958      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2959  
2960      pwd->d[0]  = msa_min_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2961      pwd->d[1]  = msa_min_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2962  }
2963  
2964  
/*
 * Unsigned minimum: both operands are masked to the element width selected
 * by df (via UNSIGNED) before being compared.  The value returned is the
 * original, unmasked operand; arg2 is returned when the masked values are
 * equal.
 */
static inline int64_t msa_min_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    if (UNSIGNED(arg1, df) < UNSIGNED(arg2, df)) {
        return arg1;
    }
    return arg2;
}
2971  
2972  void helper_msa_min_u_b(CPUMIPSState *env,
2973                          uint32_t wd, uint32_t ws, uint32_t wt)
2974  {
2975      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2976      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2977      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2978  
2979      pwd->b[0]  = msa_min_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
2980      pwd->b[1]  = msa_min_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
2981      pwd->b[2]  = msa_min_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
2982      pwd->b[3]  = msa_min_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
2983      pwd->b[4]  = msa_min_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
2984      pwd->b[5]  = msa_min_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
2985      pwd->b[6]  = msa_min_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
2986      pwd->b[7]  = msa_min_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
2987      pwd->b[8]  = msa_min_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
2988      pwd->b[9]  = msa_min_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
2989      pwd->b[10] = msa_min_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
2990      pwd->b[11] = msa_min_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
2991      pwd->b[12] = msa_min_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
2992      pwd->b[13] = msa_min_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
2993      pwd->b[14] = msa_min_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
2994      pwd->b[15] = msa_min_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
2995  }
2996  
2997  void helper_msa_min_u_h(CPUMIPSState *env,
2998                          uint32_t wd, uint32_t ws, uint32_t wt)
2999  {
3000      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3001      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3002      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3003  
3004      pwd->h[0]  = msa_min_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
3005      pwd->h[1]  = msa_min_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
3006      pwd->h[2]  = msa_min_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
3007      pwd->h[3]  = msa_min_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
3008      pwd->h[4]  = msa_min_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
3009      pwd->h[5]  = msa_min_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
3010      pwd->h[6]  = msa_min_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
3011      pwd->h[7]  = msa_min_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
3012  }
3013  
3014  void helper_msa_min_u_w(CPUMIPSState *env,
3015                          uint32_t wd, uint32_t ws, uint32_t wt)
3016  {
3017      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3018      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3019      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3020  
3021      pwd->w[0]  = msa_min_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
3022      pwd->w[1]  = msa_min_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
3023      pwd->w[2]  = msa_min_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
3024      pwd->w[3]  = msa_min_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
3025  }
3026  
3027  void helper_msa_min_u_d(CPUMIPSState *env,
3028                          uint32_t wd, uint32_t ws, uint32_t wt)
3029  {
3030      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3031      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3032      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3033  
3034      pwd->d[0]  = msa_min_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3035      pwd->d[1]  = msa_min_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3036  }
3037  
3038  
3039  /*
3040   * Int Modulo
3041   * ----------
3042   *
3043   * +---------------+----------------------------------------------------------+
3044   * | MOD_S.B       | Vector Signed Modulo (byte)                              |
3045   * | MOD_S.H       | Vector Signed Modulo (halfword)                          |
3046   * | MOD_S.W       | Vector Signed Modulo (word)                              |
3047   * | MOD_S.D       | Vector Signed Modulo (doubleword)                        |
3048   * | MOD_U.B       | Vector Unsigned Modulo (byte)                            |
3049   * | MOD_U.H       | Vector Unsigned Modulo (halfword)                        |
3050   * | MOD_U.W       | Vector Unsigned Modulo (word)                            |
3051   * | MOD_U.D       | Vector Unsigned Modulo (doubleword)                      |
3052   * +---------------+----------------------------------------------------------+
3053   */
3054  
/*
 * Signed remainder of arg1 divided by arg2.
 *
 *  - A divisor of -1 always yields 0.  Handling it up front avoids the
 *    INT64_MIN % -1 overflow without having to build the per-format minimum
 *    value (which for doublewords requires shifting 1LL by 63 bits), and
 *    x % -1 is 0 for every x, so all other results are unchanged.
 *  - A divisor of 0 returns arg1 unchanged, avoiding a division by zero.
 *
 * The df argument is not needed by this formulation.
 */
static inline int64_t msa_mod_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    if (arg2 == -1) {
        return 0;
    }
    return arg2 ? arg1 % arg2 : arg1;
}
3062  
3063  void helper_msa_mod_s_b(CPUMIPSState *env,
3064                          uint32_t wd, uint32_t ws, uint32_t wt)
3065  {
3066      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3067      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3068      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3069  
3070      pwd->b[0]  = msa_mod_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
3071      pwd->b[1]  = msa_mod_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
3072      pwd->b[2]  = msa_mod_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
3073      pwd->b[3]  = msa_mod_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
3074      pwd->b[4]  = msa_mod_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
3075      pwd->b[5]  = msa_mod_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
3076      pwd->b[6]  = msa_mod_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
3077      pwd->b[7]  = msa_mod_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
3078      pwd->b[8]  = msa_mod_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
3079      pwd->b[9]  = msa_mod_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
3080      pwd->b[10] = msa_mod_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
3081      pwd->b[11] = msa_mod_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
3082      pwd->b[12] = msa_mod_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
3083      pwd->b[13] = msa_mod_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
3084      pwd->b[14] = msa_mod_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
3085      pwd->b[15] = msa_mod_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
3086  }
3087  
3088  void helper_msa_mod_s_h(CPUMIPSState *env,
3089                          uint32_t wd, uint32_t ws, uint32_t wt)
3090  {
3091      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3092      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3093      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3094  
3095      pwd->h[0]  = msa_mod_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
3096      pwd->h[1]  = msa_mod_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
3097      pwd->h[2]  = msa_mod_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
3098      pwd->h[3]  = msa_mod_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
3099      pwd->h[4]  = msa_mod_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
3100      pwd->h[5]  = msa_mod_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
3101      pwd->h[6]  = msa_mod_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
3102      pwd->h[7]  = msa_mod_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
3103  }
3104  
3105  void helper_msa_mod_s_w(CPUMIPSState *env,
3106                          uint32_t wd, uint32_t ws, uint32_t wt)
3107  {
3108      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3109      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3110      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3111  
3112      pwd->w[0]  = msa_mod_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
3113      pwd->w[1]  = msa_mod_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
3114      pwd->w[2]  = msa_mod_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
3115      pwd->w[3]  = msa_mod_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
3116  }
3117  
3118  void helper_msa_mod_s_d(CPUMIPSState *env,
3119                          uint32_t wd, uint32_t ws, uint32_t wt)
3120  {
3121      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3122      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3123      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3124  
3125      pwd->d[0]  = msa_mod_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3126      pwd->d[1]  = msa_mod_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3127  }
3128  
/*
 * Unsigned remainder: both operands are first masked to the element width
 * selected by df (via UNSIGNED).  A zero divisor returns the masked
 * dividend unchanged instead of dividing by zero.
 */
static inline int64_t msa_mod_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    uint64_t dividend = UNSIGNED(arg1, df);
    uint64_t divisor = UNSIGNED(arg2, df);

    if (divisor == 0) {
        return dividend;
    }
    return dividend % divisor;
}
3135  
3136  void helper_msa_mod_u_b(CPUMIPSState *env,
3137                          uint32_t wd, uint32_t ws, uint32_t wt)
3138  {
3139      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3140      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3141      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3142  
3143      pwd->b[0]  = msa_mod_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
3144      pwd->b[1]  = msa_mod_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
3145      pwd->b[2]  = msa_mod_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
3146      pwd->b[3]  = msa_mod_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
3147      pwd->b[4]  = msa_mod_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
3148      pwd->b[5]  = msa_mod_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
3149      pwd->b[6]  = msa_mod_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
3150      pwd->b[7]  = msa_mod_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
3151      pwd->b[8]  = msa_mod_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
3152      pwd->b[9]  = msa_mod_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
3153      pwd->b[10] = msa_mod_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
3154      pwd->b[11] = msa_mod_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
3155      pwd->b[12] = msa_mod_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
3156      pwd->b[13] = msa_mod_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
3157      pwd->b[14] = msa_mod_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
3158      pwd->b[15] = msa_mod_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
3159  }
3160  
3161  void helper_msa_mod_u_h(CPUMIPSState *env,
3162                          uint32_t wd, uint32_t ws, uint32_t wt)
3163  {
3164      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3165      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3166      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3167  
3168      pwd->h[0]  = msa_mod_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
3169      pwd->h[1]  = msa_mod_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
3170      pwd->h[2]  = msa_mod_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
3171      pwd->h[3]  = msa_mod_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
3172      pwd->h[4]  = msa_mod_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
3173      pwd->h[5]  = msa_mod_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
3174      pwd->h[6]  = msa_mod_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
3175      pwd->h[7]  = msa_mod_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
3176  }
3177  
3178  void helper_msa_mod_u_w(CPUMIPSState *env,
3179                          uint32_t wd, uint32_t ws, uint32_t wt)
3180  {
3181      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3182      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3183      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3184  
3185      pwd->w[0]  = msa_mod_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
3186      pwd->w[1]  = msa_mod_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
3187      pwd->w[2]  = msa_mod_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
3188      pwd->w[3]  = msa_mod_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
3189  }
3190  
3191  void helper_msa_mod_u_d(CPUMIPSState *env,
3192                          uint32_t wd, uint32_t ws, uint32_t wt)
3193  {
3194      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3195      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3196      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3197  
3198      pwd->d[0]  = msa_mod_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3199      pwd->d[1]  = msa_mod_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3200  }
3201  
3202  
3203  /*
3204   * Int Multiply
3205   * ------------
3206   *
3207   * +---------------+----------------------------------------------------------+
3208   * | MADDV.B       | Vector Multiply and Add (byte)                           |
3209   * | MADDV.H       | Vector Multiply and Add (halfword)                       |
3210   * | MADDV.W       | Vector Multiply and Add (word)                           |
3211   * | MADDV.D       | Vector Multiply and Add (doubleword)                     |
3212   * | MSUBV.B       | Vector Multiply and Subtract (byte)                      |
3213   * | MSUBV.H       | Vector Multiply and Subtract (halfword)                  |
3214   * | MSUBV.W       | Vector Multiply and Subtract (word)                      |
3215   * | MSUBV.D       | Vector Multiply and Subtract (doubleword)                |
3216   * | MULV.B        | Vector Multiply (byte)                                   |
3217   * | MULV.H        | Vector Multiply (halfword)                               |
3218   * | MULV.W        | Vector Multiply (word)                                   |
3219   * | MULV.D        | Vector Multiply (doubleword)                             |
3220   * +---------------+----------------------------------------------------------+
3221   */
3222  
/*
 * Multiply-and-add: return dest + arg1 * arg2, wrapping modulo 2^64.
 *
 * The arithmetic is carried out in uint64_t because signed overflow is
 * undefined behaviour in ISO C, while unsigned arithmetic wraps.  The low
 * 64 bits of the result are the same for signed and unsigned operands, so
 * callers that truncate to a narrower element see identical values.  The
 * final conversion back to int64_t is implementation-defined (two's
 * complement wrap on the compilers this file is built with).
 *
 * The df argument is not used.
 */
static inline int64_t msa_maddv_df(uint32_t df, int64_t dest, int64_t arg1,
                                   int64_t arg2)
{
    return (int64_t)((uint64_t)dest + (uint64_t)arg1 * (uint64_t)arg2);
}
3228  
3229  void helper_msa_maddv_b(CPUMIPSState *env,
3230                          uint32_t wd, uint32_t ws, uint32_t wt)
3231  {
3232      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3233      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3234      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3235  
3236      pwd->b[0]  = msa_maddv_df(DF_BYTE, pwd->b[0],  pws->b[0],  pwt->b[0]);
3237      pwd->b[1]  = msa_maddv_df(DF_BYTE, pwd->b[1],  pws->b[1],  pwt->b[1]);
3238      pwd->b[2]  = msa_maddv_df(DF_BYTE, pwd->b[2],  pws->b[2],  pwt->b[2]);
3239      pwd->b[3]  = msa_maddv_df(DF_BYTE, pwd->b[3],  pws->b[3],  pwt->b[3]);
3240      pwd->b[4]  = msa_maddv_df(DF_BYTE, pwd->b[4],  pws->b[4],  pwt->b[4]);
3241      pwd->b[5]  = msa_maddv_df(DF_BYTE, pwd->b[5],  pws->b[5],  pwt->b[5]);
3242      pwd->b[6]  = msa_maddv_df(DF_BYTE, pwd->b[6],  pws->b[6],  pwt->b[6]);
3243      pwd->b[7]  = msa_maddv_df(DF_BYTE, pwd->b[7],  pws->b[7],  pwt->b[7]);
3244      pwd->b[8]  = msa_maddv_df(DF_BYTE, pwd->b[8],  pws->b[8],  pwt->b[8]);
3245      pwd->b[9]  = msa_maddv_df(DF_BYTE, pwd->b[9],  pws->b[9],  pwt->b[9]);
3246      pwd->b[10] = msa_maddv_df(DF_BYTE, pwd->b[10], pws->b[10], pwt->b[10]);
3247      pwd->b[11] = msa_maddv_df(DF_BYTE, pwd->b[11], pws->b[11], pwt->b[11]);
3248      pwd->b[12] = msa_maddv_df(DF_BYTE, pwd->b[12], pws->b[12], pwt->b[12]);
3249      pwd->b[13] = msa_maddv_df(DF_BYTE, pwd->b[13], pws->b[13], pwt->b[13]);
3250      pwd->b[14] = msa_maddv_df(DF_BYTE, pwd->b[14], pws->b[14], pwt->b[14]);
3251      pwd->b[15] = msa_maddv_df(DF_BYTE, pwd->b[15], pws->b[15], pwt->b[15]);
3252  }
3253  
3254  void helper_msa_maddv_h(CPUMIPSState *env,
3255                          uint32_t wd, uint32_t ws, uint32_t wt)
3256  {
3257      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3258      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3259      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3260  
3261      pwd->h[0]  = msa_maddv_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
3262      pwd->h[1]  = msa_maddv_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
3263      pwd->h[2]  = msa_maddv_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
3264      pwd->h[3]  = msa_maddv_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
3265      pwd->h[4]  = msa_maddv_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
3266      pwd->h[5]  = msa_maddv_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
3267      pwd->h[6]  = msa_maddv_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
3268      pwd->h[7]  = msa_maddv_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
3269  }
3270  
3271  void helper_msa_maddv_w(CPUMIPSState *env,
3272                          uint32_t wd, uint32_t ws, uint32_t wt)
3273  {
3274      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3275      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3276      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3277  
3278      pwd->w[0]  = msa_maddv_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
3279      pwd->w[1]  = msa_maddv_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
3280      pwd->w[2]  = msa_maddv_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
3281      pwd->w[3]  = msa_maddv_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
3282  }
3283  
3284  void helper_msa_maddv_d(CPUMIPSState *env,
3285                          uint32_t wd, uint32_t ws, uint32_t wt)
3286  {
3287      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3288      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3289      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3290  
3291      pwd->d[0]  = msa_maddv_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
3292      pwd->d[1]  = msa_maddv_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
3293  }
3294  
/*
 * Multiply-and-subtract: return dest - arg1 * arg2, wrapping modulo 2^64.
 *
 * The arithmetic is carried out in uint64_t because signed overflow is
 * undefined behaviour in ISO C, while unsigned arithmetic wraps.  The low
 * 64 bits of the result are the same for signed and unsigned operands, so
 * callers that truncate to a narrower element see identical values.  The
 * final conversion back to int64_t is implementation-defined (two's
 * complement wrap on the compilers this file is built with).
 *
 * The df argument is not used.
 */
static inline int64_t msa_msubv_df(uint32_t df, int64_t dest, int64_t arg1,
                                   int64_t arg2)
{
    return (int64_t)((uint64_t)dest - (uint64_t)arg1 * (uint64_t)arg2);
}
3300  
3301  void helper_msa_msubv_b(CPUMIPSState *env,
3302                          uint32_t wd, uint32_t ws, uint32_t wt)
3303  {
3304      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3305      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3306      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3307  
3308      pwd->b[0]  = msa_msubv_df(DF_BYTE, pwd->b[0],  pws->b[0],  pwt->b[0]);
3309      pwd->b[1]  = msa_msubv_df(DF_BYTE, pwd->b[1],  pws->b[1],  pwt->b[1]);
3310      pwd->b[2]  = msa_msubv_df(DF_BYTE, pwd->b[2],  pws->b[2],  pwt->b[2]);
3311      pwd->b[3]  = msa_msubv_df(DF_BYTE, pwd->b[3],  pws->b[3],  pwt->b[3]);
3312      pwd->b[4]  = msa_msubv_df(DF_BYTE, pwd->b[4],  pws->b[4],  pwt->b[4]);
3313      pwd->b[5]  = msa_msubv_df(DF_BYTE, pwd->b[5],  pws->b[5],  pwt->b[5]);
3314      pwd->b[6]  = msa_msubv_df(DF_BYTE, pwd->b[6],  pws->b[6],  pwt->b[6]);
3315      pwd->b[7]  = msa_msubv_df(DF_BYTE, pwd->b[7],  pws->b[7],  pwt->b[7]);
3316      pwd->b[8]  = msa_msubv_df(DF_BYTE, pwd->b[8],  pws->b[8],  pwt->b[8]);
3317      pwd->b[9]  = msa_msubv_df(DF_BYTE, pwd->b[9],  pws->b[9],  pwt->b[9]);
3318      pwd->b[10] = msa_msubv_df(DF_BYTE, pwd->b[10], pws->b[10], pwt->b[10]);
3319      pwd->b[11] = msa_msubv_df(DF_BYTE, pwd->b[11], pws->b[11], pwt->b[11]);
3320      pwd->b[12] = msa_msubv_df(DF_BYTE, pwd->b[12], pws->b[12], pwt->b[12]);
3321      pwd->b[13] = msa_msubv_df(DF_BYTE, pwd->b[13], pws->b[13], pwt->b[13]);
3322      pwd->b[14] = msa_msubv_df(DF_BYTE, pwd->b[14], pws->b[14], pwt->b[14]);
3323      pwd->b[15] = msa_msubv_df(DF_BYTE, pwd->b[15], pws->b[15], pwt->b[15]);
3324  }
3325  
3326  void helper_msa_msubv_h(CPUMIPSState *env,
3327                          uint32_t wd, uint32_t ws, uint32_t wt)
3328  {
3329      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3330      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3331      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3332  
3333      pwd->h[0]  = msa_msubv_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
3334      pwd->h[1]  = msa_msubv_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
3335      pwd->h[2]  = msa_msubv_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
3336      pwd->h[3]  = msa_msubv_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
3337      pwd->h[4]  = msa_msubv_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
3338      pwd->h[5]  = msa_msubv_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
3339      pwd->h[6]  = msa_msubv_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
3340      pwd->h[7]  = msa_msubv_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
3341  }
3342  
3343  void helper_msa_msubv_w(CPUMIPSState *env,
3344                          uint32_t wd, uint32_t ws, uint32_t wt)
3345  {
3346      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3347      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3348      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3349  
3350      pwd->w[0]  = msa_msubv_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
3351      pwd->w[1]  = msa_msubv_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
3352      pwd->w[2]  = msa_msubv_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
3353      pwd->w[3]  = msa_msubv_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
3354  }
3355  
3356  void helper_msa_msubv_d(CPUMIPSState *env,
3357                          uint32_t wd, uint32_t ws, uint32_t wt)
3358  {
3359      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3360      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3361      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3362  
3363      pwd->d[0]  = msa_msubv_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
3364      pwd->d[1]  = msa_msubv_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
3365  }
3366  
3367  
/*
 * Multiply two elements: returns arg1 * arg2, wrapped modulo 2^64.
 *
 * The product is formed on uint64_t so that wrap-around is well defined.
 * A plain int64_t multiplication overflows for large doubleword operands,
 * which is undefined in ISO C unless the compiler is told to wrap signed
 * arithmetic.  The low 64 bits of the product are identical either way.
 *
 * @df is accepted for symmetry with the other element helpers but is not
 * used by the computation.
 */
static inline int64_t msa_mulv_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    return (int64_t)((uint64_t)arg1 * (uint64_t)arg2);
}
3372  
3373  void helper_msa_mulv_b(CPUMIPSState *env,
3374                         uint32_t wd, uint32_t ws, uint32_t wt)
3375  {
3376      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3377      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3378      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3379  
3380      pwd->b[0]  = msa_mulv_df(DF_BYTE, pws->b[0],  pwt->b[0]);
3381      pwd->b[1]  = msa_mulv_df(DF_BYTE, pws->b[1],  pwt->b[1]);
3382      pwd->b[2]  = msa_mulv_df(DF_BYTE, pws->b[2],  pwt->b[2]);
3383      pwd->b[3]  = msa_mulv_df(DF_BYTE, pws->b[3],  pwt->b[3]);
3384      pwd->b[4]  = msa_mulv_df(DF_BYTE, pws->b[4],  pwt->b[4]);
3385      pwd->b[5]  = msa_mulv_df(DF_BYTE, pws->b[5],  pwt->b[5]);
3386      pwd->b[6]  = msa_mulv_df(DF_BYTE, pws->b[6],  pwt->b[6]);
3387      pwd->b[7]  = msa_mulv_df(DF_BYTE, pws->b[7],  pwt->b[7]);
3388      pwd->b[8]  = msa_mulv_df(DF_BYTE, pws->b[8],  pwt->b[8]);
3389      pwd->b[9]  = msa_mulv_df(DF_BYTE, pws->b[9],  pwt->b[9]);
3390      pwd->b[10] = msa_mulv_df(DF_BYTE, pws->b[10], pwt->b[10]);
3391      pwd->b[11] = msa_mulv_df(DF_BYTE, pws->b[11], pwt->b[11]);
3392      pwd->b[12] = msa_mulv_df(DF_BYTE, pws->b[12], pwt->b[12]);
3393      pwd->b[13] = msa_mulv_df(DF_BYTE, pws->b[13], pwt->b[13]);
3394      pwd->b[14] = msa_mulv_df(DF_BYTE, pws->b[14], pwt->b[14]);
3395      pwd->b[15] = msa_mulv_df(DF_BYTE, pws->b[15], pwt->b[15]);
3396  }
3397  
3398  void helper_msa_mulv_h(CPUMIPSState *env,
3399                         uint32_t wd, uint32_t ws, uint32_t wt)
3400  {
3401      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3402      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3403      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3404  
3405      pwd->h[0]  = msa_mulv_df(DF_HALF, pws->h[0],  pwt->h[0]);
3406      pwd->h[1]  = msa_mulv_df(DF_HALF, pws->h[1],  pwt->h[1]);
3407      pwd->h[2]  = msa_mulv_df(DF_HALF, pws->h[2],  pwt->h[2]);
3408      pwd->h[3]  = msa_mulv_df(DF_HALF, pws->h[3],  pwt->h[3]);
3409      pwd->h[4]  = msa_mulv_df(DF_HALF, pws->h[4],  pwt->h[4]);
3410      pwd->h[5]  = msa_mulv_df(DF_HALF, pws->h[5],  pwt->h[5]);
3411      pwd->h[6]  = msa_mulv_df(DF_HALF, pws->h[6],  pwt->h[6]);
3412      pwd->h[7]  = msa_mulv_df(DF_HALF, pws->h[7],  pwt->h[7]);
3413  }
3414  
3415  void helper_msa_mulv_w(CPUMIPSState *env,
3416                         uint32_t wd, uint32_t ws, uint32_t wt)
3417  {
3418      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3419      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3420      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3421  
3422      pwd->w[0]  = msa_mulv_df(DF_WORD, pws->w[0],  pwt->w[0]);
3423      pwd->w[1]  = msa_mulv_df(DF_WORD, pws->w[1],  pwt->w[1]);
3424      pwd->w[2]  = msa_mulv_df(DF_WORD, pws->w[2],  pwt->w[2]);
3425      pwd->w[3]  = msa_mulv_df(DF_WORD, pws->w[3],  pwt->w[3]);
3426  }
3427  
3428  void helper_msa_mulv_d(CPUMIPSState *env,
3429                         uint32_t wd, uint32_t ws, uint32_t wt)
3430  {
3431      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3432      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3433      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3434  
3435      pwd->d[0]  = msa_mulv_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3436      pwd->d[1]  = msa_mulv_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3437  }
3438  
3439  
3440  /*
3441   * Int Subtract
3442   * ------------
3443   *
3444   * +---------------+----------------------------------------------------------+
3445   * | ASUB_S.B      | Vector Absolute Values of Signed Subtract (byte)         |
3446   * | ASUB_S.H      | Vector Absolute Values of Signed Subtract (halfword)     |
3447   * | ASUB_S.W      | Vector Absolute Values of Signed Subtract (word)         |
3448   * | ASUB_S.D      | Vector Absolute Values of Signed Subtract (doubleword)   |
3449   * | ASUB_U.B      | Vector Absolute Values of Unsigned Subtract (byte)       |
3450   * | ASUB_U.H      | Vector Absolute Values of Unsigned Subtract (halfword)   |
3451   * | ASUB_U.W      | Vector Absolute Values of Unsigned Subtract (word)       |
3452   * | ASUB_U.D      | Vector Absolute Values of Unsigned Subtract (doubleword) |
3453   * | HSUB_S.H      | Vector Signed Horizontal Subtract (halfword)             |
3454   * | HSUB_S.W      | Vector Signed Horizontal Subtract (word)                 |
3455   * | HSUB_S.D      | Vector Signed Horizontal Subtract (doubleword)           |
3456   * | HSUB_U.H      | Vector Unsigned Horizontal Subtract (halfword)           |
3457   * | HSUB_U.W      | Vector Unsigned Horizontal Subtract (word)               |
3458   * | HSUB_U.D      | Vector Unsigned Horizontal Subtract (doubleword)         |
3459   * | SUBS_S.B      | Vector Signed Saturated Subtract (of Signed) (byte)      |
3460   * | SUBS_S.H      | Vector Signed Saturated Subtract (of Signed) (halfword)  |
3461   * | SUBS_S.W      | Vector Signed Saturated Subtract (of Signed) (word)      |
3462   * | SUBS_S.D      | Vector Signed Saturated Subtract (of Signed) (doubleword)|
3463   * | SUBS_U.B      | Vector Unsigned Saturated Subtract (of Uns.) (byte)      |
3464   * | SUBS_U.H      | Vector Unsigned Saturated Subtract (of Uns.) (halfword)  |
3465   * | SUBS_U.W      | Vector Unsigned Saturated Subtract (of Uns.) (word)      |
3466   * | SUBS_U.D      | Vector Unsigned Saturated Subtract (of Uns.) (doubleword)|
3467   * | SUBSUS_U.B    | Vector Uns. Sat. Subtract (of S. from Uns.) (byte)       |
3468   * | SUBSUS_U.H    | Vector Uns. Sat. Subtract (of S. from Uns.) (halfword)   |
3469   * | SUBSUS_U.W    | Vector Uns. Sat. Subtract (of S. from Uns.) (word)       |
3470   * | SUBSUS_U.D    | Vector Uns. Sat. Subtract (of S. from Uns.) (doubleword) |
3471   * | SUBSUU_S.B    | Vector Signed Saturated Subtract (of Uns.) (byte)        |
3472   * | SUBSUU_S.H    | Vector Signed Saturated Subtract (of Uns.) (halfword)    |
3473   * | SUBSUU_S.W    | Vector Signed Saturated Subtract (of Uns.) (word)        |
3474   * | SUBSUU_S.D    | Vector Signed Saturated Subtract (of Uns.) (doubleword)  |
3475   * | SUBV.B        | Vector Subtract (byte)                                   |
3476   * | SUBV.H        | Vector Subtract (halfword)                               |
3477   * | SUBV.W        | Vector Subtract (word)                                   |
3478   * | SUBV.D        | Vector Subtract (doubleword)                             |
3479   * +---------------+----------------------------------------------------------+
3480   */
3481  
3482  
/*
 * Absolute value of the signed difference of two elements: |arg1 - arg2|.
 *
 * The operands are compared as signed values, then the smaller one is
 * subtracted from the larger one in uint64_t arithmetic.  Casting before
 * the subtraction (rather than after it) keeps the operation well defined
 * even when the true difference does not fit in int64_t, e.g.
 * INT64_MAX - INT64_MIN for doubleword elements.
 *
 * @df is not used by the computation.
 */
static inline int64_t msa_asub_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    /* signed compare */
    return (arg1 < arg2) ?
        (uint64_t)arg2 - (uint64_t)arg1 : (uint64_t)arg1 - (uint64_t)arg2;
}
3489  
3490  void helper_msa_asub_s_b(CPUMIPSState *env,
3491                           uint32_t wd, uint32_t ws, uint32_t wt)
3492  {
3493      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3494      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3495      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3496  
3497      pwd->b[0]  = msa_asub_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
3498      pwd->b[1]  = msa_asub_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
3499      pwd->b[2]  = msa_asub_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
3500      pwd->b[3]  = msa_asub_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
3501      pwd->b[4]  = msa_asub_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
3502      pwd->b[5]  = msa_asub_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
3503      pwd->b[6]  = msa_asub_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
3504      pwd->b[7]  = msa_asub_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
3505      pwd->b[8]  = msa_asub_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
3506      pwd->b[9]  = msa_asub_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
3507      pwd->b[10] = msa_asub_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
3508      pwd->b[11] = msa_asub_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
3509      pwd->b[12] = msa_asub_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
3510      pwd->b[13] = msa_asub_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
3511      pwd->b[14] = msa_asub_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
3512      pwd->b[15] = msa_asub_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
3513  }
3514  
3515  void helper_msa_asub_s_h(CPUMIPSState *env,
3516                           uint32_t wd, uint32_t ws, uint32_t wt)
3517  {
3518      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3519      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3520      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3521  
3522      pwd->h[0]  = msa_asub_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
3523      pwd->h[1]  = msa_asub_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
3524      pwd->h[2]  = msa_asub_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
3525      pwd->h[3]  = msa_asub_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
3526      pwd->h[4]  = msa_asub_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
3527      pwd->h[5]  = msa_asub_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
3528      pwd->h[6]  = msa_asub_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
3529      pwd->h[7]  = msa_asub_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
3530  }
3531  
3532  void helper_msa_asub_s_w(CPUMIPSState *env,
3533                           uint32_t wd, uint32_t ws, uint32_t wt)
3534  {
3535      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3536      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3537      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3538  
3539      pwd->w[0]  = msa_asub_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
3540      pwd->w[1]  = msa_asub_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
3541      pwd->w[2]  = msa_asub_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
3542      pwd->w[3]  = msa_asub_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
3543  }
3544  
3545  void helper_msa_asub_s_d(CPUMIPSState *env,
3546                           uint32_t wd, uint32_t ws, uint32_t wt)
3547  {
3548      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3549      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3550      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3551  
3552      pwd->d[0]  = msa_asub_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3553      pwd->d[1]  = msa_asub_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3554  }
3555  
3556  
/*
 * Absolute value of the unsigned difference of two elements.
 *
 * Both arguments are first masked to the element width selected by @df
 * (via UNSIGNED()), then the smaller value is subtracted from the larger.
 */
static inline uint64_t msa_asub_u_df(uint32_t df, uint64_t arg1, uint64_t arg2)
{
    const uint64_t lhs = UNSIGNED(arg1, df);
    const uint64_t rhs = UNSIGNED(arg2, df);

    /* unsigned compare decides which operand is the minuend */
    if (lhs < rhs) {
        return rhs - lhs;
    }
    return lhs - rhs;
}
3565  
3566  void helper_msa_asub_u_b(CPUMIPSState *env,
3567                           uint32_t wd, uint32_t ws, uint32_t wt)
3568  {
3569      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3570      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3571      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3572  
3573      pwd->b[0]  = msa_asub_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
3574      pwd->b[1]  = msa_asub_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
3575      pwd->b[2]  = msa_asub_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
3576      pwd->b[3]  = msa_asub_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
3577      pwd->b[4]  = msa_asub_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
3578      pwd->b[5]  = msa_asub_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
3579      pwd->b[6]  = msa_asub_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
3580      pwd->b[7]  = msa_asub_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
3581      pwd->b[8]  = msa_asub_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
3582      pwd->b[9]  = msa_asub_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
3583      pwd->b[10] = msa_asub_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
3584      pwd->b[11] = msa_asub_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
3585      pwd->b[12] = msa_asub_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
3586      pwd->b[13] = msa_asub_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
3587      pwd->b[14] = msa_asub_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
3588      pwd->b[15] = msa_asub_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
3589  }
3590  
3591  void helper_msa_asub_u_h(CPUMIPSState *env,
3592                           uint32_t wd, uint32_t ws, uint32_t wt)
3593  {
3594      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3595      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3596      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3597  
3598      pwd->h[0]  = msa_asub_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
3599      pwd->h[1]  = msa_asub_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
3600      pwd->h[2]  = msa_asub_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
3601      pwd->h[3]  = msa_asub_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
3602      pwd->h[4]  = msa_asub_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
3603      pwd->h[5]  = msa_asub_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
3604      pwd->h[6]  = msa_asub_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
3605      pwd->h[7]  = msa_asub_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
3606  }
3607  
3608  void helper_msa_asub_u_w(CPUMIPSState *env,
3609                           uint32_t wd, uint32_t ws, uint32_t wt)
3610  {
3611      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3612      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3613      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3614  
3615      pwd->w[0]  = msa_asub_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
3616      pwd->w[1]  = msa_asub_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
3617      pwd->w[2]  = msa_asub_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
3618      pwd->w[3]  = msa_asub_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
3619  }
3620  
3621  void helper_msa_asub_u_d(CPUMIPSState *env,
3622                           uint32_t wd, uint32_t ws, uint32_t wt)
3623  {
3624      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3625      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3626      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3627  
3628      pwd->d[0]  = msa_asub_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3629      pwd->d[1]  = msa_asub_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3630  }
3631  
3632  
/*
 * Signed horizontal subtract on one destination element: returns
 * SIGNED_ODD(arg1, df) - SIGNED_EVEN(arg2, df).
 *
 * NOTE(review): SIGNED_ODD()/SIGNED_EVEN() are defined elsewhere in this
 * file; presumably they sign-extend the odd/even half of the element -
 * confirm against their definitions.
 */
static inline int64_t msa_hsub_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const int64_t difference = SIGNED_ODD(arg1, df) - SIGNED_EVEN(arg2, df);

    return difference;
}
3637  
3638  void helper_msa_hsub_s_h(CPUMIPSState *env,
3639                           uint32_t wd, uint32_t ws, uint32_t wt)
3640  {
3641      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3642      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3643      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3644  
3645      pwd->h[0]  = msa_hsub_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
3646      pwd->h[1]  = msa_hsub_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
3647      pwd->h[2]  = msa_hsub_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
3648      pwd->h[3]  = msa_hsub_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
3649      pwd->h[4]  = msa_hsub_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
3650      pwd->h[5]  = msa_hsub_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
3651      pwd->h[6]  = msa_hsub_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
3652      pwd->h[7]  = msa_hsub_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
3653  }
3654  
3655  void helper_msa_hsub_s_w(CPUMIPSState *env,
3656                           uint32_t wd, uint32_t ws, uint32_t wt)
3657  {
3658      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3659      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3660      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3661  
3662      pwd->w[0]  = msa_hsub_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
3663      pwd->w[1]  = msa_hsub_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
3664      pwd->w[2]  = msa_hsub_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
3665      pwd->w[3]  = msa_hsub_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
3666  }
3667  
3668  void helper_msa_hsub_s_d(CPUMIPSState *env,
3669                           uint32_t wd, uint32_t ws, uint32_t wt)
3670  {
3671      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3672      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3673      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3674  
3675      pwd->d[0]  = msa_hsub_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3676      pwd->d[1]  = msa_hsub_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3677  }
3678  
3679  
/*
 * Unsigned horizontal subtract on one destination element: returns
 * UNSIGNED_ODD(arg1, df) - UNSIGNED_EVEN(arg2, df), converted to int64_t.
 *
 * NOTE(review): UNSIGNED_ODD()/UNSIGNED_EVEN() are defined elsewhere in
 * this file; presumably they zero-extend the odd/even half of the element -
 * confirm against their definitions.
 */
static inline int64_t msa_hsub_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const int64_t difference =
        UNSIGNED_ODD(arg1, df) - UNSIGNED_EVEN(arg2, df);

    return difference;
}
3684  
3685  void helper_msa_hsub_u_h(CPUMIPSState *env,
3686                           uint32_t wd, uint32_t ws, uint32_t wt)
3687  {
3688      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3689      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3690      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3691  
3692      pwd->h[0]  = msa_hsub_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
3693      pwd->h[1]  = msa_hsub_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
3694      pwd->h[2]  = msa_hsub_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
3695      pwd->h[3]  = msa_hsub_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
3696      pwd->h[4]  = msa_hsub_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
3697      pwd->h[5]  = msa_hsub_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
3698      pwd->h[6]  = msa_hsub_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
3699      pwd->h[7]  = msa_hsub_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
3700  }
3701  
3702  void helper_msa_hsub_u_w(CPUMIPSState *env,
3703                           uint32_t wd, uint32_t ws, uint32_t wt)
3704  {
3705      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3706      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3707      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3708  
3709      pwd->w[0]  = msa_hsub_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
3710      pwd->w[1]  = msa_hsub_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
3711      pwd->w[2]  = msa_hsub_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
3712      pwd->w[3]  = msa_hsub_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
3713  }
3714  
3715  void helper_msa_hsub_u_d(CPUMIPSState *env,
3716                           uint32_t wd, uint32_t ws, uint32_t wt)
3717  {
3718      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3719      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3720      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3721  
3722      pwd->d[0]  = msa_hsub_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3723      pwd->d[1]  = msa_hsub_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3724  }
3725  
3726  
/*
 * Signed saturating subtract: returns arg1 - arg2 clamped to the range
 * [DF_MIN_INT(df), DF_MAX_INT(df)].
 *
 * The range check is done before subtracting, by moving arg2 to the side
 * of the bound, so the difference is only formed when it is in range.
 */
static inline int64_t msa_subs_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const int64_t upper = DF_MAX_INT(df);
    const int64_t lower = DF_MIN_INT(df);

    if (arg2 > 0) {
        /* Subtracting a positive value can only fall below the minimum. */
        if (lower + arg2 < arg1) {
            return arg1 - arg2;
        }
        return lower;
    }

    /* Subtracting zero or a negative value can only exceed the maximum. */
    if (arg1 < upper + arg2) {
        return arg1 - arg2;
    }
    return upper;
}
3737  
3738  void helper_msa_subs_s_b(CPUMIPSState *env,
3739                           uint32_t wd, uint32_t ws, uint32_t wt)
3740  {
3741      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3742      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3743      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3744  
3745      pwd->b[0]  = msa_subs_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
3746      pwd->b[1]  = msa_subs_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
3747      pwd->b[2]  = msa_subs_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
3748      pwd->b[3]  = msa_subs_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
3749      pwd->b[4]  = msa_subs_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
3750      pwd->b[5]  = msa_subs_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
3751      pwd->b[6]  = msa_subs_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
3752      pwd->b[7]  = msa_subs_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
3753      pwd->b[8]  = msa_subs_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
3754      pwd->b[9]  = msa_subs_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
3755      pwd->b[10] = msa_subs_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
3756      pwd->b[11] = msa_subs_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
3757      pwd->b[12] = msa_subs_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
3758      pwd->b[13] = msa_subs_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
3759      pwd->b[14] = msa_subs_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
3760      pwd->b[15] = msa_subs_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
3761  }
3762  
3763  void helper_msa_subs_s_h(CPUMIPSState *env,
3764                           uint32_t wd, uint32_t ws, uint32_t wt)
3765  {
3766      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3767      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3768      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3769  
3770      pwd->h[0]  = msa_subs_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
3771      pwd->h[1]  = msa_subs_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
3772      pwd->h[2]  = msa_subs_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
3773      pwd->h[3]  = msa_subs_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
3774      pwd->h[4]  = msa_subs_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
3775      pwd->h[5]  = msa_subs_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
3776      pwd->h[6]  = msa_subs_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
3777      pwd->h[7]  = msa_subs_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
3778  }
3779  
3780  void helper_msa_subs_s_w(CPUMIPSState *env,
3781                           uint32_t wd, uint32_t ws, uint32_t wt)
3782  {
3783      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3784      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3785      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3786  
3787      pwd->w[0]  = msa_subs_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
3788      pwd->w[1]  = msa_subs_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
3789      pwd->w[2]  = msa_subs_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
3790      pwd->w[3]  = msa_subs_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
3791  }
3792  
3793  void helper_msa_subs_s_d(CPUMIPSState *env,
3794                           uint32_t wd, uint32_t ws, uint32_t wt)
3795  {
3796      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3797      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3798      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3799  
3800      pwd->d[0]  = msa_subs_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3801      pwd->d[1]  = msa_subs_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3802  }
3803  
3804  
/*
 * Unsigned saturating subtract: both arguments are masked to the element
 * width selected by @df, and the result is their difference, or 0 when
 * the subtrahend is not smaller than the minuend.
 */
static inline int64_t msa_subs_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const uint64_t minuend = UNSIGNED(arg1, df);
    const uint64_t subtrahend = UNSIGNED(arg2, df);

    if (minuend > subtrahend) {
        return minuend - subtrahend;
    }
    /* The difference would be zero or negative: saturate at 0. */
    return 0;
}
3811  
3812  void helper_msa_subs_u_b(CPUMIPSState *env,
3813                           uint32_t wd, uint32_t ws, uint32_t wt)
3814  {
3815      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3816      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3817      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3818  
3819      pwd->b[0]  = msa_subs_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
3820      pwd->b[1]  = msa_subs_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
3821      pwd->b[2]  = msa_subs_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
3822      pwd->b[3]  = msa_subs_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
3823      pwd->b[4]  = msa_subs_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
3824      pwd->b[5]  = msa_subs_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
3825      pwd->b[6]  = msa_subs_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
3826      pwd->b[7]  = msa_subs_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
3827      pwd->b[8]  = msa_subs_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
3828      pwd->b[9]  = msa_subs_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
3829      pwd->b[10] = msa_subs_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
3830      pwd->b[11] = msa_subs_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
3831      pwd->b[12] = msa_subs_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
3832      pwd->b[13] = msa_subs_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
3833      pwd->b[14] = msa_subs_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
3834      pwd->b[15] = msa_subs_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
3835  }
3836  
3837  void helper_msa_subs_u_h(CPUMIPSState *env,
3838                           uint32_t wd, uint32_t ws, uint32_t wt)
3839  {
3840      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3841      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3842      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3843  
3844      pwd->h[0]  = msa_subs_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
3845      pwd->h[1]  = msa_subs_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
3846      pwd->h[2]  = msa_subs_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
3847      pwd->h[3]  = msa_subs_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
3848      pwd->h[4]  = msa_subs_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
3849      pwd->h[5]  = msa_subs_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
3850      pwd->h[6]  = msa_subs_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
3851      pwd->h[7]  = msa_subs_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
3852  }
3853  
3854  void helper_msa_subs_u_w(CPUMIPSState *env,
3855                           uint32_t wd, uint32_t ws, uint32_t wt)
3856  {
3857      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3858      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3859      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3860  
3861      pwd->w[0]  = msa_subs_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
3862      pwd->w[1]  = msa_subs_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
3863      pwd->w[2]  = msa_subs_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
3864      pwd->w[3]  = msa_subs_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
3865  }
3866  
3867  void helper_msa_subs_u_d(CPUMIPSState *env,
3868                           uint32_t wd, uint32_t ws, uint32_t wt)
3869  {
3870      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3871      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3872      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3873  
3874      pwd->d[0]  = msa_subs_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3875      pwd->d[1]  = msa_subs_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3876  }
3877  
3878  
/*
 * Unsigned saturating subtract of a signed value from an unsigned value.
 *
 * @arg1 is masked to the element width selected by @df and treated as
 * unsigned; @arg2 is used as the signed value it was passed as.  The result
 * is arg1 - arg2 clamped to [0, DF_MAX_UINT(df)]:
 *   - arg2 >= 0: plain subtraction, saturating at 0;
 *   - arg2 <  0: addition of |arg2|, saturating at DF_MAX_UINT(df).
 *
 * |arg2| is computed in uint64_t arithmetic: negating the int64_t directly
 * overflows for INT64_MIN (reachable with doubleword elements), which is
 * undefined in ISO C unless signed arithmetic is made to wrap.
 */
static inline int64_t msa_subsus_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    uint64_t u_arg1 = UNSIGNED(arg1, df);
    uint64_t max_uint = DF_MAX_UINT(df);
    if (arg2 >= 0) {
        uint64_t u_arg2 = (uint64_t)arg2;
        return (u_arg1 > u_arg2) ?
            (int64_t)(u_arg1 - u_arg2) :
            0;
    } else {
        /* magnitude of the negative subtrahend, without signed negation */
        uint64_t u_arg2 = 0 - (uint64_t)arg2;
        return (u_arg1 < max_uint - u_arg2) ?
            (int64_t)(u_arg1 + u_arg2) :
            (int64_t)max_uint;
    }
}
3895  
3896  void helper_msa_subsus_u_b(CPUMIPSState *env,
3897                             uint32_t wd, uint32_t ws, uint32_t wt)
3898  {
3899      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3900      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3901      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3902  
3903      pwd->b[0]  = msa_subsus_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
3904      pwd->b[1]  = msa_subsus_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
3905      pwd->b[2]  = msa_subsus_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
3906      pwd->b[3]  = msa_subsus_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
3907      pwd->b[4]  = msa_subsus_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
3908      pwd->b[5]  = msa_subsus_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
3909      pwd->b[6]  = msa_subsus_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
3910      pwd->b[7]  = msa_subsus_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
3911      pwd->b[8]  = msa_subsus_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
3912      pwd->b[9]  = msa_subsus_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
3913      pwd->b[10] = msa_subsus_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
3914      pwd->b[11] = msa_subsus_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
3915      pwd->b[12] = msa_subsus_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
3916      pwd->b[13] = msa_subsus_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
3917      pwd->b[14] = msa_subsus_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
3918      pwd->b[15] = msa_subsus_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
3919  }
3920  
3921  void helper_msa_subsus_u_h(CPUMIPSState *env,
3922                             uint32_t wd, uint32_t ws, uint32_t wt)
3923  {
3924      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3925      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3926      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3927  
3928      pwd->h[0]  = msa_subsus_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
3929      pwd->h[1]  = msa_subsus_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
3930      pwd->h[2]  = msa_subsus_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
3931      pwd->h[3]  = msa_subsus_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
3932      pwd->h[4]  = msa_subsus_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
3933      pwd->h[5]  = msa_subsus_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
3934      pwd->h[6]  = msa_subsus_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
3935      pwd->h[7]  = msa_subsus_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
3936  }
3937  
3938  void helper_msa_subsus_u_w(CPUMIPSState *env,
3939                             uint32_t wd, uint32_t ws, uint32_t wt)
3940  {
3941      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3942      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3943      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3944  
3945      pwd->w[0]  = msa_subsus_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
3946      pwd->w[1]  = msa_subsus_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
3947      pwd->w[2]  = msa_subsus_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
3948      pwd->w[3]  = msa_subsus_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
3949  }
3950  
3951  void helper_msa_subsus_u_d(CPUMIPSState *env,
3952                             uint32_t wd, uint32_t ws, uint32_t wt)
3953  {
3954      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3955      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3956      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3957  
3958      pwd->d[0]  = msa_subsus_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3959      pwd->d[1]  = msa_subsus_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3960  }
3961  
3962  
/*
 * Signed-saturating difference of two unsigned elements.
 *
 * arg1 and arg2 are truncated to the element width selected by df
 * (8 << df bits, the same width DF_BITS(df) yields) and treated as
 * unsigned.  The mathematical difference arg1 - arg2 is returned, clamped
 * to the signed range of that width:
 *   [-2^(bits - 1), 2^(bits - 1) - 1].
 *
 * All intermediate values are kept in uint64_t.  Going through
 * DF_MAX_INT()/DF_MIN_INT() and negating the minimum would evaluate
 * 1LL << 63 and -INT64_MIN for 64-bit elements, both of which overflow
 * a signed 64-bit integer.
 */
static inline int64_t msa_subsuu_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const unsigned bits = 8u << df;
    const uint64_t lane_mask = (uint64_t)-1 >> (64 - bits);
    const uint64_t u_arg1 = (uint64_t)arg1 & lane_mask;
    const uint64_t u_arg2 = (uint64_t)arg2 & lane_mask;
    /* Magnitudes of the signed bounds: |max| = 2^(bits-1) - 1, |min| = |max| + 1 */
    const uint64_t max_mag = lane_mask >> 1;
    const uint64_t min_mag = max_mag + 1;
    uint64_t diff;

    if (u_arg1 > u_arg2) {
        /* Positive result: clamp at the signed maximum. */
        diff = u_arg1 - u_arg2;
        return (int64_t)(diff < max_mag ? diff : max_mag);
    }

    /* Zero or negative result: clamp at the signed minimum. */
    diff = u_arg2 - u_arg1;
    if (diff < min_mag) {
        /* diff <= 2^63 - 1 here, so the negation cannot overflow. */
        return -(int64_t)diff;
    }
    return -(int64_t)max_mag - 1;
}
3979  
3980  void helper_msa_subsuu_s_b(CPUMIPSState *env,
3981                             uint32_t wd, uint32_t ws, uint32_t wt)
3982  {
3983      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3984      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3985      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3986  
3987      pwd->b[0]  = msa_subsuu_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
3988      pwd->b[1]  = msa_subsuu_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
3989      pwd->b[2]  = msa_subsuu_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
3990      pwd->b[3]  = msa_subsuu_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
3991      pwd->b[4]  = msa_subsuu_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
3992      pwd->b[5]  = msa_subsuu_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
3993      pwd->b[6]  = msa_subsuu_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
3994      pwd->b[7]  = msa_subsuu_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
3995      pwd->b[8]  = msa_subsuu_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
3996      pwd->b[9]  = msa_subsuu_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
3997      pwd->b[10] = msa_subsuu_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
3998      pwd->b[11] = msa_subsuu_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
3999      pwd->b[12] = msa_subsuu_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
4000      pwd->b[13] = msa_subsuu_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
4001      pwd->b[14] = msa_subsuu_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
4002      pwd->b[15] = msa_subsuu_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
4003  }
4004  
4005  void helper_msa_subsuu_s_h(CPUMIPSState *env,
4006                             uint32_t wd, uint32_t ws, uint32_t wt)
4007  {
4008      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4009      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4010      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4011  
4012      pwd->h[0]  = msa_subsuu_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
4013      pwd->h[1]  = msa_subsuu_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
4014      pwd->h[2]  = msa_subsuu_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
4015      pwd->h[3]  = msa_subsuu_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
4016      pwd->h[4]  = msa_subsuu_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
4017      pwd->h[5]  = msa_subsuu_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
4018      pwd->h[6]  = msa_subsuu_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
4019      pwd->h[7]  = msa_subsuu_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
4020  }
4021  
4022  void helper_msa_subsuu_s_w(CPUMIPSState *env,
4023                             uint32_t wd, uint32_t ws, uint32_t wt)
4024  {
4025      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4026      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4027      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4028  
4029      pwd->w[0]  = msa_subsuu_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
4030      pwd->w[1]  = msa_subsuu_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
4031      pwd->w[2]  = msa_subsuu_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
4032      pwd->w[3]  = msa_subsuu_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
4033  }
4034  
4035  void helper_msa_subsuu_s_d(CPUMIPSState *env,
4036                             uint32_t wd, uint32_t ws, uint32_t wt)
4037  {
4038      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4039      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4040      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4041  
4042      pwd->d[0]  = msa_subsuu_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
4043      pwd->d[1]  = msa_subsuu_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
4044  }
4045  
4046  
/*
 * Element subtraction: returns arg1 - arg2 with wrap-around.
 *
 * df is accepted for symmetry with the other per-element helpers but is
 * not used; callers store the result into a lane of the desired width.
 *
 * The subtraction is done in uint64_t so that extreme 64-bit operands
 * (e.g. INT64_MIN - 1) wrap modulo 2^64 instead of overflowing a signed
 * integer.
 */
static inline int64_t msa_subv_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    return (int64_t)((uint64_t)arg1 - (uint64_t)arg2);
}
4051  
4052  void helper_msa_subv_b(CPUMIPSState *env,
4053                         uint32_t wd, uint32_t ws, uint32_t wt)
4054  {
4055      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4056      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4057      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4058  
4059      pwd->b[0]  = msa_subv_df(DF_BYTE, pws->b[0],  pwt->b[0]);
4060      pwd->b[1]  = msa_subv_df(DF_BYTE, pws->b[1],  pwt->b[1]);
4061      pwd->b[2]  = msa_subv_df(DF_BYTE, pws->b[2],  pwt->b[2]);
4062      pwd->b[3]  = msa_subv_df(DF_BYTE, pws->b[3],  pwt->b[3]);
4063      pwd->b[4]  = msa_subv_df(DF_BYTE, pws->b[4],  pwt->b[4]);
4064      pwd->b[5]  = msa_subv_df(DF_BYTE, pws->b[5],  pwt->b[5]);
4065      pwd->b[6]  = msa_subv_df(DF_BYTE, pws->b[6],  pwt->b[6]);
4066      pwd->b[7]  = msa_subv_df(DF_BYTE, pws->b[7],  pwt->b[7]);
4067      pwd->b[8]  = msa_subv_df(DF_BYTE, pws->b[8],  pwt->b[8]);
4068      pwd->b[9]  = msa_subv_df(DF_BYTE, pws->b[9],  pwt->b[9]);
4069      pwd->b[10] = msa_subv_df(DF_BYTE, pws->b[10], pwt->b[10]);
4070      pwd->b[11] = msa_subv_df(DF_BYTE, pws->b[11], pwt->b[11]);
4071      pwd->b[12] = msa_subv_df(DF_BYTE, pws->b[12], pwt->b[12]);
4072      pwd->b[13] = msa_subv_df(DF_BYTE, pws->b[13], pwt->b[13]);
4073      pwd->b[14] = msa_subv_df(DF_BYTE, pws->b[14], pwt->b[14]);
4074      pwd->b[15] = msa_subv_df(DF_BYTE, pws->b[15], pwt->b[15]);
4075  }
4076  
4077  void helper_msa_subv_h(CPUMIPSState *env,
4078                         uint32_t wd, uint32_t ws, uint32_t wt)
4079  {
4080      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4081      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4082      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4083  
4084      pwd->h[0]  = msa_subv_df(DF_HALF, pws->h[0],  pwt->h[0]);
4085      pwd->h[1]  = msa_subv_df(DF_HALF, pws->h[1],  pwt->h[1]);
4086      pwd->h[2]  = msa_subv_df(DF_HALF, pws->h[2],  pwt->h[2]);
4087      pwd->h[3]  = msa_subv_df(DF_HALF, pws->h[3],  pwt->h[3]);
4088      pwd->h[4]  = msa_subv_df(DF_HALF, pws->h[4],  pwt->h[4]);
4089      pwd->h[5]  = msa_subv_df(DF_HALF, pws->h[5],  pwt->h[5]);
4090      pwd->h[6]  = msa_subv_df(DF_HALF, pws->h[6],  pwt->h[6]);
4091      pwd->h[7]  = msa_subv_df(DF_HALF, pws->h[7],  pwt->h[7]);
4092  }
4093  
4094  void helper_msa_subv_w(CPUMIPSState *env,
4095                         uint32_t wd, uint32_t ws, uint32_t wt)
4096  {
4097      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4098      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4099      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4100  
4101      pwd->w[0]  = msa_subv_df(DF_WORD, pws->w[0],  pwt->w[0]);
4102      pwd->w[1]  = msa_subv_df(DF_WORD, pws->w[1],  pwt->w[1]);
4103      pwd->w[2]  = msa_subv_df(DF_WORD, pws->w[2],  pwt->w[2]);
4104      pwd->w[3]  = msa_subv_df(DF_WORD, pws->w[3],  pwt->w[3]);
4105  }
4106  
4107  void helper_msa_subv_d(CPUMIPSState *env,
4108                         uint32_t wd, uint32_t ws, uint32_t wt)
4109  {
4110      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4111      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4112      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4113  
4114      pwd->d[0]  = msa_subv_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
4115      pwd->d[1]  = msa_subv_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
4116  }
4117  
4118  
4119  /*
4120   * Interleave
4121   * ----------
4122   *
4123   * +---------------+----------------------------------------------------------+
4124   * | ILVEV.B       | Vector Interleave Even (byte)                            |
4125   * | ILVEV.H       | Vector Interleave Even (halfword)                        |
4126   * | ILVEV.W       | Vector Interleave Even (word)                            |
4127   * | ILVEV.D       | Vector Interleave Even (doubleword)                      |
4128   * | ILVOD.B       | Vector Interleave Odd (byte)                             |
4129   * | ILVOD.H       | Vector Interleave Odd (halfword)                         |
4130   * | ILVOD.W       | Vector Interleave Odd (word)                             |
4131   * | ILVOD.D       | Vector Interleave Odd (doubleword)                       |
4132   * | ILVL.B        | Vector Interleave Left (byte)                            |
4133   * | ILVL.H        | Vector Interleave Left (halfword)                        |
4134   * | ILVL.W        | Vector Interleave Left (word)                            |
4135   * | ILVL.D        | Vector Interleave Left (doubleword)                      |
4136   * | ILVR.B        | Vector Interleave Right (byte)                           |
4137   * | ILVR.H        | Vector Interleave Right (halfword)                       |
4138   * | ILVR.W        | Vector Interleave Right (word)                           |
4139   * | ILVR.D        | Vector Interleave Right (doubleword)                     |
4140   * +---------------+----------------------------------------------------------+
4141   */
4142  
4143  
4144  void helper_msa_ilvev_b(CPUMIPSState *env,
4145                          uint32_t wd, uint32_t ws, uint32_t wt)
4146  {
4147      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4148      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4149      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4150  
4151  #if HOST_BIG_ENDIAN
4152      pwd->b[8]  = pws->b[9];
4153      pwd->b[9]  = pwt->b[9];
4154      pwd->b[10] = pws->b[11];
4155      pwd->b[11] = pwt->b[11];
4156      pwd->b[12] = pws->b[13];
4157      pwd->b[13] = pwt->b[13];
4158      pwd->b[14] = pws->b[15];
4159      pwd->b[15] = pwt->b[15];
4160      pwd->b[0]  = pws->b[1];
4161      pwd->b[1]  = pwt->b[1];
4162      pwd->b[2]  = pws->b[3];
4163      pwd->b[3]  = pwt->b[3];
4164      pwd->b[4]  = pws->b[5];
4165      pwd->b[5]  = pwt->b[5];
4166      pwd->b[6]  = pws->b[7];
4167      pwd->b[7]  = pwt->b[7];
4168  #else
4169      pwd->b[15] = pws->b[14];
4170      pwd->b[14] = pwt->b[14];
4171      pwd->b[13] = pws->b[12];
4172      pwd->b[12] = pwt->b[12];
4173      pwd->b[11] = pws->b[10];
4174      pwd->b[10] = pwt->b[10];
4175      pwd->b[9]  = pws->b[8];
4176      pwd->b[8]  = pwt->b[8];
4177      pwd->b[7]  = pws->b[6];
4178      pwd->b[6]  = pwt->b[6];
4179      pwd->b[5]  = pws->b[4];
4180      pwd->b[4]  = pwt->b[4];
4181      pwd->b[3]  = pws->b[2];
4182      pwd->b[2]  = pwt->b[2];
4183      pwd->b[1]  = pws->b[0];
4184      pwd->b[0]  = pwt->b[0];
4185  #endif
4186  }
4187  
4188  void helper_msa_ilvev_h(CPUMIPSState *env,
4189                          uint32_t wd, uint32_t ws, uint32_t wt)
4190  {
4191      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4192      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4193      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4194  
4195  #if HOST_BIG_ENDIAN
4196      pwd->h[4] = pws->h[5];
4197      pwd->h[5] = pwt->h[5];
4198      pwd->h[6] = pws->h[7];
4199      pwd->h[7] = pwt->h[7];
4200      pwd->h[0] = pws->h[1];
4201      pwd->h[1] = pwt->h[1];
4202      pwd->h[2] = pws->h[3];
4203      pwd->h[3] = pwt->h[3];
4204  #else
4205      pwd->h[7] = pws->h[6];
4206      pwd->h[6] = pwt->h[6];
4207      pwd->h[5] = pws->h[4];
4208      pwd->h[4] = pwt->h[4];
4209      pwd->h[3] = pws->h[2];
4210      pwd->h[2] = pwt->h[2];
4211      pwd->h[1] = pws->h[0];
4212      pwd->h[0] = pwt->h[0];
4213  #endif
4214  }
4215  
4216  void helper_msa_ilvev_w(CPUMIPSState *env,
4217                          uint32_t wd, uint32_t ws, uint32_t wt)
4218  {
4219      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4220      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4221      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4222  
4223  #if HOST_BIG_ENDIAN
4224      pwd->w[2] = pws->w[3];
4225      pwd->w[3] = pwt->w[3];
4226      pwd->w[0] = pws->w[1];
4227      pwd->w[1] = pwt->w[1];
4228  #else
4229      pwd->w[3] = pws->w[2];
4230      pwd->w[2] = pwt->w[2];
4231      pwd->w[1] = pws->w[0];
4232      pwd->w[0] = pwt->w[0];
4233  #endif
4234  }
4235  
4236  void helper_msa_ilvev_d(CPUMIPSState *env,
4237                          uint32_t wd, uint32_t ws, uint32_t wt)
4238  {
4239      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4240      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4241      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4242  
4243      pwd->d[1] = pws->d[0];
4244      pwd->d[0] = pwt->d[0];
4245  }
4246  
4247  
4248  void helper_msa_ilvod_b(CPUMIPSState *env,
4249                          uint32_t wd, uint32_t ws, uint32_t wt)
4250  {
4251      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4252      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4253      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4254  
4255  #if HOST_BIG_ENDIAN
4256      pwd->b[7]  = pwt->b[6];
4257      pwd->b[6]  = pws->b[6];
4258      pwd->b[5]  = pwt->b[4];
4259      pwd->b[4]  = pws->b[4];
4260      pwd->b[3]  = pwt->b[2];
4261      pwd->b[2]  = pws->b[2];
4262      pwd->b[1]  = pwt->b[0];
4263      pwd->b[0]  = pws->b[0];
4264      pwd->b[15] = pwt->b[14];
4265      pwd->b[14] = pws->b[14];
4266      pwd->b[13] = pwt->b[12];
4267      pwd->b[12] = pws->b[12];
4268      pwd->b[11] = pwt->b[10];
4269      pwd->b[10] = pws->b[10];
4270      pwd->b[9]  = pwt->b[8];
4271      pwd->b[8]  = pws->b[8];
4272  #else
4273      pwd->b[0]  = pwt->b[1];
4274      pwd->b[1]  = pws->b[1];
4275      pwd->b[2]  = pwt->b[3];
4276      pwd->b[3]  = pws->b[3];
4277      pwd->b[4]  = pwt->b[5];
4278      pwd->b[5]  = pws->b[5];
4279      pwd->b[6]  = pwt->b[7];
4280      pwd->b[7]  = pws->b[7];
4281      pwd->b[8]  = pwt->b[9];
4282      pwd->b[9]  = pws->b[9];
4283      pwd->b[10] = pwt->b[11];
4284      pwd->b[11] = pws->b[11];
4285      pwd->b[12] = pwt->b[13];
4286      pwd->b[13] = pws->b[13];
4287      pwd->b[14] = pwt->b[15];
4288      pwd->b[15] = pws->b[15];
4289  #endif
4290  }
4291  
4292  void helper_msa_ilvod_h(CPUMIPSState *env,
4293                          uint32_t wd, uint32_t ws, uint32_t wt)
4294  {
4295      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4296      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4297      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4298  
4299  #if HOST_BIG_ENDIAN
4300      pwd->h[3] = pwt->h[2];
4301      pwd->h[2] = pws->h[2];
4302      pwd->h[1] = pwt->h[0];
4303      pwd->h[0] = pws->h[0];
4304      pwd->h[7] = pwt->h[6];
4305      pwd->h[6] = pws->h[6];
4306      pwd->h[5] = pwt->h[4];
4307      pwd->h[4] = pws->h[4];
4308  #else
4309      pwd->h[0] = pwt->h[1];
4310      pwd->h[1] = pws->h[1];
4311      pwd->h[2] = pwt->h[3];
4312      pwd->h[3] = pws->h[3];
4313      pwd->h[4] = pwt->h[5];
4314      pwd->h[5] = pws->h[5];
4315      pwd->h[6] = pwt->h[7];
4316      pwd->h[7] = pws->h[7];
4317  #endif
4318  }
4319  
4320  void helper_msa_ilvod_w(CPUMIPSState *env,
4321                          uint32_t wd, uint32_t ws, uint32_t wt)
4322  {
4323      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4324      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4325      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4326  
4327  #if HOST_BIG_ENDIAN
4328      pwd->w[1] = pwt->w[0];
4329      pwd->w[0] = pws->w[0];
4330      pwd->w[3] = pwt->w[2];
4331      pwd->w[2] = pws->w[2];
4332  #else
4333      pwd->w[0] = pwt->w[1];
4334      pwd->w[1] = pws->w[1];
4335      pwd->w[2] = pwt->w[3];
4336      pwd->w[3] = pws->w[3];
4337  #endif
4338  }
4339  
4340  void helper_msa_ilvod_d(CPUMIPSState *env,
4341                          uint32_t wd, uint32_t ws, uint32_t wt)
4342  {
4343      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4344      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4345      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4346  
4347      pwd->d[0] = pwt->d[1];
4348      pwd->d[1] = pws->d[1];
4349  }
4350  
4351  
4352  void helper_msa_ilvl_b(CPUMIPSState *env,
4353                         uint32_t wd, uint32_t ws, uint32_t wt)
4354  {
4355      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4356      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4357      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4358  
4359  #if HOST_BIG_ENDIAN
4360      pwd->b[7]  = pwt->b[15];
4361      pwd->b[6]  = pws->b[15];
4362      pwd->b[5]  = pwt->b[14];
4363      pwd->b[4]  = pws->b[14];
4364      pwd->b[3]  = pwt->b[13];
4365      pwd->b[2]  = pws->b[13];
4366      pwd->b[1]  = pwt->b[12];
4367      pwd->b[0]  = pws->b[12];
4368      pwd->b[15] = pwt->b[11];
4369      pwd->b[14] = pws->b[11];
4370      pwd->b[13] = pwt->b[10];
4371      pwd->b[12] = pws->b[10];
4372      pwd->b[11] = pwt->b[9];
4373      pwd->b[10] = pws->b[9];
4374      pwd->b[9]  = pwt->b[8];
4375      pwd->b[8]  = pws->b[8];
4376  #else
4377      pwd->b[0]  = pwt->b[8];
4378      pwd->b[1]  = pws->b[8];
4379      pwd->b[2]  = pwt->b[9];
4380      pwd->b[3]  = pws->b[9];
4381      pwd->b[4]  = pwt->b[10];
4382      pwd->b[5]  = pws->b[10];
4383      pwd->b[6]  = pwt->b[11];
4384      pwd->b[7]  = pws->b[11];
4385      pwd->b[8]  = pwt->b[12];
4386      pwd->b[9]  = pws->b[12];
4387      pwd->b[10] = pwt->b[13];
4388      pwd->b[11] = pws->b[13];
4389      pwd->b[12] = pwt->b[14];
4390      pwd->b[13] = pws->b[14];
4391      pwd->b[14] = pwt->b[15];
4392      pwd->b[15] = pws->b[15];
4393  #endif
4394  }
4395  
4396  void helper_msa_ilvl_h(CPUMIPSState *env,
4397                         uint32_t wd, uint32_t ws, uint32_t wt)
4398  {
4399      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4400      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4401      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4402  
4403  #if HOST_BIG_ENDIAN
4404      pwd->h[3] = pwt->h[7];
4405      pwd->h[2] = pws->h[7];
4406      pwd->h[1] = pwt->h[6];
4407      pwd->h[0] = pws->h[6];
4408      pwd->h[7] = pwt->h[5];
4409      pwd->h[6] = pws->h[5];
4410      pwd->h[5] = pwt->h[4];
4411      pwd->h[4] = pws->h[4];
4412  #else
4413      pwd->h[0] = pwt->h[4];
4414      pwd->h[1] = pws->h[4];
4415      pwd->h[2] = pwt->h[5];
4416      pwd->h[3] = pws->h[5];
4417      pwd->h[4] = pwt->h[6];
4418      pwd->h[5] = pws->h[6];
4419      pwd->h[6] = pwt->h[7];
4420      pwd->h[7] = pws->h[7];
4421  #endif
4422  }
4423  
4424  void helper_msa_ilvl_w(CPUMIPSState *env,
4425                         uint32_t wd, uint32_t ws, uint32_t wt)
4426  {
4427      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4428      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4429      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4430  
4431  #if HOST_BIG_ENDIAN
4432      pwd->w[1] = pwt->w[3];
4433      pwd->w[0] = pws->w[3];
4434      pwd->w[3] = pwt->w[2];
4435      pwd->w[2] = pws->w[2];
4436  #else
4437      pwd->w[0] = pwt->w[2];
4438      pwd->w[1] = pws->w[2];
4439      pwd->w[2] = pwt->w[3];
4440      pwd->w[3] = pws->w[3];
4441  #endif
4442  }
4443  
4444  void helper_msa_ilvl_d(CPUMIPSState *env,
4445                         uint32_t wd, uint32_t ws, uint32_t wt)
4446  {
4447      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4448      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4449      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4450  
4451      pwd->d[0] = pwt->d[1];
4452      pwd->d[1] = pws->d[1];
4453  }
4454  
4455  
4456  void helper_msa_ilvr_b(CPUMIPSState *env,
4457                         uint32_t wd, uint32_t ws, uint32_t wt)
4458  {
4459      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4460      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4461      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4462  
4463  #if HOST_BIG_ENDIAN
4464      pwd->b[8]  = pws->b[0];
4465      pwd->b[9]  = pwt->b[0];
4466      pwd->b[10] = pws->b[1];
4467      pwd->b[11] = pwt->b[1];
4468      pwd->b[12] = pws->b[2];
4469      pwd->b[13] = pwt->b[2];
4470      pwd->b[14] = pws->b[3];
4471      pwd->b[15] = pwt->b[3];
4472      pwd->b[0]  = pws->b[4];
4473      pwd->b[1]  = pwt->b[4];
4474      pwd->b[2]  = pws->b[5];
4475      pwd->b[3]  = pwt->b[5];
4476      pwd->b[4]  = pws->b[6];
4477      pwd->b[5]  = pwt->b[6];
4478      pwd->b[6]  = pws->b[7];
4479      pwd->b[7]  = pwt->b[7];
4480  #else
4481      pwd->b[15] = pws->b[7];
4482      pwd->b[14] = pwt->b[7];
4483      pwd->b[13] = pws->b[6];
4484      pwd->b[12] = pwt->b[6];
4485      pwd->b[11] = pws->b[5];
4486      pwd->b[10] = pwt->b[5];
4487      pwd->b[9]  = pws->b[4];
4488      pwd->b[8]  = pwt->b[4];
4489      pwd->b[7]  = pws->b[3];
4490      pwd->b[6]  = pwt->b[3];
4491      pwd->b[5]  = pws->b[2];
4492      pwd->b[4]  = pwt->b[2];
4493      pwd->b[3]  = pws->b[1];
4494      pwd->b[2]  = pwt->b[1];
4495      pwd->b[1]  = pws->b[0];
4496      pwd->b[0]  = pwt->b[0];
4497  #endif
4498  }
4499  
4500  void helper_msa_ilvr_h(CPUMIPSState *env,
4501                         uint32_t wd, uint32_t ws, uint32_t wt)
4502  {
4503      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4504      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4505      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4506  
4507  #if HOST_BIG_ENDIAN
4508      pwd->h[4] = pws->h[0];
4509      pwd->h[5] = pwt->h[0];
4510      pwd->h[6] = pws->h[1];
4511      pwd->h[7] = pwt->h[1];
4512      pwd->h[0] = pws->h[2];
4513      pwd->h[1] = pwt->h[2];
4514      pwd->h[2] = pws->h[3];
4515      pwd->h[3] = pwt->h[3];
4516  #else
4517      pwd->h[7] = pws->h[3];
4518      pwd->h[6] = pwt->h[3];
4519      pwd->h[5] = pws->h[2];
4520      pwd->h[4] = pwt->h[2];
4521      pwd->h[3] = pws->h[1];
4522      pwd->h[2] = pwt->h[1];
4523      pwd->h[1] = pws->h[0];
4524      pwd->h[0] = pwt->h[0];
4525  #endif
4526  }
4527  
4528  void helper_msa_ilvr_w(CPUMIPSState *env,
4529                         uint32_t wd, uint32_t ws, uint32_t wt)
4530  {
4531      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4532      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4533      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4534  
4535  #if HOST_BIG_ENDIAN
4536      pwd->w[2] = pws->w[0];
4537      pwd->w[3] = pwt->w[0];
4538      pwd->w[0] = pws->w[1];
4539      pwd->w[1] = pwt->w[1];
4540  #else
4541      pwd->w[3] = pws->w[1];
4542      pwd->w[2] = pwt->w[1];
4543      pwd->w[1] = pws->w[0];
4544      pwd->w[0] = pwt->w[0];
4545  #endif
4546  }
4547  
4548  void helper_msa_ilvr_d(CPUMIPSState *env,
4549                         uint32_t wd, uint32_t ws, uint32_t wt)
4550  {
4551      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4552      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4553      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4554  
4555      pwd->d[1] = pws->d[0];
4556      pwd->d[0] = pwt->d[0];
4557  }
4558  
4559  
4560  /*
4561   * Logic
4562   * -----
4563   *
4564   * +---------------+----------------------------------------------------------+
4565   * | AND.V         | Vector Logical And                                       |
4566   * | NOR.V         | Vector Logical Negated Or                                |
4567   * | OR.V          | Vector Logical Or                                        |
4568   * | XOR.V         | Vector Logical Exclusive Or                              |
4569   * +---------------+----------------------------------------------------------+
4570   */
4571  
4572  
4573  void helper_msa_and_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
4574  {
4575      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4576      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4577      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4578  
4579      pwd->d[0] = pws->d[0] & pwt->d[0];
4580      pwd->d[1] = pws->d[1] & pwt->d[1];
4581  }
4582  
4583  void helper_msa_nor_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
4584  {
4585      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4586      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4587      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4588  
4589      pwd->d[0] = ~(pws->d[0] | pwt->d[0]);
4590      pwd->d[1] = ~(pws->d[1] | pwt->d[1]);
4591  }
4592  
4593  void helper_msa_or_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
4594  {
4595      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4596      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4597      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4598  
4599      pwd->d[0] = pws->d[0] | pwt->d[0];
4600      pwd->d[1] = pws->d[1] | pwt->d[1];
4601  }
4602  
4603  void helper_msa_xor_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
4604  {
4605      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4606      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4607      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4608  
4609      pwd->d[0] = pws->d[0] ^ pwt->d[0];
4610      pwd->d[1] = pws->d[1] ^ pwt->d[1];
4611  }
4612  
4613  
4614  /*
4615   * Move
4616   * ----
4617   *
4618   * +---------------+----------------------------------------------------------+
4619   * | MOVE.V        | Vector Move                                              |
4620   * +---------------+----------------------------------------------------------+
4621   */
4622  
4623  static inline void msa_move_v(wr_t *pwd, wr_t *pws)
4624  {
4625      pwd->d[0] = pws->d[0];
4626      pwd->d[1] = pws->d[1];
4627  }
4628  
4629  void helper_msa_move_v(CPUMIPSState *env, uint32_t wd, uint32_t ws)
4630  {
4631      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4632      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4633  
4634      msa_move_v(pwd, pws);
4635  }
4636  
4637  
4638  /*
4639   * Pack
4640   * ----
4641   *
4642   * +---------------+----------------------------------------------------------+
4643   * | PCKEV.B       | Vector Pack Even (byte)                                  |
4644   * | PCKEV.H       | Vector Pack Even (halfword)                              |
4645   * | PCKEV.W       | Vector Pack Even (word)                                  |
4646   * | PCKEV.D       | Vector Pack Even (doubleword)                            |
4647   * | PCKOD.B       | Vector Pack Odd (byte)                                   |
4648   * | PCKOD.H       | Vector Pack Odd (halfword)                               |
4649   * | PCKOD.W       | Vector Pack Odd (word)                                   |
4650   * | PCKOD.D       | Vector Pack Odd (doubleword)                             |
4651   * | VSHF.B        | Vector Data Preserving Shuffle (byte)                    |
4652   * | VSHF.H        | Vector Data Preserving Shuffle (halfword)                |
4653   * | VSHF.W        | Vector Data Preserving Shuffle (word)                    |
4654   * | VSHF.D        | Vector Data Preserving Shuffle (doubleword)              |
4655   * +---------------+----------------------------------------------------------+
4656   */
4657  
4658  
4659  void helper_msa_pckev_b(CPUMIPSState *env,
4660                          uint32_t wd, uint32_t ws, uint32_t wt)
4661  {
4662      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4663      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4664      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4665  
4666  #if HOST_BIG_ENDIAN
4667      pwd->b[8]  = pws->b[9];
4668      pwd->b[10] = pws->b[13];
4669      pwd->b[12] = pws->b[1];
4670      pwd->b[14] = pws->b[5];
4671      pwd->b[0]  = pwt->b[9];
4672      pwd->b[2]  = pwt->b[13];
4673      pwd->b[4]  = pwt->b[1];
4674      pwd->b[6]  = pwt->b[5];
4675      pwd->b[9]  = pws->b[11];
4676      pwd->b[13] = pws->b[3];
4677      pwd->b[1]  = pwt->b[11];
4678      pwd->b[5]  = pwt->b[3];
4679      pwd->b[11] = pws->b[15];
4680      pwd->b[3]  = pwt->b[15];
4681      pwd->b[15] = pws->b[7];
4682      pwd->b[7]  = pwt->b[7];
4683  #else
4684      pwd->b[15] = pws->b[14];
4685      pwd->b[13] = pws->b[10];
4686      pwd->b[11] = pws->b[6];
4687      pwd->b[9]  = pws->b[2];
4688      pwd->b[7]  = pwt->b[14];
4689      pwd->b[5]  = pwt->b[10];
4690      pwd->b[3]  = pwt->b[6];
4691      pwd->b[1]  = pwt->b[2];
4692      pwd->b[14] = pws->b[12];
4693      pwd->b[10] = pws->b[4];
4694      pwd->b[6]  = pwt->b[12];
4695      pwd->b[2]  = pwt->b[4];
4696      pwd->b[12] = pws->b[8];
4697      pwd->b[4]  = pwt->b[8];
4698      pwd->b[8]  = pws->b[0];
4699      pwd->b[0]  = pwt->b[0];
4700  #endif
4701  }
4702  
4703  void helper_msa_pckev_h(CPUMIPSState *env,
4704                          uint32_t wd, uint32_t ws, uint32_t wt)
4705  {
4706      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4707      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4708      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4709  
4710  #if HOST_BIG_ENDIAN
4711      pwd->h[4] = pws->h[5];
4712      pwd->h[6] = pws->h[1];
4713      pwd->h[0] = pwt->h[5];
4714      pwd->h[2] = pwt->h[1];
4715      pwd->h[5] = pws->h[7];
4716      pwd->h[1] = pwt->h[7];
4717      pwd->h[7] = pws->h[3];
4718      pwd->h[3] = pwt->h[3];
4719  #else
4720      pwd->h[7] = pws->h[6];
4721      pwd->h[5] = pws->h[2];
4722      pwd->h[3] = pwt->h[6];
4723      pwd->h[1] = pwt->h[2];
4724      pwd->h[6] = pws->h[4];
4725      pwd->h[2] = pwt->h[4];
4726      pwd->h[4] = pws->h[0];
4727      pwd->h[0] = pwt->h[0];
4728  #endif
4729  }
4730  
4731  void helper_msa_pckev_w(CPUMIPSState *env,
4732                          uint32_t wd, uint32_t ws, uint32_t wt)
4733  {
4734      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4735      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4736      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4737  
4738  #if HOST_BIG_ENDIAN
4739      pwd->w[2] = pws->w[3];
4740      pwd->w[0] = pwt->w[3];
4741      pwd->w[3] = pws->w[1];
4742      pwd->w[1] = pwt->w[1];
4743  #else
4744      pwd->w[3] = pws->w[2];
4745      pwd->w[1] = pwt->w[2];
4746      pwd->w[2] = pws->w[0];
4747      pwd->w[0] = pwt->w[0];
4748  #endif
4749  }
4750  
4751  void helper_msa_pckev_d(CPUMIPSState *env,
4752                          uint32_t wd, uint32_t ws, uint32_t wt)
4753  {
4754      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4755      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4756      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4757  
4758      pwd->d[1] = pws->d[0];
4759      pwd->d[0] = pwt->d[0];
4760  }
4761  
4762  
4763  void helper_msa_pckod_b(CPUMIPSState *env,
4764                          uint32_t wd, uint32_t ws, uint32_t wt)
4765  {
4766      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4767      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4768      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4769  
4770  #if HOST_BIG_ENDIAN
4771      pwd->b[7]  = pwt->b[6];
4772      pwd->b[5]  = pwt->b[2];
4773      pwd->b[3]  = pwt->b[14];
4774      pwd->b[1]  = pwt->b[10];
4775      pwd->b[15] = pws->b[6];
4776      pwd->b[13] = pws->b[2];
4777      pwd->b[11] = pws->b[14];
4778      pwd->b[9]  = pws->b[10];
4779      pwd->b[6]  = pwt->b[4];
4780      pwd->b[2]  = pwt->b[12];
4781      pwd->b[14] = pws->b[4];
4782      pwd->b[10] = pws->b[12];
4783      pwd->b[4]  = pwt->b[0];
4784      pwd->b[12] = pws->b[0];
4785      pwd->b[0]  = pwt->b[8];
4786      pwd->b[8]  = pws->b[8];
4787  #else
4788      pwd->b[0]  = pwt->b[1];
4789      pwd->b[2]  = pwt->b[5];
4790      pwd->b[4]  = pwt->b[9];
4791      pwd->b[6]  = pwt->b[13];
4792      pwd->b[8]  = pws->b[1];
4793      pwd->b[10] = pws->b[5];
4794      pwd->b[12] = pws->b[9];
4795      pwd->b[14] = pws->b[13];
4796      pwd->b[1]  = pwt->b[3];
4797      pwd->b[5]  = pwt->b[11];
4798      pwd->b[9]  = pws->b[3];
4799      pwd->b[13] = pws->b[11];
4800      pwd->b[3]  = pwt->b[7];
4801      pwd->b[11] = pws->b[7];
4802      pwd->b[7]  = pwt->b[15];
4803      pwd->b[15] = pws->b[15];
4804  #endif
4805  
4806  }
4807  
4808  void helper_msa_pckod_h(CPUMIPSState *env,
4809                          uint32_t wd, uint32_t ws, uint32_t wt)
4810  {
4811      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4812      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4813      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4814  
4815  #if HOST_BIG_ENDIAN
4816      pwd->h[3] = pwt->h[2];
4817      pwd->h[1] = pwt->h[6];
4818      pwd->h[7] = pws->h[2];
4819      pwd->h[5] = pws->h[6];
4820      pwd->h[2] = pwt->h[0];
4821      pwd->h[6] = pws->h[0];
4822      pwd->h[0] = pwt->h[4];
4823      pwd->h[4] = pws->h[4];
4824  #else
4825      pwd->h[0] = pwt->h[1];
4826      pwd->h[2] = pwt->h[5];
4827      pwd->h[4] = pws->h[1];
4828      pwd->h[6] = pws->h[5];
4829      pwd->h[1] = pwt->h[3];
4830      pwd->h[5] = pws->h[3];
4831      pwd->h[3] = pwt->h[7];
4832      pwd->h[7] = pws->h[7];
4833  #endif
4834  }
4835  
4836  void helper_msa_pckod_w(CPUMIPSState *env,
4837                          uint32_t wd, uint32_t ws, uint32_t wt)
4838  {
4839      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4840      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4841      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4842  
4843  #if HOST_BIG_ENDIAN
4844      pwd->w[1] = pwt->w[0];
4845      pwd->w[3] = pws->w[0];
4846      pwd->w[0] = pwt->w[2];
4847      pwd->w[2] = pws->w[2];
4848  #else
4849      pwd->w[0] = pwt->w[1];
4850      pwd->w[2] = pws->w[1];
4851      pwd->w[1] = pwt->w[3];
4852      pwd->w[3] = pws->w[3];
4853  #endif
4854  }
4855  
4856  void helper_msa_pckod_d(CPUMIPSState *env,
4857                          uint32_t wd, uint32_t ws, uint32_t wt)
4858  {
4859      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4860      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4861      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4862  
4863      pwd->d[0] = pwt->d[1];
4864      pwd->d[1] = pws->d[1];
4865  }
4866  
4867  
4868  /*
4869   * Shift
4870   * -----
4871   *
4872   * +---------------+----------------------------------------------------------+
4873   * | SLL.B         | Vector Shift Left (byte)                                 |
4874   * | SLL.H         | Vector Shift Left (halfword)                             |
4875   * | SLL.W         | Vector Shift Left (word)                                 |
4876   * | SLL.D         | Vector Shift Left (doubleword)                           |
4877   * | SRA.B         | Vector Shift Right Arithmetic (byte)                     |
4878   * | SRA.H         | Vector Shift Right Arithmetic (halfword)                 |
4879   * | SRA.W         | Vector Shift Right Arithmetic (word)                     |
4880   * | SRA.D         | Vector Shift Right Arithmetic (doubleword)               |
4881   * | SRAR.B        | Vector Shift Right Arithmetic Rounded (byte)             |
4882   * | SRAR.H        | Vector Shift Right Arithmetic Rounded (halfword)         |
4883   * | SRAR.W        | Vector Shift Right Arithmetic Rounded (word)             |
4884   * | SRAR.D        | Vector Shift Right Arithmetic Rounded (doubleword)       |
4885   * | SRL.B         | Vector Shift Right Logical (byte)                        |
4886   * | SRL.H         | Vector Shift Right Logical (halfword)                    |
4887   * | SRL.W         | Vector Shift Right Logical (word)                        |
4888   * | SRL.D         | Vector Shift Right Logical (doubleword)                  |
4889   * | SRLR.B        | Vector Shift Right Logical Rounded (byte)                |
4890   * | SRLR.H        | Vector Shift Right Logical Rounded (halfword)            |
4891   * | SRLR.W        | Vector Shift Right Logical Rounded (word)                |
4892   * | SRLR.D        | Vector Shift Right Logical Rounded (doubleword)          |
4893   * +---------------+----------------------------------------------------------+
4894   */
4895  
4896  
/*
 * Element operation for SLL: shift arg1 left.
 *
 * The shift count is BIT_POSITION(arg2, df); that macro is defined elsewhere
 * in this file (presumably it reduces arg2 to a count valid for the element
 * width of df -- confirm at its definition).  The caller narrows the 64-bit
 * result when storing it into the destination element.
 */
static inline int64_t msa_sll_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const int32_t shift = BIT_POSITION(arg2, df);

    return arg1 << shift;
}
4902  
4903  void helper_msa_sll_b(CPUMIPSState *env,
4904                        uint32_t wd, uint32_t ws, uint32_t wt)
4905  {
4906      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4907      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4908      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4909  
4910      pwd->b[0]  = msa_sll_df(DF_BYTE, pws->b[0],  pwt->b[0]);
4911      pwd->b[1]  = msa_sll_df(DF_BYTE, pws->b[1],  pwt->b[1]);
4912      pwd->b[2]  = msa_sll_df(DF_BYTE, pws->b[2],  pwt->b[2]);
4913      pwd->b[3]  = msa_sll_df(DF_BYTE, pws->b[3],  pwt->b[3]);
4914      pwd->b[4]  = msa_sll_df(DF_BYTE, pws->b[4],  pwt->b[4]);
4915      pwd->b[5]  = msa_sll_df(DF_BYTE, pws->b[5],  pwt->b[5]);
4916      pwd->b[6]  = msa_sll_df(DF_BYTE, pws->b[6],  pwt->b[6]);
4917      pwd->b[7]  = msa_sll_df(DF_BYTE, pws->b[7],  pwt->b[7]);
4918      pwd->b[8]  = msa_sll_df(DF_BYTE, pws->b[8],  pwt->b[8]);
4919      pwd->b[9]  = msa_sll_df(DF_BYTE, pws->b[9],  pwt->b[9]);
4920      pwd->b[10] = msa_sll_df(DF_BYTE, pws->b[10], pwt->b[10]);
4921      pwd->b[11] = msa_sll_df(DF_BYTE, pws->b[11], pwt->b[11]);
4922      pwd->b[12] = msa_sll_df(DF_BYTE, pws->b[12], pwt->b[12]);
4923      pwd->b[13] = msa_sll_df(DF_BYTE, pws->b[13], pwt->b[13]);
4924      pwd->b[14] = msa_sll_df(DF_BYTE, pws->b[14], pwt->b[14]);
4925      pwd->b[15] = msa_sll_df(DF_BYTE, pws->b[15], pwt->b[15]);
4926  }
4927  
4928  void helper_msa_sll_h(CPUMIPSState *env,
4929                        uint32_t wd, uint32_t ws, uint32_t wt)
4930  {
4931      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4932      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4933      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4934  
4935      pwd->h[0]  = msa_sll_df(DF_HALF, pws->h[0],  pwt->h[0]);
4936      pwd->h[1]  = msa_sll_df(DF_HALF, pws->h[1],  pwt->h[1]);
4937      pwd->h[2]  = msa_sll_df(DF_HALF, pws->h[2],  pwt->h[2]);
4938      pwd->h[3]  = msa_sll_df(DF_HALF, pws->h[3],  pwt->h[3]);
4939      pwd->h[4]  = msa_sll_df(DF_HALF, pws->h[4],  pwt->h[4]);
4940      pwd->h[5]  = msa_sll_df(DF_HALF, pws->h[5],  pwt->h[5]);
4941      pwd->h[6]  = msa_sll_df(DF_HALF, pws->h[6],  pwt->h[6]);
4942      pwd->h[7]  = msa_sll_df(DF_HALF, pws->h[7],  pwt->h[7]);
4943  }
4944  
4945  void helper_msa_sll_w(CPUMIPSState *env,
4946                        uint32_t wd, uint32_t ws, uint32_t wt)
4947  {
4948      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4949      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4950      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4951  
4952      pwd->w[0]  = msa_sll_df(DF_WORD, pws->w[0],  pwt->w[0]);
4953      pwd->w[1]  = msa_sll_df(DF_WORD, pws->w[1],  pwt->w[1]);
4954      pwd->w[2]  = msa_sll_df(DF_WORD, pws->w[2],  pwt->w[2]);
4955      pwd->w[3]  = msa_sll_df(DF_WORD, pws->w[3],  pwt->w[3]);
4956  }
4957  
4958  void helper_msa_sll_d(CPUMIPSState *env,
4959                        uint32_t wd, uint32_t ws, uint32_t wt)
4960  {
4961      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4962      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4963      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4964  
4965      pwd->d[0]  = msa_sll_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
4966      pwd->d[1]  = msa_sll_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
4967  }
4968  
4969  
/*
 * Element operation for SRA: right-shift the signed value arg1.
 *
 * The shift count is BIT_POSITION(arg2, df); that macro is defined elsewhere
 * in this file (presumably it reduces arg2 to a count valid for the element
 * width of df -- confirm at its definition).
 */
static inline int64_t msa_sra_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const int32_t shift = BIT_POSITION(arg2, df);

    return arg1 >> shift;
}
4975  
4976  void helper_msa_sra_b(CPUMIPSState *env,
4977                        uint32_t wd, uint32_t ws, uint32_t wt)
4978  {
4979      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4980      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4981      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4982  
4983      pwd->b[0]  = msa_sra_df(DF_BYTE, pws->b[0],  pwt->b[0]);
4984      pwd->b[1]  = msa_sra_df(DF_BYTE, pws->b[1],  pwt->b[1]);
4985      pwd->b[2]  = msa_sra_df(DF_BYTE, pws->b[2],  pwt->b[2]);
4986      pwd->b[3]  = msa_sra_df(DF_BYTE, pws->b[3],  pwt->b[3]);
4987      pwd->b[4]  = msa_sra_df(DF_BYTE, pws->b[4],  pwt->b[4]);
4988      pwd->b[5]  = msa_sra_df(DF_BYTE, pws->b[5],  pwt->b[5]);
4989      pwd->b[6]  = msa_sra_df(DF_BYTE, pws->b[6],  pwt->b[6]);
4990      pwd->b[7]  = msa_sra_df(DF_BYTE, pws->b[7],  pwt->b[7]);
4991      pwd->b[8]  = msa_sra_df(DF_BYTE, pws->b[8],  pwt->b[8]);
4992      pwd->b[9]  = msa_sra_df(DF_BYTE, pws->b[9],  pwt->b[9]);
4993      pwd->b[10] = msa_sra_df(DF_BYTE, pws->b[10], pwt->b[10]);
4994      pwd->b[11] = msa_sra_df(DF_BYTE, pws->b[11], pwt->b[11]);
4995      pwd->b[12] = msa_sra_df(DF_BYTE, pws->b[12], pwt->b[12]);
4996      pwd->b[13] = msa_sra_df(DF_BYTE, pws->b[13], pwt->b[13]);
4997      pwd->b[14] = msa_sra_df(DF_BYTE, pws->b[14], pwt->b[14]);
4998      pwd->b[15] = msa_sra_df(DF_BYTE, pws->b[15], pwt->b[15]);
4999  }
5000  
5001  void helper_msa_sra_h(CPUMIPSState *env,
5002                        uint32_t wd, uint32_t ws, uint32_t wt)
5003  {
5004      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5005      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5006      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5007  
5008      pwd->h[0]  = msa_sra_df(DF_HALF, pws->h[0],  pwt->h[0]);
5009      pwd->h[1]  = msa_sra_df(DF_HALF, pws->h[1],  pwt->h[1]);
5010      pwd->h[2]  = msa_sra_df(DF_HALF, pws->h[2],  pwt->h[2]);
5011      pwd->h[3]  = msa_sra_df(DF_HALF, pws->h[3],  pwt->h[3]);
5012      pwd->h[4]  = msa_sra_df(DF_HALF, pws->h[4],  pwt->h[4]);
5013      pwd->h[5]  = msa_sra_df(DF_HALF, pws->h[5],  pwt->h[5]);
5014      pwd->h[6]  = msa_sra_df(DF_HALF, pws->h[6],  pwt->h[6]);
5015      pwd->h[7]  = msa_sra_df(DF_HALF, pws->h[7],  pwt->h[7]);
5016  }
5017  
5018  void helper_msa_sra_w(CPUMIPSState *env,
5019                        uint32_t wd, uint32_t ws, uint32_t wt)
5020  {
5021      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5022      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5023      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5024  
5025      pwd->w[0]  = msa_sra_df(DF_WORD, pws->w[0],  pwt->w[0]);
5026      pwd->w[1]  = msa_sra_df(DF_WORD, pws->w[1],  pwt->w[1]);
5027      pwd->w[2]  = msa_sra_df(DF_WORD, pws->w[2],  pwt->w[2]);
5028      pwd->w[3]  = msa_sra_df(DF_WORD, pws->w[3],  pwt->w[3]);
5029  }
5030  
5031  void helper_msa_sra_d(CPUMIPSState *env,
5032                        uint32_t wd, uint32_t ws, uint32_t wt)
5033  {
5034      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5035      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5036      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5037  
5038      pwd->d[0]  = msa_sra_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
5039      pwd->d[1]  = msa_sra_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
5040  }
5041  
5042  
/*
 * Element operation for SRAR: right-shift the signed value arg1 and round.
 *
 * The shift count is BIT_POSITION(arg2, df) (macro defined elsewhere in this
 * file).  With a count of zero arg1 is returned unchanged; otherwise the
 * most significant bit that gets shifted out is added to the shifted value.
 */
static inline int64_t msa_srar_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    int32_t shift = BIT_POSITION(arg2, df);
    int64_t round_bit;

    if (shift == 0) {
        return arg1;
    }

    /* Bit (shift - 1) is the last bit discarded by the shift. */
    round_bit = (arg1 >> (shift - 1)) & 1;
    return (arg1 >> shift) + round_bit;
}
5053  
5054  void helper_msa_srar_b(CPUMIPSState *env,
5055                         uint32_t wd, uint32_t ws, uint32_t wt)
5056  {
5057      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5058      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5059      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5060  
5061      pwd->b[0]  = msa_srar_df(DF_BYTE, pws->b[0],  pwt->b[0]);
5062      pwd->b[1]  = msa_srar_df(DF_BYTE, pws->b[1],  pwt->b[1]);
5063      pwd->b[2]  = msa_srar_df(DF_BYTE, pws->b[2],  pwt->b[2]);
5064      pwd->b[3]  = msa_srar_df(DF_BYTE, pws->b[3],  pwt->b[3]);
5065      pwd->b[4]  = msa_srar_df(DF_BYTE, pws->b[4],  pwt->b[4]);
5066      pwd->b[5]  = msa_srar_df(DF_BYTE, pws->b[5],  pwt->b[5]);
5067      pwd->b[6]  = msa_srar_df(DF_BYTE, pws->b[6],  pwt->b[6]);
5068      pwd->b[7]  = msa_srar_df(DF_BYTE, pws->b[7],  pwt->b[7]);
5069      pwd->b[8]  = msa_srar_df(DF_BYTE, pws->b[8],  pwt->b[8]);
5070      pwd->b[9]  = msa_srar_df(DF_BYTE, pws->b[9],  pwt->b[9]);
5071      pwd->b[10] = msa_srar_df(DF_BYTE, pws->b[10], pwt->b[10]);
5072      pwd->b[11] = msa_srar_df(DF_BYTE, pws->b[11], pwt->b[11]);
5073      pwd->b[12] = msa_srar_df(DF_BYTE, pws->b[12], pwt->b[12]);
5074      pwd->b[13] = msa_srar_df(DF_BYTE, pws->b[13], pwt->b[13]);
5075      pwd->b[14] = msa_srar_df(DF_BYTE, pws->b[14], pwt->b[14]);
5076      pwd->b[15] = msa_srar_df(DF_BYTE, pws->b[15], pwt->b[15]);
5077  }
5078  
5079  void helper_msa_srar_h(CPUMIPSState *env,
5080                         uint32_t wd, uint32_t ws, uint32_t wt)
5081  {
5082      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5083      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5084      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5085  
5086      pwd->h[0]  = msa_srar_df(DF_HALF, pws->h[0],  pwt->h[0]);
5087      pwd->h[1]  = msa_srar_df(DF_HALF, pws->h[1],  pwt->h[1]);
5088      pwd->h[2]  = msa_srar_df(DF_HALF, pws->h[2],  pwt->h[2]);
5089      pwd->h[3]  = msa_srar_df(DF_HALF, pws->h[3],  pwt->h[3]);
5090      pwd->h[4]  = msa_srar_df(DF_HALF, pws->h[4],  pwt->h[4]);
5091      pwd->h[5]  = msa_srar_df(DF_HALF, pws->h[5],  pwt->h[5]);
5092      pwd->h[6]  = msa_srar_df(DF_HALF, pws->h[6],  pwt->h[6]);
5093      pwd->h[7]  = msa_srar_df(DF_HALF, pws->h[7],  pwt->h[7]);
5094  }
5095  
5096  void helper_msa_srar_w(CPUMIPSState *env,
5097                         uint32_t wd, uint32_t ws, uint32_t wt)
5098  {
5099      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5100      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5101      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5102  
5103      pwd->w[0]  = msa_srar_df(DF_WORD, pws->w[0],  pwt->w[0]);
5104      pwd->w[1]  = msa_srar_df(DF_WORD, pws->w[1],  pwt->w[1]);
5105      pwd->w[2]  = msa_srar_df(DF_WORD, pws->w[2],  pwt->w[2]);
5106      pwd->w[3]  = msa_srar_df(DF_WORD, pws->w[3],  pwt->w[3]);
5107  }
5108  
5109  void helper_msa_srar_d(CPUMIPSState *env,
5110                         uint32_t wd, uint32_t ws, uint32_t wt)
5111  {
5112      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5113      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5114      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5115  
5116      pwd->d[0]  = msa_srar_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
5117      pwd->d[1]  = msa_srar_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
5118  }
5119  
5120  
/*
 * Element operation for SRL: logical right shift.
 *
 * arg1 is first masked to the element width with UNSIGNED(arg1, df), so the
 * sign-extension bits of the 64-bit argument are cleared and zeros are
 * shifted in from the top.  The shift count is BIT_POSITION(arg2, df)
 * (macro defined elsewhere in this file).
 */
static inline int64_t msa_srl_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const uint64_t value = UNSIGNED(arg1, df);
    const int32_t shift = BIT_POSITION(arg2, df);

    return value >> shift;
}
5127  
5128  void helper_msa_srl_b(CPUMIPSState *env,
5129                        uint32_t wd, uint32_t ws, uint32_t wt)
5130  {
5131      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5132      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5133      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5134  
5135      pwd->b[0]  = msa_srl_df(DF_BYTE, pws->b[0],  pwt->b[0]);
5136      pwd->b[1]  = msa_srl_df(DF_BYTE, pws->b[1],  pwt->b[1]);
5137      pwd->b[2]  = msa_srl_df(DF_BYTE, pws->b[2],  pwt->b[2]);
5138      pwd->b[3]  = msa_srl_df(DF_BYTE, pws->b[3],  pwt->b[3]);
5139      pwd->b[4]  = msa_srl_df(DF_BYTE, pws->b[4],  pwt->b[4]);
5140      pwd->b[5]  = msa_srl_df(DF_BYTE, pws->b[5],  pwt->b[5]);
5141      pwd->b[6]  = msa_srl_df(DF_BYTE, pws->b[6],  pwt->b[6]);
5142      pwd->b[7]  = msa_srl_df(DF_BYTE, pws->b[7],  pwt->b[7]);
5143      pwd->b[8]  = msa_srl_df(DF_BYTE, pws->b[8],  pwt->b[8]);
5144      pwd->b[9]  = msa_srl_df(DF_BYTE, pws->b[9],  pwt->b[9]);
5145      pwd->b[10] = msa_srl_df(DF_BYTE, pws->b[10], pwt->b[10]);
5146      pwd->b[11] = msa_srl_df(DF_BYTE, pws->b[11], pwt->b[11]);
5147      pwd->b[12] = msa_srl_df(DF_BYTE, pws->b[12], pwt->b[12]);
5148      pwd->b[13] = msa_srl_df(DF_BYTE, pws->b[13], pwt->b[13]);
5149      pwd->b[14] = msa_srl_df(DF_BYTE, pws->b[14], pwt->b[14]);
5150      pwd->b[15] = msa_srl_df(DF_BYTE, pws->b[15], pwt->b[15]);
5151  }
5152  
5153  void helper_msa_srl_h(CPUMIPSState *env,
5154                        uint32_t wd, uint32_t ws, uint32_t wt)
5155  {
5156      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5157      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5158      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5159  
5160      pwd->h[0]  = msa_srl_df(DF_HALF, pws->h[0],  pwt->h[0]);
5161      pwd->h[1]  = msa_srl_df(DF_HALF, pws->h[1],  pwt->h[1]);
5162      pwd->h[2]  = msa_srl_df(DF_HALF, pws->h[2],  pwt->h[2]);
5163      pwd->h[3]  = msa_srl_df(DF_HALF, pws->h[3],  pwt->h[3]);
5164      pwd->h[4]  = msa_srl_df(DF_HALF, pws->h[4],  pwt->h[4]);
5165      pwd->h[5]  = msa_srl_df(DF_HALF, pws->h[5],  pwt->h[5]);
5166      pwd->h[6]  = msa_srl_df(DF_HALF, pws->h[6],  pwt->h[6]);
5167      pwd->h[7]  = msa_srl_df(DF_HALF, pws->h[7],  pwt->h[7]);
5168  }
5169  
5170  void helper_msa_srl_w(CPUMIPSState *env,
5171                        uint32_t wd, uint32_t ws, uint32_t wt)
5172  {
5173      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5174      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5175      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5176  
5177      pwd->w[0]  = msa_srl_df(DF_WORD, pws->w[0],  pwt->w[0]);
5178      pwd->w[1]  = msa_srl_df(DF_WORD, pws->w[1],  pwt->w[1]);
5179      pwd->w[2]  = msa_srl_df(DF_WORD, pws->w[2],  pwt->w[2]);
5180      pwd->w[3]  = msa_srl_df(DF_WORD, pws->w[3],  pwt->w[3]);
5181  }
5182  
5183  void helper_msa_srl_d(CPUMIPSState *env,
5184                        uint32_t wd, uint32_t ws, uint32_t wt)
5185  {
5186      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5187      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5188      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5189  
5190      pwd->d[0]  = msa_srl_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
5191      pwd->d[1]  = msa_srl_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
5192  }
5193  
5194  
/*
 * Element operation for SRLR: logical right shift with rounding.
 *
 * arg1 is masked to the element width with UNSIGNED(arg1, df) before
 * shifting.  The shift count is BIT_POSITION(arg2, df) (macro defined
 * elsewhere in this file).  With a count of zero the masked value is
 * returned as is; otherwise the most significant bit that gets shifted out
 * is added to the shifted value.
 */
static inline int64_t msa_srlr_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    uint64_t value = UNSIGNED(arg1, df);
    int32_t shift = BIT_POSITION(arg2, df);
    uint64_t round_bit;

    if (shift == 0) {
        return value;
    }

    /* Bit (shift - 1) is the last bit discarded by the shift. */
    round_bit = (value >> (shift - 1)) & 1;
    return (value >> shift) + round_bit;
}
5206  
5207  void helper_msa_srlr_b(CPUMIPSState *env,
5208                         uint32_t wd, uint32_t ws, uint32_t wt)
5209  {
5210      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5211      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5212      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5213  
5214      pwd->b[0]  = msa_srlr_df(DF_BYTE, pws->b[0],  pwt->b[0]);
5215      pwd->b[1]  = msa_srlr_df(DF_BYTE, pws->b[1],  pwt->b[1]);
5216      pwd->b[2]  = msa_srlr_df(DF_BYTE, pws->b[2],  pwt->b[2]);
5217      pwd->b[3]  = msa_srlr_df(DF_BYTE, pws->b[3],  pwt->b[3]);
5218      pwd->b[4]  = msa_srlr_df(DF_BYTE, pws->b[4],  pwt->b[4]);
5219      pwd->b[5]  = msa_srlr_df(DF_BYTE, pws->b[5],  pwt->b[5]);
5220      pwd->b[6]  = msa_srlr_df(DF_BYTE, pws->b[6],  pwt->b[6]);
5221      pwd->b[7]  = msa_srlr_df(DF_BYTE, pws->b[7],  pwt->b[7]);
5222      pwd->b[8]  = msa_srlr_df(DF_BYTE, pws->b[8],  pwt->b[8]);
5223      pwd->b[9]  = msa_srlr_df(DF_BYTE, pws->b[9],  pwt->b[9]);
5224      pwd->b[10] = msa_srlr_df(DF_BYTE, pws->b[10], pwt->b[10]);
5225      pwd->b[11] = msa_srlr_df(DF_BYTE, pws->b[11], pwt->b[11]);
5226      pwd->b[12] = msa_srlr_df(DF_BYTE, pws->b[12], pwt->b[12]);
5227      pwd->b[13] = msa_srlr_df(DF_BYTE, pws->b[13], pwt->b[13]);
5228      pwd->b[14] = msa_srlr_df(DF_BYTE, pws->b[14], pwt->b[14]);
5229      pwd->b[15] = msa_srlr_df(DF_BYTE, pws->b[15], pwt->b[15]);
5230  }
5231  
5232  void helper_msa_srlr_h(CPUMIPSState *env,
5233                         uint32_t wd, uint32_t ws, uint32_t wt)
5234  {
5235      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5236      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5237      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5238  
5239      pwd->h[0]  = msa_srlr_df(DF_HALF, pws->h[0],  pwt->h[0]);
5240      pwd->h[1]  = msa_srlr_df(DF_HALF, pws->h[1],  pwt->h[1]);
5241      pwd->h[2]  = msa_srlr_df(DF_HALF, pws->h[2],  pwt->h[2]);
5242      pwd->h[3]  = msa_srlr_df(DF_HALF, pws->h[3],  pwt->h[3]);
5243      pwd->h[4]  = msa_srlr_df(DF_HALF, pws->h[4],  pwt->h[4]);
5244      pwd->h[5]  = msa_srlr_df(DF_HALF, pws->h[5],  pwt->h[5]);
5245      pwd->h[6]  = msa_srlr_df(DF_HALF, pws->h[6],  pwt->h[6]);
5246      pwd->h[7]  = msa_srlr_df(DF_HALF, pws->h[7],  pwt->h[7]);
5247  }
5248  
5249  void helper_msa_srlr_w(CPUMIPSState *env,
5250                         uint32_t wd, uint32_t ws, uint32_t wt)
5251  {
5252      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5253      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5254      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5255  
5256      pwd->w[0]  = msa_srlr_df(DF_WORD, pws->w[0],  pwt->w[0]);
5257      pwd->w[1]  = msa_srlr_df(DF_WORD, pws->w[1],  pwt->w[1]);
5258      pwd->w[2]  = msa_srlr_df(DF_WORD, pws->w[2],  pwt->w[2]);
5259      pwd->w[3]  = msa_srlr_df(DF_WORD, pws->w[3],  pwt->w[3]);
5260  }
5261  
5262  void helper_msa_srlr_d(CPUMIPSState *env,
5263                         uint32_t wd, uint32_t ws, uint32_t wt)
5264  {
5265      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5266      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5267      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5268  
5269      pwd->d[0]  = msa_srlr_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
5270      pwd->d[1]  = msa_srlr_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
5271  }
5272  
5273  
5274  #define MSA_FN_IMM8(FUNC, DEST, OPERATION)                              \
5275  void helper_msa_ ## FUNC(CPUMIPSState *env, uint32_t wd, uint32_t ws,   \
5276          uint32_t i8)                                                    \
5277  {                                                                       \
5278      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
5279      wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
5280      uint32_t i;                                                         \
5281      for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {                        \
5282          DEST = OPERATION;                                               \
5283      }                                                                   \
5284  }
5285  
5286  MSA_FN_IMM8(andi_b, pwd->b[i], pws->b[i] & i8)
5287  MSA_FN_IMM8(ori_b, pwd->b[i], pws->b[i] | i8)
5288  MSA_FN_IMM8(nori_b, pwd->b[i], ~(pws->b[i] | i8))
5289  MSA_FN_IMM8(xori_b, pwd->b[i], pws->b[i] ^ i8)
5290  
5291  #define BIT_MOVE_IF_NOT_ZERO(dest, arg1, arg2, df) \
5292              UNSIGNED(((dest & (~arg2)) | (arg1 & arg2)), df)
5293  MSA_FN_IMM8(bmnzi_b, pwd->b[i],
5294          BIT_MOVE_IF_NOT_ZERO(pwd->b[i], pws->b[i], i8, DF_BYTE))
5295  
5296  #define BIT_MOVE_IF_ZERO(dest, arg1, arg2, df) \
5297              UNSIGNED((dest & arg2) | (arg1 & (~arg2)), df)
5298  MSA_FN_IMM8(bmzi_b, pwd->b[i],
5299          BIT_MOVE_IF_ZERO(pwd->b[i], pws->b[i], i8, DF_BYTE))
5300  
5301  #define BIT_SELECT(dest, arg1, arg2, df) \
5302              UNSIGNED((arg1 & (~dest)) | (arg2 & dest), df)
5303  MSA_FN_IMM8(bseli_b, pwd->b[i],
5304          BIT_SELECT(pwd->b[i], pws->b[i], i8, DF_BYTE))
5305  
5306  #undef BIT_SELECT
5307  #undef BIT_MOVE_IF_ZERO
5308  #undef BIT_MOVE_IF_NOT_ZERO
5309  #undef MSA_FN_IMM8
5310  
5311  #define SHF_POS(i, imm) (((i) & 0xfc) + (((imm) >> (2 * ((i) & 0x03))) & 0x03))
5312  
5313  void helper_msa_shf_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
5314                         uint32_t ws, uint32_t imm)
5315  {
5316      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5317      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5318      wr_t wx, *pwx = &wx;
5319      uint32_t i;
5320  
5321      switch (df) {
5322      case DF_BYTE:
5323          for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {
5324              pwx->b[i] = pws->b[SHF_POS(i, imm)];
5325          }
5326          break;
5327      case DF_HALF:
5328          for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {
5329              pwx->h[i] = pws->h[SHF_POS(i, imm)];
5330          }
5331          break;
5332      case DF_WORD:
5333          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
5334              pwx->w[i] = pws->w[SHF_POS(i, imm)];
5335          }
5336          break;
5337      default:
5338          g_assert_not_reached();
5339      }
5340      msa_move_v(pwd, pwx);
5341  }
5342  
5343  #define MSA_BINOP_IMM_DF(helper, func)                                  \
5344  void helper_msa_ ## helper ## _df(CPUMIPSState *env, uint32_t df,       \
5345                          uint32_t wd, uint32_t ws, int32_t u5)           \
5346  {                                                                       \
5347      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
5348      wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
5349      uint32_t i;                                                         \
5350                                                                          \
5351      switch (df) {                                                       \
5352      case DF_BYTE:                                                       \
5353          for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {                    \
5354              pwd->b[i] = msa_ ## func ## _df(df, pws->b[i], u5);         \
5355          }                                                               \
5356          break;                                                          \
5357      case DF_HALF:                                                       \
5358          for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {                    \
5359              pwd->h[i] = msa_ ## func ## _df(df, pws->h[i], u5);         \
5360          }                                                               \
5361          break;                                                          \
5362      case DF_WORD:                                                       \
5363          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {                    \
5364              pwd->w[i] = msa_ ## func ## _df(df, pws->w[i], u5);         \
5365          }                                                               \
5366          break;                                                          \
5367      case DF_DOUBLE:                                                     \
5368          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {                  \
5369              pwd->d[i] = msa_ ## func ## _df(df, pws->d[i], u5);         \
5370          }                                                               \
5371          break;                                                          \
5372      default:                                                            \
5373          g_assert_not_reached();                                         \
5374      }                                                                   \
5375  }
5376  
5377  MSA_BINOP_IMM_DF(addvi, addv)
5378  MSA_BINOP_IMM_DF(subvi, subv)
5379  MSA_BINOP_IMM_DF(ceqi, ceq)
5380  MSA_BINOP_IMM_DF(clei_s, cle_s)
5381  MSA_BINOP_IMM_DF(clei_u, cle_u)
5382  MSA_BINOP_IMM_DF(clti_s, clt_s)
5383  MSA_BINOP_IMM_DF(clti_u, clt_u)
5384  MSA_BINOP_IMM_DF(maxi_s, max_s)
5385  MSA_BINOP_IMM_DF(maxi_u, max_u)
5386  MSA_BINOP_IMM_DF(mini_s, min_s)
5387  MSA_BINOP_IMM_DF(mini_u, min_u)
5388  #undef MSA_BINOP_IMM_DF
5389  
5390  void helper_msa_ldi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
5391                         int32_t s10)
5392  {
5393      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5394      uint32_t i;
5395  
5396      switch (df) {
5397      case DF_BYTE:
5398          for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {
5399              pwd->b[i] = (int8_t)s10;
5400          }
5401          break;
5402      case DF_HALF:
5403          for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {
5404              pwd->h[i] = (int16_t)s10;
5405          }
5406          break;
5407      case DF_WORD:
5408          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
5409              pwd->w[i] = (int32_t)s10;
5410          }
5411          break;
5412      case DF_DOUBLE:
5413          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
5414              pwd->d[i] = (int64_t)s10;
5415          }
5416         break;
5417      default:
5418          g_assert_not_reached();
5419      }
5420  }
5421  
/*
 * Element operation for SAT_S: clamp arg to the range of a signed
 * (m + 1)-bit integer, i.e. [M_MIN_INT(m + 1), M_MAX_INT(m + 1)].
 *
 * df is not used here; the parameter is kept so that the function has the
 * same shape as the other msa_*_df element operations.
 */
static inline int64_t msa_sat_s_df(uint32_t df, int64_t arg, uint32_t m)
{
    const int64_t lowest = M_MIN_INT(m + 1);
    const int64_t highest = M_MAX_INT(m + 1);

    if (arg < lowest) {
        return lowest;
    }
    if (arg > highest) {
        return highest;
    }
    return arg;
}
5428  
/*
 * Element operation for SAT_U: clamp the element, taken as an unsigned
 * value of format df, to at most M_MAX_UINT(m + 1), the largest unsigned
 * (m + 1)-bit integer.
 */
static inline int64_t msa_sat_u_df(uint32_t df, int64_t arg, uint32_t m)
{
    const uint64_t value = UNSIGNED(arg, df);
    const uint64_t ceiling = M_MAX_UINT(m + 1);

    if (value < ceiling) {
        return value;
    }
    return ceiling;
}
5435  
5436  #define MSA_BINOP_IMMU_DF(helper, func)                                  \
5437  void helper_msa_ ## helper ## _df(CPUMIPSState *env, uint32_t df, uint32_t wd, \
5438                         uint32_t ws, uint32_t u5)                        \
5439  {                                                                       \
5440      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
5441      wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
5442      uint32_t i;                                                         \
5443                                                                          \
5444      switch (df) {                                                       \
5445      case DF_BYTE:                                                       \
5446          for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {                    \
5447              pwd->b[i] = msa_ ## func ## _df(df, pws->b[i], u5);         \
5448          }                                                               \
5449          break;                                                          \
5450      case DF_HALF:                                                       \
5451          for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {                    \
5452              pwd->h[i] = msa_ ## func ## _df(df, pws->h[i], u5);         \
5453          }                                                               \
5454          break;                                                          \
5455      case DF_WORD:                                                       \
5456          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {                    \
5457              pwd->w[i] = msa_ ## func ## _df(df, pws->w[i], u5);         \
5458          }                                                               \
5459          break;                                                          \
5460      case DF_DOUBLE:                                                     \
5461          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {                  \
5462              pwd->d[i] = msa_ ## func ## _df(df, pws->d[i], u5);         \
5463          }                                                               \
5464          break;                                                          \
5465      default:                                                            \
5466          g_assert_not_reached();                                         \
5467      }                                                                   \
5468  }
5469  
5470  MSA_BINOP_IMMU_DF(slli, sll)
5471  MSA_BINOP_IMMU_DF(srai, sra)
5472  MSA_BINOP_IMMU_DF(srli, srl)
5473  MSA_BINOP_IMMU_DF(bclri, bclr)
5474  MSA_BINOP_IMMU_DF(bseti, bset)
5475  MSA_BINOP_IMMU_DF(bnegi, bneg)
5476  MSA_BINOP_IMMU_DF(sat_s, sat_s)
5477  MSA_BINOP_IMMU_DF(sat_u, sat_u)
5478  MSA_BINOP_IMMU_DF(srari, srar)
5479  MSA_BINOP_IMMU_DF(srlri, srlr)
5480  #undef MSA_BINOP_IMMU_DF
5481  
5482  #define MSA_TEROP_IMMU_DF(helper, func)                                  \
5483  void helper_msa_ ## helper ## _df(CPUMIPSState *env, uint32_t df,       \
5484                                    uint32_t wd, uint32_t ws, uint32_t u5) \
5485  {                                                                       \
5486      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
5487      wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
5488      uint32_t i;                                                         \
5489                                                                          \
5490      switch (df) {                                                       \
5491      case DF_BYTE:                                                       \
5492          for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {                    \
5493              pwd->b[i] = msa_ ## func ## _df(df, pwd->b[i], pws->b[i],   \
5494                                              u5);                        \
5495          }                                                               \
5496          break;                                                          \
5497      case DF_HALF:                                                       \
5498          for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {                    \
5499              pwd->h[i] = msa_ ## func ## _df(df, pwd->h[i], pws->h[i],   \
5500                                              u5);                        \
5501          }                                                               \
5502          break;                                                          \
5503      case DF_WORD:                                                       \
5504          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {                    \
5505              pwd->w[i] = msa_ ## func ## _df(df, pwd->w[i], pws->w[i],   \
5506                                              u5);                        \
5507          }                                                               \
5508          break;                                                          \
5509      case DF_DOUBLE:                                                     \
5510          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {                  \
5511              pwd->d[i] = msa_ ## func ## _df(df, pwd->d[i], pws->d[i],   \
5512                                              u5);                        \
5513          }                                                               \
5514          break;                                                          \
5515      default:                                                            \
5516          g_assert_not_reached();                                         \
5517      }                                                                   \
5518  }
5519  
5520  MSA_TEROP_IMMU_DF(binsli, binsl)
5521  MSA_TEROP_IMMU_DF(binsri, binsr)
5522  #undef MSA_TEROP_IMMU_DF
5523  
5524  #define CONCATENATE_AND_SLIDE(s, k)             \
5525      do {                                        \
5526          for (i = 0; i < s; i++) {               \
5527              v[i]     = pws->b[s * k + i];       \
5528              v[i + s] = pwd->b[s * k + i];       \
5529          }                                       \
5530          for (i = 0; i < s; i++) {               \
5531              pwd->b[s * k + i] = v[i + n];       \
5532          }                                       \
5533      } while (0)
5534  
5535  static inline void msa_sld_df(uint32_t df, wr_t *pwd,
5536                                wr_t *pws, target_ulong rt)
5537  {
5538      uint32_t n = rt % DF_ELEMENTS(df);
5539      uint8_t v[64];
5540      uint32_t i, k;
5541  
5542      switch (df) {
5543      case DF_BYTE:
5544          CONCATENATE_AND_SLIDE(DF_ELEMENTS(DF_BYTE), 0);
5545          break;
5546      case DF_HALF:
5547          for (k = 0; k < 2; k++) {
5548              CONCATENATE_AND_SLIDE(DF_ELEMENTS(DF_HALF), k);
5549          }
5550          break;
5551      case DF_WORD:
5552          for (k = 0; k < 4; k++) {
5553              CONCATENATE_AND_SLIDE(DF_ELEMENTS(DF_WORD), k);
5554          }
5555          break;
5556      case DF_DOUBLE:
5557          for (k = 0; k < 8; k++) {
5558              CONCATENATE_AND_SLIDE(DF_ELEMENTS(DF_DOUBLE), k);
5559          }
5560          break;
5561      default:
5562          g_assert_not_reached();
5563      }
5564  }
5565  
/*
 * Fixed-point multiply for one element.
 *
 * When both operands equal DF_MIN_INT(df) the result is DF_MAX_INT(df).
 * Otherwise the result is the product shifted right by DF_BITS(df) - 1.
 */
static inline int64_t msa_mul_q_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const int64_t most_negative = DF_MIN_INT(df);
    const int64_t most_positive = DF_MAX_INT(df);

    if (arg1 != most_negative || arg2 != most_negative) {
        return (arg1 * arg2) >> (DF_BITS(df) - 1);
    }
    return most_positive;
}
5576  
/*
 * Fixed-point multiply with rounding for one element.
 *
 * When both operands equal DF_MIN_INT(df) the result is DF_MAX_INT(df).
 * Otherwise 1 << (DF_BITS(df) - 2) is added to the product before it is
 * shifted right by DF_BITS(df) - 1.
 */
static inline int64_t msa_mulr_q_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const int64_t most_negative = DF_MIN_INT(df);
    const int64_t most_positive = DF_MAX_INT(df);
    const int64_t rounding = 1LL << (DF_BITS(df) - 2);

    if (arg1 != most_negative || arg2 != most_negative) {
        return (arg1 * arg2 + rounding) >> (DF_BITS(df) - 1);
    }
    return most_positive;
}
5588  
5589  #define MSA_BINOP_DF(func) \
5590  void helper_msa_ ## func ## _df(CPUMIPSState *env, uint32_t df,         \
5591                                  uint32_t wd, uint32_t ws, uint32_t wt)  \
5592  {                                                                       \
5593      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
5594      wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
5595      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);                          \
5596                                                                          \
5597      switch (df) {                                                       \
5598      case DF_BYTE:                                                       \
5599          pwd->b[0]  = msa_ ## func ## _df(df, pws->b[0],  pwt->b[0]);    \
5600          pwd->b[1]  = msa_ ## func ## _df(df, pws->b[1],  pwt->b[1]);    \
5601          pwd->b[2]  = msa_ ## func ## _df(df, pws->b[2],  pwt->b[2]);    \
5602          pwd->b[3]  = msa_ ## func ## _df(df, pws->b[3],  pwt->b[3]);    \
5603          pwd->b[4]  = msa_ ## func ## _df(df, pws->b[4],  pwt->b[4]);    \
5604          pwd->b[5]  = msa_ ## func ## _df(df, pws->b[5],  pwt->b[5]);    \
5605          pwd->b[6]  = msa_ ## func ## _df(df, pws->b[6],  pwt->b[6]);    \
5606          pwd->b[7]  = msa_ ## func ## _df(df, pws->b[7],  pwt->b[7]);    \
5607          pwd->b[8]  = msa_ ## func ## _df(df, pws->b[8],  pwt->b[8]);    \
5608          pwd->b[9]  = msa_ ## func ## _df(df, pws->b[9],  pwt->b[9]);    \
5609          pwd->b[10] = msa_ ## func ## _df(df, pws->b[10], pwt->b[10]);   \
5610          pwd->b[11] = msa_ ## func ## _df(df, pws->b[11], pwt->b[11]);   \
5611          pwd->b[12] = msa_ ## func ## _df(df, pws->b[12], pwt->b[12]);   \
5612          pwd->b[13] = msa_ ## func ## _df(df, pws->b[13], pwt->b[13]);   \
5613          pwd->b[14] = msa_ ## func ## _df(df, pws->b[14], pwt->b[14]);   \
5614          pwd->b[15] = msa_ ## func ## _df(df, pws->b[15], pwt->b[15]);   \
5615          break;                                                          \
5616      case DF_HALF:                                                       \
5617          pwd->h[0] = msa_ ## func ## _df(df, pws->h[0], pwt->h[0]);      \
5618          pwd->h[1] = msa_ ## func ## _df(df, pws->h[1], pwt->h[1]);      \
5619          pwd->h[2] = msa_ ## func ## _df(df, pws->h[2], pwt->h[2]);      \
5620          pwd->h[3] = msa_ ## func ## _df(df, pws->h[3], pwt->h[3]);      \
5621          pwd->h[4] = msa_ ## func ## _df(df, pws->h[4], pwt->h[4]);      \
5622          pwd->h[5] = msa_ ## func ## _df(df, pws->h[5], pwt->h[5]);      \
5623          pwd->h[6] = msa_ ## func ## _df(df, pws->h[6], pwt->h[6]);      \
5624          pwd->h[7] = msa_ ## func ## _df(df, pws->h[7], pwt->h[7]);      \
5625          break;                                                          \
5626      case DF_WORD:                                                       \
5627          pwd->w[0] = msa_ ## func ## _df(df, pws->w[0], pwt->w[0]);      \
5628          pwd->w[1] = msa_ ## func ## _df(df, pws->w[1], pwt->w[1]);      \
5629          pwd->w[2] = msa_ ## func ## _df(df, pws->w[2], pwt->w[2]);      \
5630          pwd->w[3] = msa_ ## func ## _df(df, pws->w[3], pwt->w[3]);      \
5631          break;                                                          \
5632      case DF_DOUBLE:                                                     \
5633          pwd->d[0] = msa_ ## func ## _df(df, pws->d[0], pwt->d[0]);      \
5634          pwd->d[1] = msa_ ## func ## _df(df, pws->d[1], pwt->d[1]);      \
5635          break;                                                          \
5636      default:                                                            \
5637          g_assert_not_reached();                                         \
5638      }                                                                   \
5639  }
5640  
5641  MSA_BINOP_DF(mul_q)
5642  MSA_BINOP_DF(mulr_q)
5643  #undef MSA_BINOP_DF
5644  
5645  void helper_msa_sld_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
5646                         uint32_t ws, uint32_t rt)
5647  {
5648      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5649      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5650  
5651      msa_sld_df(df, pwd, pws, env->active_tc.gpr[rt]);
5652  }
5653  
/*
 * Fixed-point multiply-add for one element.
 *
 * Computes ((dest << s) + arg1 * arg2) >> s with s = DF_BITS(df) - 1, then
 * clamps the result to the range [DF_MIN_INT(df), DF_MAX_INT(df)].
 */
static inline int64_t msa_madd_q_df(uint32_t df, int64_t dest, int64_t arg1,
                                    int64_t arg2)
{
    const int64_t lower = DF_MIN_INT(df);
    const int64_t upper = DF_MAX_INT(df);
    const int shift = DF_BITS(df) - 1;
    int64_t sum;

    sum = ((dest << shift) + arg1 * arg2) >> shift;

    if (sum < lower) {
        return lower;
    }
    if (upper < sum) {
        return upper;
    }
    return sum;
}
5667  
/*
 * Fixed-point multiply-subtract for one element.
 *
 * Computes ((dest << s) - arg1 * arg2) >> s with s = DF_BITS(df) - 1, then
 * clamps the result to the range [DF_MIN_INT(df), DF_MAX_INT(df)].
 */
static inline int64_t msa_msub_q_df(uint32_t df, int64_t dest, int64_t arg1,
                                    int64_t arg2)
{
    const int64_t lower = DF_MIN_INT(df);
    const int64_t upper = DF_MAX_INT(df);
    const int shift = DF_BITS(df) - 1;
    int64_t diff;

    diff = ((dest << shift) - arg1 * arg2) >> shift;

    if (diff < lower) {
        return lower;
    }
    if (upper < diff) {
        return upper;
    }
    return diff;
}
5681  
/*
 * Fixed-point multiply-add with rounding for one element.
 *
 * Computes ((dest << s) + arg1 * arg2 + r) >> s with s = DF_BITS(df) - 1
 * and r = 1 << (DF_BITS(df) - 2), then clamps the result to the range
 * [DF_MIN_INT(df), DF_MAX_INT(df)].
 */
static inline int64_t msa_maddr_q_df(uint32_t df, int64_t dest, int64_t arg1,
                                     int64_t arg2)
{
    const int64_t lower = DF_MIN_INT(df);
    const int64_t upper = DF_MAX_INT(df);
    const int64_t rounding = 1LL << (DF_BITS(df) - 2);
    const int shift = DF_BITS(df) - 1;
    int64_t sum;

    sum = ((dest << shift) + arg1 * arg2 + rounding) >> shift;

    if (sum < lower) {
        return lower;
    }
    if (upper < sum) {
        return upper;
    }
    return sum;
}
5696  
/*
 * Fixed-point multiply-subtract with rounding for one element.
 *
 * Computes ((dest << s) - arg1 * arg2 + r) >> s with s = DF_BITS(df) - 1
 * and r = 1 << (DF_BITS(df) - 2), then clamps the result to the range
 * [DF_MIN_INT(df), DF_MAX_INT(df)].
 */
static inline int64_t msa_msubr_q_df(uint32_t df, int64_t dest, int64_t arg1,
                                     int64_t arg2)
{
    const int64_t lower = DF_MIN_INT(df);
    const int64_t upper = DF_MAX_INT(df);
    const int64_t rounding = 1LL << (DF_BITS(df) - 2);
    const int shift = DF_BITS(df) - 1;
    int64_t diff;

    diff = ((dest << shift) - arg1 * arg2 + rounding) >> shift;

    if (diff < lower) {
        return lower;
    }
    if (upper < diff) {
        return upper;
    }
    return diff;
}
5711  
5712  #define MSA_TEROP_DF(func) \
5713  void helper_msa_ ## func ## _df(CPUMIPSState *env, uint32_t df, uint32_t wd,  \
5714                                  uint32_t ws, uint32_t wt)                     \
5715  {                                                                             \
5716      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                                \
5717      wr_t *pws = &(env->active_fpu.fpr[ws].wr);                                \
5718      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);                                \
5719                                                                                \
5720      switch (df) {                                                             \
5721      case DF_BYTE:                                                             \
5722          pwd->b[0]  = msa_ ## func ## _df(df, pwd->b[0],  pws->b[0],           \
5723                                               pwt->b[0]);                      \
5724          pwd->b[1]  = msa_ ## func ## _df(df, pwd->b[1],  pws->b[1],           \
5725                                               pwt->b[1]);                      \
5726          pwd->b[2]  = msa_ ## func ## _df(df, pwd->b[2],  pws->b[2],           \
5727                                               pwt->b[2]);                      \
5728          pwd->b[3]  = msa_ ## func ## _df(df, pwd->b[3],  pws->b[3],           \
5729                                               pwt->b[3]);                      \
5730          pwd->b[4]  = msa_ ## func ## _df(df, pwd->b[4],  pws->b[4],           \
5731                                               pwt->b[4]);                      \
5732          pwd->b[5]  = msa_ ## func ## _df(df, pwd->b[5],  pws->b[5],           \
5733                                               pwt->b[5]);                      \
5734          pwd->b[6]  = msa_ ## func ## _df(df, pwd->b[6],  pws->b[6],           \
5735                                               pwt->b[6]);                      \
5736          pwd->b[7]  = msa_ ## func ## _df(df, pwd->b[7],  pws->b[7],           \
5737                                               pwt->b[7]);                      \
5738          pwd->b[8]  = msa_ ## func ## _df(df, pwd->b[8],  pws->b[8],           \
5739                                               pwt->b[8]);                      \
5740          pwd->b[9]  = msa_ ## func ## _df(df, pwd->b[9],  pws->b[9],           \
5741                                               pwt->b[9]);                      \
5742          pwd->b[10] = msa_ ## func ## _df(df, pwd->b[10], pws->b[10],          \
5743                                               pwt->b[10]);                     \
5744          pwd->b[11] = msa_ ## func ## _df(df, pwd->b[11], pws->b[11],          \
5745                                               pwt->b[11]);                     \
5746          pwd->b[12] = msa_ ## func ## _df(df, pwd->b[12], pws->b[12],          \
5747                                               pwt->b[12]);                     \
5748          pwd->b[13] = msa_ ## func ## _df(df, pwd->b[13], pws->b[13],          \
5749                                               pwt->b[13]);                     \
5750          pwd->b[14] = msa_ ## func ## _df(df, pwd->b[14], pws->b[14],          \
5751                                               pwt->b[14]);                     \
5752          pwd->b[15] = msa_ ## func ## _df(df, pwd->b[15], pws->b[15],          \
5753                                               pwt->b[15]);                     \
5754          break;                                                                \
5755      case DF_HALF:                                                             \
5756          pwd->h[0] = msa_ ## func ## _df(df, pwd->h[0], pws->h[0], pwt->h[0]); \
5757          pwd->h[1] = msa_ ## func ## _df(df, pwd->h[1], pws->h[1], pwt->h[1]); \
5758          pwd->h[2] = msa_ ## func ## _df(df, pwd->h[2], pws->h[2], pwt->h[2]); \
5759          pwd->h[3] = msa_ ## func ## _df(df, pwd->h[3], pws->h[3], pwt->h[3]); \
5760          pwd->h[4] = msa_ ## func ## _df(df, pwd->h[4], pws->h[4], pwt->h[4]); \
5761          pwd->h[5] = msa_ ## func ## _df(df, pwd->h[5], pws->h[5], pwt->h[5]); \
5762          pwd->h[6] = msa_ ## func ## _df(df, pwd->h[6], pws->h[6], pwt->h[6]); \
5763          pwd->h[7] = msa_ ## func ## _df(df, pwd->h[7], pws->h[7], pwt->h[7]); \
5764          break;                                                                \
5765      case DF_WORD:                                                             \
5766          pwd->w[0] = msa_ ## func ## _df(df, pwd->w[0], pws->w[0], pwt->w[0]); \
5767          pwd->w[1] = msa_ ## func ## _df(df, pwd->w[1], pws->w[1], pwt->w[1]); \
5768          pwd->w[2] = msa_ ## func ## _df(df, pwd->w[2], pws->w[2], pwt->w[2]); \
5769          pwd->w[3] = msa_ ## func ## _df(df, pwd->w[3], pws->w[3], pwt->w[3]); \
5770          break;                                                                \
5771      case DF_DOUBLE:                                                           \
5772          pwd->d[0] = msa_ ## func ## _df(df, pwd->d[0], pws->d[0], pwt->d[0]); \
5773          pwd->d[1] = msa_ ## func ## _df(df, pwd->d[1], pws->d[1], pwt->d[1]); \
5774          break;                                                                \
5775      default:                                                                  \
5776          g_assert_not_reached();                                               \
5777      }                                                                         \
5778  }
5779  
5780  MSA_TEROP_DF(binsl)
5781  MSA_TEROP_DF(binsr)
5782  MSA_TEROP_DF(madd_q)
5783  MSA_TEROP_DF(msub_q)
5784  MSA_TEROP_DF(maddr_q)
5785  MSA_TEROP_DF(msubr_q)
5786  #undef MSA_TEROP_DF
5787  
5788  static inline void msa_splat_df(uint32_t df, wr_t *pwd,
5789                                  wr_t *pws, target_ulong rt)
5790  {
5791      uint32_t n = rt % DF_ELEMENTS(df);
5792      uint32_t i;
5793  
5794      switch (df) {
5795      case DF_BYTE:
5796          for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {
5797              pwd->b[i] = pws->b[n];
5798          }
5799          break;
5800      case DF_HALF:
5801          for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {
5802              pwd->h[i] = pws->h[n];
5803          }
5804          break;
5805      case DF_WORD:
5806          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
5807              pwd->w[i] = pws->w[n];
5808          }
5809          break;
5810      case DF_DOUBLE:
5811          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
5812              pwd->d[i] = pws->d[n];
5813          }
5814         break;
5815      default:
5816          g_assert_not_reached();
5817      }
5818  }
5819  
5820  void helper_msa_splat_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
5821                           uint32_t ws, uint32_t rt)
5822  {
5823      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5824      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5825  
5826      msa_splat_df(df, pwd, pws, env->active_tc.gpr[rt]);
5827  }
5828  
/*
 * Macro scaffolding that generates helper_msa_vshf_df().
 *
 * - MSA_DO_<B|H|W|D>, MSA_LOOP_<B|H|W|D> and MSA_LOOP_COND_<B|H|W|D> forward
 *   to MSA_DO(), MSA_LOOP() and MSA_LOOP_COND().  They are only expanded
 *   when MSA_FN_DF() is instantiated, so the MSA_DO and MSA_LOOP_COND
 *   definitions in effect at that point are the ones that get used.
 * - MSA_LOOP(DF) runs MSA_DO_<DF> for i = 0 .. MSA_LOOP_COND_<DF> - 1.
 * - MSA_FN_DF(FUNC) defines helper_msa_<FUNC>(): it dispatches on df, lets
 *   the loop body fill the temporary wx through pwx, and finally copies wx
 *   into wd with msa_move_v().
 * - The first MSA_LOOP_COND definition (half the element count) is
 *   #undef'd again before MSA_FN_DF() is instantiated, so only the second
 *   one (the full element count) takes effect for vshf_df.  The R<x>/L<x>
 *   accessor macros are not referenced anywhere in this span.
 * - For vshf_df, MSA_DO reads a control value from element i of wd.  If any
 *   of the bits 0xc0 is set, the result element is 0.  Otherwise
 *   k = (control & 0x3f) % (2 * n), with n = DF_ELEMENTS(df), selects
 *   element k of wt when k < n and element k - n of ws when k >= n.
 *
 * MSA_DO, MSA_LOOP_COND and MSA_FN_DF are #undef'd at the end; the other
 * macros defined here are left defined.
 */
5829  #define MSA_DO_B MSA_DO(b)
5830  #define MSA_DO_H MSA_DO(h)
5831  #define MSA_DO_W MSA_DO(w)
5832  #define MSA_DO_D MSA_DO(d)
5833  
5834  #define MSA_LOOP_B MSA_LOOP(B)
5835  #define MSA_LOOP_H MSA_LOOP(H)
5836  #define MSA_LOOP_W MSA_LOOP(W)
5837  #define MSA_LOOP_D MSA_LOOP(D)
5838  
5839  #define MSA_LOOP_COND_B MSA_LOOP_COND(DF_BYTE)
5840  #define MSA_LOOP_COND_H MSA_LOOP_COND(DF_HALF)
5841  #define MSA_LOOP_COND_W MSA_LOOP_COND(DF_WORD)
5842  #define MSA_LOOP_COND_D MSA_LOOP_COND(DF_DOUBLE)
5843  
5844  #define MSA_LOOP(DF) \
5845      do { \
5846          for (i = 0; i < (MSA_LOOP_COND_ ## DF) ; i++) { \
5847              MSA_DO_ ## DF; \
5848          } \
5849      } while (0)
5850  
5851  #define MSA_FN_DF(FUNC)                                             \
5852  void helper_msa_##FUNC(CPUMIPSState *env, uint32_t df, uint32_t wd, \
5853          uint32_t ws, uint32_t wt)                                   \
5854  {                                                                   \
5855      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                      \
5856      wr_t *pws = &(env->active_fpu.fpr[ws].wr);                      \
5857      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);                      \
5858      wr_t wx, *pwx = &wx;                                            \
5859      uint32_t i;                                                     \
5860      switch (df) {                                                   \
5861      case DF_BYTE:                                                   \
5862          MSA_LOOP_B;                                                 \
5863          break;                                                      \
5864      case DF_HALF:                                                   \
5865          MSA_LOOP_H;                                                 \
5866          break;                                                      \
5867      case DF_WORD:                                                   \
5868          MSA_LOOP_W;                                                 \
5869          break;                                                      \
5870      case DF_DOUBLE:                                                 \
5871          MSA_LOOP_D;                                                 \
5872          break;                                                      \
5873      default:                                                        \
5874          g_assert_not_reached();                                     \
5875      }                                                               \
5876      msa_move_v(pwd, pwx);                                           \
5877  }
5878  
5879  #define MSA_LOOP_COND(DF) \
5880              (DF_ELEMENTS(DF) / 2)
5881  
5882  #define Rb(pwr, i) (pwr->b[i])
5883  #define Lb(pwr, i) (pwr->b[i + DF_ELEMENTS(DF_BYTE) / 2])
5884  #define Rh(pwr, i) (pwr->h[i])
5885  #define Lh(pwr, i) (pwr->h[i + DF_ELEMENTS(DF_HALF) / 2])
5886  #define Rw(pwr, i) (pwr->w[i])
5887  #define Lw(pwr, i) (pwr->w[i + DF_ELEMENTS(DF_WORD) / 2])
5888  #define Rd(pwr, i) (pwr->d[i])
5889  #define Ld(pwr, i) (pwr->d[i + DF_ELEMENTS(DF_DOUBLE) / 2])
5890  
5891  #undef MSA_LOOP_COND
5892  
5893  #define MSA_LOOP_COND(DF) \
5894              (DF_ELEMENTS(DF))
5895  
5896  #define MSA_DO(DF)                                                          \
5897      do {                                                                    \
5898          uint32_t n = DF_ELEMENTS(df);                                       \
5899          uint32_t k = (pwd->DF[i] & 0x3f) % (2 * n);                         \
5900          pwx->DF[i] =                                                        \
5901              (pwd->DF[i] & 0xc0) ? 0 : k < n ? pwt->DF[k] : pws->DF[k - n];  \
5902      } while (0)
5903  MSA_FN_DF(vshf_df)
5904  #undef MSA_DO
5905  #undef MSA_LOOP_COND
5906  #undef MSA_FN_DF
5907  
5908  
5909  void helper_msa_sldi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
5910                          uint32_t ws, uint32_t n)
5911  {
5912      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5913      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5914  
5915      msa_sld_df(df, pwd, pws, n);
5916  }
5917  
5918  void helper_msa_splati_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
5919                            uint32_t ws, uint32_t n)
5920  {
5921      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5922      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5923  
5924      msa_splat_df(df, pwd, pws, n);
5925  }
5926  
5927  void helper_msa_copy_s_b(CPUMIPSState *env, uint32_t rd,
5928                           uint32_t ws, uint32_t n)
5929  {
5930      n %= 16;
5931  #if HOST_BIG_ENDIAN
5932      if (n < 8) {
5933          n = 8 - n - 1;
5934      } else {
5935          n = 24 - n - 1;
5936      }
5937  #endif
5938      env->active_tc.gpr[rd] = (int8_t)env->active_fpu.fpr[ws].wr.b[n];
5939  }
5940  
5941  void helper_msa_copy_s_h(CPUMIPSState *env, uint32_t rd,
5942                           uint32_t ws, uint32_t n)
5943  {
5944      n %= 8;
5945  #if HOST_BIG_ENDIAN
5946      if (n < 4) {
5947          n = 4 - n - 1;
5948      } else {
5949          n = 12 - n - 1;
5950      }
5951  #endif
5952      env->active_tc.gpr[rd] = (int16_t)env->active_fpu.fpr[ws].wr.h[n];
5953  }
5954  
5955  void helper_msa_copy_s_w(CPUMIPSState *env, uint32_t rd,
5956                           uint32_t ws, uint32_t n)
5957  {
5958      n %= 4;
5959  #if HOST_BIG_ENDIAN
5960      if (n < 2) {
5961          n = 2 - n - 1;
5962      } else {
5963          n = 6 - n - 1;
5964      }
5965  #endif
5966      env->active_tc.gpr[rd] = (int32_t)env->active_fpu.fpr[ws].wr.w[n];
5967  }
5968  
5969  void helper_msa_copy_s_d(CPUMIPSState *env, uint32_t rd,
5970                           uint32_t ws, uint32_t n)
5971  {
5972      n %= 2;
5973      env->active_tc.gpr[rd] = (int64_t)env->active_fpu.fpr[ws].wr.d[n];
5974  }
5975  
5976  void helper_msa_copy_u_b(CPUMIPSState *env, uint32_t rd,
5977                           uint32_t ws, uint32_t n)
5978  {
5979      n %= 16;
5980  #if HOST_BIG_ENDIAN
5981      if (n < 8) {
5982          n = 8 - n - 1;
5983      } else {
5984          n = 24 - n - 1;
5985      }
5986  #endif
5987      env->active_tc.gpr[rd] = (uint8_t)env->active_fpu.fpr[ws].wr.b[n];
5988  }
5989  
5990  void helper_msa_copy_u_h(CPUMIPSState *env, uint32_t rd,
5991                           uint32_t ws, uint32_t n)
5992  {
5993      n %= 8;
5994  #if HOST_BIG_ENDIAN
5995      if (n < 4) {
5996          n = 4 - n - 1;
5997      } else {
5998          n = 12 - n - 1;
5999      }
6000  #endif
6001      env->active_tc.gpr[rd] = (uint16_t)env->active_fpu.fpr[ws].wr.h[n];
6002  }
6003  
6004  void helper_msa_copy_u_w(CPUMIPSState *env, uint32_t rd,
6005                           uint32_t ws, uint32_t n)
6006  {
6007      n %= 4;
6008  #if HOST_BIG_ENDIAN
6009      if (n < 2) {
6010          n = 2 - n - 1;
6011      } else {
6012          n = 6 - n - 1;
6013      }
6014  #endif
6015      env->active_tc.gpr[rd] = (uint32_t)env->active_fpu.fpr[ws].wr.w[n];
6016  }
6017  
6018  void helper_msa_insert_b(CPUMIPSState *env, uint32_t wd,
6019                            uint32_t rs_num, uint32_t n)
6020  {
6021      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6022      target_ulong rs = env->active_tc.gpr[rs_num];
6023      n %= 16;
6024  #if HOST_BIG_ENDIAN
6025      if (n < 8) {
6026          n = 8 - n - 1;
6027      } else {
6028          n = 24 - n - 1;
6029      }
6030  #endif
6031      pwd->b[n] = (int8_t)rs;
6032  }
6033  
6034  void helper_msa_insert_h(CPUMIPSState *env, uint32_t wd,
6035                            uint32_t rs_num, uint32_t n)
6036  {
6037      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6038      target_ulong rs = env->active_tc.gpr[rs_num];
6039      n %= 8;
6040  #if HOST_BIG_ENDIAN
6041      if (n < 4) {
6042          n = 4 - n - 1;
6043      } else {
6044          n = 12 - n - 1;
6045      }
6046  #endif
6047      pwd->h[n] = (int16_t)rs;
6048  }
6049  
6050  void helper_msa_insert_w(CPUMIPSState *env, uint32_t wd,
6051                            uint32_t rs_num, uint32_t n)
6052  {
6053      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6054      target_ulong rs = env->active_tc.gpr[rs_num];
6055      n %= 4;
6056  #if HOST_BIG_ENDIAN
6057      if (n < 2) {
6058          n = 2 - n - 1;
6059      } else {
6060          n = 6 - n - 1;
6061      }
6062  #endif
6063      pwd->w[n] = (int32_t)rs;
6064  }
6065  
6066  void helper_msa_insert_d(CPUMIPSState *env, uint32_t wd,
6067                            uint32_t rs_num, uint32_t n)
6068  {
6069      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6070      target_ulong rs = env->active_tc.gpr[rs_num];
6071      n %= 2;
6072      pwd->d[n] = (int64_t)rs;
6073  }
6074  
6075  void helper_msa_insve_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6076                           uint32_t ws, uint32_t n)
6077  {
6078      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6079      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6080  
6081      switch (df) {
6082      case DF_BYTE:
6083          pwd->b[n] = (int8_t)pws->b[0];
6084          break;
6085      case DF_HALF:
6086          pwd->h[n] = (int16_t)pws->h[0];
6087          break;
6088      case DF_WORD:
6089          pwd->w[n] = (int32_t)pws->w[0];
6090          break;
6091      case DF_DOUBLE:
6092          pwd->d[n] = (int64_t)pws->d[0];
6093          break;
6094      default:
6095          g_assert_not_reached();
6096      }
6097  }
6098  
6099  void helper_msa_ctcmsa(CPUMIPSState *env, target_ulong elm, uint32_t cd)
6100  {
6101      switch (cd) {
6102      case 0:
6103          break;
6104      case 1:
6105          env->active_tc.msacsr = (int32_t)elm & MSACSR_MASK;
6106          restore_msa_fp_status(env);
6107          /* check exception */
6108          if ((GET_FP_ENABLE(env->active_tc.msacsr) | FP_UNIMPLEMENTED)
6109              & GET_FP_CAUSE(env->active_tc.msacsr)) {
6110              do_raise_exception(env, EXCP_MSAFPE, GETPC());
6111          }
6112          break;
6113      }
6114  }
6115  
6116  target_ulong helper_msa_cfcmsa(CPUMIPSState *env, uint32_t cs)
6117  {
6118      switch (cs) {
6119      case 0:
6120          return env->msair;
6121      case 1:
6122          return env->active_tc.msacsr & MSACSR_MASK;
6123      }
6124      return 0;
6125  }
6126  
6127  void helper_msa_fill_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6128                          uint32_t rs)
6129  {
6130      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6131      uint32_t i;
6132  
6133      switch (df) {
6134      case DF_BYTE:
6135          for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {
6136              pwd->b[i] = (int8_t)env->active_tc.gpr[rs];
6137          }
6138          break;
6139      case DF_HALF:
6140          for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {
6141              pwd->h[i] = (int16_t)env->active_tc.gpr[rs];
6142          }
6143          break;
6144      case DF_WORD:
6145          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6146              pwd->w[i] = (int32_t)env->active_tc.gpr[rs];
6147          }
6148          break;
6149      case DF_DOUBLE:
6150          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6151              pwd->d[i] = (int64_t)env->active_tc.gpr[rs];
6152          }
6153         break;
6154      default:
6155          g_assert_not_reached();
6156      }
6157  }
6158  
6159  
/* Bit patterns of 1.0 in single and double precision. */
#define FLOAT_ONE32 make_float32(0x3f800000)
#define FLOAT_ONE64 make_float64(0x3ff0000000000000ULL)

/*
 * NaN bit patterns derived from the default NaN of float status s by
 * flipping a fixed set of bits.  The value quoted for each macro is the
 * one recorded by the original annotations.
 */
/* 0x7c20 */
#define FLOAT_SNAN16(s) (float16_default_nan(s) ^ 0x0220)
/* 0x7f800020 */
#define FLOAT_SNAN32(s) (float32_default_nan(s) ^ 0x00400020)
/* 0x7ff0000000000020 */
#define FLOAT_SNAN64(s) (float64_default_nan(s) ^ 0x0008000000000020ULL)
6169  
6170  static inline void clear_msacsr_cause(CPUMIPSState *env)
6171  {
6172      SET_FP_CAUSE(env->active_tc.msacsr, 0);
6173  }
6174  
6175  static inline void check_msacsr_cause(CPUMIPSState *env, uintptr_t retaddr)
6176  {
6177      if ((GET_FP_CAUSE(env->active_tc.msacsr) &
6178              (GET_FP_ENABLE(env->active_tc.msacsr) | FP_UNIMPLEMENTED)) == 0) {
6179          UPDATE_FP_FLAGS(env->active_tc.msacsr,
6180                  GET_FP_CAUSE(env->active_tc.msacsr));
6181      } else {
6182          do_raise_exception(env, EXCP_MSAFPE, retaddr);
6183      }
6184  }
6185  
/*
 * Flush-to-zero use cases for update_msacsr().  These are bit flags that
 * are tested against its "action" argument.
 */
#define CLEAR_FS_UNDERFLOW (1 << 0)
#define CLEAR_IS_INEXACT   (1 << 1)
#define RECIPROCAL_INEXACT (1 << 2)
6190  
6191  
6192  static inline int ieee_to_mips_xcpt_msa(int ieee_xcpt)
6193  {
6194      int mips_xcpt = 0;
6195  
6196      if (ieee_xcpt & float_flag_invalid) {
6197          mips_xcpt |= FP_INVALID;
6198      }
6199      if (ieee_xcpt & float_flag_overflow) {
6200          mips_xcpt |= FP_OVERFLOW;
6201      }
6202      if (ieee_xcpt & float_flag_underflow) {
6203          mips_xcpt |= FP_UNDERFLOW;
6204      }
6205      if (ieee_xcpt & float_flag_divbyzero) {
6206          mips_xcpt |= FP_DIV0;
6207      }
6208      if (ieee_xcpt & float_flag_inexact) {
6209          mips_xcpt |= FP_INEXACT;
6210      }
6211  
6212      return mips_xcpt;
6213  }
6214  
6215  static inline int update_msacsr(CPUMIPSState *env, int action, int denormal)
6216  {
6217      int ieee_exception_flags;
6218      int mips_exception_flags = 0;
6219      int cause;
6220      int enable;
6221  
6222      ieee_exception_flags = get_float_exception_flags(
6223                                 &env->active_tc.msa_fp_status);
6224  
6225      /* QEMU softfloat does not signal all underflow cases */
6226      if (denormal) {
6227          ieee_exception_flags |= float_flag_underflow;
6228      }
6229      if (ieee_exception_flags) {
6230          mips_exception_flags = ieee_to_mips_xcpt_msa(ieee_exception_flags);
6231      }
6232      enable = GET_FP_ENABLE(env->active_tc.msacsr) | FP_UNIMPLEMENTED;
6233  
6234      /* Set Inexact (I) when flushing inputs to zero */
6235      if ((ieee_exception_flags & float_flag_input_denormal_flushed) &&
6236              (env->active_tc.msacsr & MSACSR_FS_MASK) != 0) {
6237          if (action & CLEAR_IS_INEXACT) {
6238              mips_exception_flags &= ~FP_INEXACT;
6239          } else {
6240              mips_exception_flags |= FP_INEXACT;
6241          }
6242      }
6243  
6244      /* Set Inexact (I) and Underflow (U) when flushing outputs to zero */
6245      if ((ieee_exception_flags & float_flag_output_denormal_flushed) &&
6246              (env->active_tc.msacsr & MSACSR_FS_MASK) != 0) {
6247          mips_exception_flags |= FP_INEXACT;
6248          if (action & CLEAR_FS_UNDERFLOW) {
6249              mips_exception_flags &= ~FP_UNDERFLOW;
6250          } else {
6251              mips_exception_flags |= FP_UNDERFLOW;
6252          }
6253      }
6254  
6255      /* Set Inexact (I) when Overflow (O) is not enabled */
6256      if ((mips_exception_flags & FP_OVERFLOW) != 0 &&
6257             (enable & FP_OVERFLOW) == 0) {
6258          mips_exception_flags |= FP_INEXACT;
6259      }
6260  
6261      /* Clear Exact Underflow when Underflow (U) is not enabled */
6262      if ((mips_exception_flags & FP_UNDERFLOW) != 0 &&
6263             (enable & FP_UNDERFLOW) == 0 &&
6264             (mips_exception_flags & FP_INEXACT) == 0) {
6265          mips_exception_flags &= ~FP_UNDERFLOW;
6266      }
6267  
6268      /*
6269       * Reciprocal operations set only Inexact when valid and not
6270       * divide by zero
6271       */
6272      if ((action & RECIPROCAL_INEXACT) &&
6273              (mips_exception_flags & (FP_INVALID | FP_DIV0)) == 0) {
6274          mips_exception_flags = FP_INEXACT;
6275      }
6276  
6277      cause = mips_exception_flags & enable; /* all current enabled exceptions */
6278  
6279      if (cause == 0) {
6280          /*
6281           * No enabled exception, update the MSACSR Cause
6282           * with all current exceptions
6283           */
6284          SET_FP_CAUSE(env->active_tc.msacsr,
6285              (GET_FP_CAUSE(env->active_tc.msacsr) | mips_exception_flags));
6286      } else {
6287          /* Current exceptions are enabled */
6288          if ((env->active_tc.msacsr & MSACSR_NX_MASK) == 0) {
6289              /*
6290               * Exception(s) will trap, update MSACSR Cause
6291               * with all enabled exceptions
6292               */
6293              SET_FP_CAUSE(env->active_tc.msacsr,
6294                  (GET_FP_CAUSE(env->active_tc.msacsr) | mips_exception_flags));
6295          }
6296      }
6297  
6298      return mips_exception_flags;
6299  }
6300  
6301  static inline int get_enabled_exceptions(const CPUMIPSState *env, int c)
6302  {
6303      int enable = GET_FP_ENABLE(env->active_tc.msacsr) | FP_UNIMPLEMENTED;
6304      return c & enable;
6305  }
6306  
6307  static inline float16 float16_from_float32(int32_t a, bool ieee,
6308                                             float_status *status)
6309  {
6310        float16 f_val;
6311  
6312        f_val = float32_to_float16((float32)a, ieee, status);
6313  
6314        return a < 0 ? (f_val | (1 << 15)) : f_val;
6315  }
6316  
6317  static inline float32 float32_from_float64(int64_t a, float_status *status)
6318  {
6319        float32 f_val;
6320  
6321        f_val = float64_to_float32((float64)a, status);
6322  
6323        return a < 0 ? (f_val | (1 << 31)) : f_val;
6324  }
6325  
6326  static inline float32 float32_from_float16(int16_t a, bool ieee,
6327                                             float_status *status)
6328  {
6329        float32 f_val;
6330  
6331        f_val = float16_to_float32((float16)a, ieee, status);
6332  
6333        return a < 0 ? (f_val | (1 << 31)) : f_val;
6334  }
6335  
6336  static inline float64 float64_from_float32(int32_t a, float_status *status)
6337  {
6338        float64 f_val;
6339  
6340        f_val = float32_to_float64((float64)a, status);
6341  
6342        return a < 0 ? (f_val | (1ULL << 63)) : f_val;
6343  }
6344  
6345  static inline float32 float32_from_q16(int16_t a, float_status *status)
6346  {
6347      float32 f_val;
6348  
6349      /* conversion as integer and scaling */
6350      f_val = int32_to_float32(a, status);
6351      f_val = float32_scalbn(f_val, -15, status);
6352  
6353      return f_val;
6354  }
6355  
6356  static inline float64 float64_from_q32(int32_t a, float_status *status)
6357  {
6358      float64 f_val;
6359  
6360      /* conversion as integer and scaling */
6361      f_val = int32_to_float64(a, status);
6362      f_val = float64_scalbn(f_val, -31, status);
6363  
6364      return f_val;
6365  }
6366  
6367  static inline int16_t float32_to_q16(float32 a, float_status *status)
6368  {
6369      int32_t q_val;
6370      int32_t q_min = 0xffff8000;
6371      int32_t q_max = 0x00007fff;
6372  
6373      int ieee_ex;
6374  
6375      if (float32_is_any_nan(a)) {
6376          float_raise(float_flag_invalid, status);
6377          return 0;
6378      }
6379  
6380      /* scaling */
6381      a = float32_scalbn(a, 15, status);
6382  
6383      ieee_ex = get_float_exception_flags(status);
6384      set_float_exception_flags(ieee_ex & (~float_flag_underflow)
6385                               , status);
6386  
6387      if (ieee_ex & float_flag_overflow) {
6388          float_raise(float_flag_inexact, status);
6389          return (int32_t)a < 0 ? q_min : q_max;
6390      }
6391  
6392      /* conversion to int */
6393      q_val = float32_to_int32(a, status);
6394  
6395      ieee_ex = get_float_exception_flags(status);
6396      set_float_exception_flags(ieee_ex & (~float_flag_underflow)
6397                               , status);
6398  
6399      if (ieee_ex & float_flag_invalid) {
6400          set_float_exception_flags(ieee_ex & (~float_flag_invalid)
6401                                 , status);
6402          float_raise(float_flag_overflow | float_flag_inexact, status);
6403          return (int32_t)a < 0 ? q_min : q_max;
6404      }
6405  
6406      if (q_val < q_min) {
6407          float_raise(float_flag_overflow | float_flag_inexact, status);
6408          return (int16_t)q_min;
6409      }
6410  
6411      if (q_max < q_val) {
6412          float_raise(float_flag_overflow | float_flag_inexact, status);
6413          return (int16_t)q_max;
6414      }
6415  
6416      return (int16_t)q_val;
6417  }
6418  
6419  static inline int32_t float64_to_q32(float64 a, float_status *status)
6420  {
6421      int64_t q_val;
6422      int64_t q_min = 0xffffffff80000000LL;
6423      int64_t q_max = 0x000000007fffffffLL;
6424  
6425      int ieee_ex;
6426  
6427      if (float64_is_any_nan(a)) {
6428          float_raise(float_flag_invalid, status);
6429          return 0;
6430      }
6431  
6432      /* scaling */
6433      a = float64_scalbn(a, 31, status);
6434  
6435      ieee_ex = get_float_exception_flags(status);
6436      set_float_exception_flags(ieee_ex & (~float_flag_underflow)
6437             , status);
6438  
6439      if (ieee_ex & float_flag_overflow) {
6440          float_raise(float_flag_inexact, status);
6441          return (int64_t)a < 0 ? q_min : q_max;
6442      }
6443  
6444      /* conversion to integer */
6445      q_val = float64_to_int64(a, status);
6446  
6447      ieee_ex = get_float_exception_flags(status);
6448      set_float_exception_flags(ieee_ex & (~float_flag_underflow)
6449             , status);
6450  
6451      if (ieee_ex & float_flag_invalid) {
6452          set_float_exception_flags(ieee_ex & (~float_flag_invalid)
6453                 , status);
6454          float_raise(float_flag_overflow | float_flag_inexact, status);
6455          return (int64_t)a < 0 ? q_min : q_max;
6456      }
6457  
6458      if (q_val < q_min) {
6459          float_raise(float_flag_overflow | float_flag_inexact, status);
6460          return (int32_t)q_min;
6461      }
6462  
6463      if (q_max < q_val) {
6464          float_raise(float_flag_overflow | float_flag_inexact, status);
6465          return (int32_t)q_max;
6466      }
6467  
6468      return (int32_t)q_val;
6469  }
6470  
/*
 * Evaluate one floating-point predicate on a single element pair.
 *
 * Calls float<BITS>_<OP>() -- or float<BITS>_<OP>_quiet() when QUIET is
 * non-zero -- on ARG1/ARG2 and stores in DEST either all BITS ones
 * (predicate true) or 0.  The softfloat flags are cleared beforehand and
 * folded into MSACSR afterwards with update_msacsr(CLEAR_IS_INEXACT).
 * If any resulting exception is enabled, DEST is instead set to the
 * FLOAT_SNAN<BITS> pattern with its low 6 bits replaced by the exception
 * bits.
 *
 * Requires a CPUMIPSState pointer named 'env' in the expanding scope.
 * The block declares locals 'status', 'c' and 'cond', so the argument
 * expressions must not refer to caller variables with those names.
 * DEST is evaluated more than once and must be a side-effect-free lvalue.
 */
#define MSA_FLOAT_COND(DEST, OP, ARG1, ARG2, BITS, QUIET)                   \
    do {                                                                    \
        float_status *status = &env->active_tc.msa_fp_status;               \
        int c;                                                              \
        int64_t cond;                                                       \
        set_float_exception_flags(0, status);                               \
        if (!(QUIET)) {                                                     \
            cond = float ## BITS ## _ ## OP(ARG1, ARG2, status);            \
        } else {                                                            \
            cond = float ## BITS ## _ ## OP ## _quiet(ARG1, ARG2, status);  \
        }                                                                   \
        (DEST) = cond ? M_MAX_UINT(BITS) : 0;                               \
        c = update_msacsr(env, CLEAR_IS_INEXACT, 0);                        \
                                                                            \
        if (get_enabled_exceptions(env, c)) {                               \
            (DEST) = ((FLOAT_SNAN ## BITS(status) >> 6) << 6) | c;          \
        }                                                                   \
    } while (0)
6489  
/*
 * "Always false" predicate: run the 'eq' comparison through
 * MSA_FLOAT_COND (so exceptions are still detected and recorded), then
 * turn an all-ones "true" result into 0.  Any other value left in DEST
 * by MSA_FLOAT_COND is kept.  DEST must be a side-effect-free lvalue.
 */
#define MSA_FLOAT_AF(DEST, ARG1, ARG2, BITS, QUIET)                 \
    do {                                                            \
        MSA_FLOAT_COND(DEST, eq, ARG1, ARG2, BITS, QUIET);          \
        if (((DEST) & M_MAX_UINT(BITS)) == M_MAX_UINT(BITS)) {      \
            (DEST) = 0;                                             \
        }                                                           \
    } while (0)
6497  
/*
 * "Unordered or equal" predicate: evaluate 'unordered' through
 * MSA_FLOAT_COND and, only if that left 0 in DEST, evaluate 'eq'.
 * DEST must be a side-effect-free lvalue.
 */
#define MSA_FLOAT_UEQ(DEST, ARG1, ARG2, BITS, QUIET)                \
    do {                                                            \
        MSA_FLOAT_COND(DEST, unordered, ARG1, ARG2, BITS, QUIET);   \
        if ((DEST) == 0) {                                          \
            MSA_FLOAT_COND(DEST, eq, ARG1, ARG2, BITS, QUIET);      \
        }                                                           \
    } while (0)
6505  
/*
 * "Not equal" predicate: evaluate ARG1 < ARG2 through MSA_FLOAT_COND
 * and, only if that left 0 in DEST, evaluate ARG2 < ARG1.
 * DEST must be a side-effect-free lvalue.
 */
#define MSA_FLOAT_NE(DEST, ARG1, ARG2, BITS, QUIET)                 \
    do {                                                            \
        MSA_FLOAT_COND(DEST, lt, ARG1, ARG2, BITS, QUIET);          \
        if ((DEST) == 0) {                                          \
            MSA_FLOAT_COND(DEST, lt, ARG2, ARG1, BITS, QUIET);      \
        }                                                           \
    } while (0)
6513  
/*
 * "Unordered or not equal" predicate: evaluate 'unordered', then
 * ARG1 < ARG2, then ARG2 < ARG1 through MSA_FLOAT_COND, stopping at the
 * first step that leaves a non-zero value in DEST.
 * DEST must be a side-effect-free lvalue.
 */
#define MSA_FLOAT_UNE(DEST, ARG1, ARG2, BITS, QUIET)                \
    do {                                                            \
        MSA_FLOAT_COND(DEST, unordered, ARG1, ARG2, BITS, QUIET);   \
        if ((DEST) == 0) {                                          \
            MSA_FLOAT_COND(DEST, lt, ARG1, ARG2, BITS, QUIET);      \
            if ((DEST) == 0) {                                      \
                MSA_FLOAT_COND(DEST, lt, ARG2, ARG1, BITS, QUIET);  \
            }                                                       \
        }                                                           \
    } while (0)
6524  
/*
 * "Unordered or less-or-equal" predicate: evaluate 'unordered' through
 * MSA_FLOAT_COND and, only if that left 0 in DEST, evaluate 'le'.
 * DEST must be a side-effect-free lvalue.
 */
#define MSA_FLOAT_ULE(DEST, ARG1, ARG2, BITS, QUIET)                \
    do {                                                            \
        MSA_FLOAT_COND(DEST, unordered, ARG1, ARG2, BITS, QUIET);   \
        if ((DEST) == 0) {                                          \
            MSA_FLOAT_COND(DEST, le, ARG1, ARG2, BITS, QUIET);      \
        }                                                           \
    } while (0)
6532  
/*
 * "Unordered or less-than" predicate: evaluate 'unordered' through
 * MSA_FLOAT_COND and, only if that left 0 in DEST, evaluate 'lt'.
 * DEST must be a side-effect-free lvalue.
 */
#define MSA_FLOAT_ULT(DEST, ARG1, ARG2, BITS, QUIET)                \
    do {                                                            \
        MSA_FLOAT_COND(DEST, unordered, ARG1, ARG2, BITS, QUIET);   \
        if ((DEST) == 0) {                                          \
            MSA_FLOAT_COND(DEST, lt, ARG1, ARG2, BITS, QUIET);      \
        }                                                           \
    } while (0)
6540  
/*
 * "Ordered" predicate: evaluate ARG1 <= ARG2 through MSA_FLOAT_COND
 * and, only if that left 0 in DEST, evaluate ARG2 <= ARG1.
 * DEST must be a side-effect-free lvalue.
 */
#define MSA_FLOAT_OR(DEST, ARG1, ARG2, BITS, QUIET)                 \
    do {                                                            \
        MSA_FLOAT_COND(DEST, le, ARG1, ARG2, BITS, QUIET);          \
        if ((DEST) == 0) {                                          \
            MSA_FLOAT_COND(DEST, le, ARG2, ARG1, BITS, QUIET);      \
        }                                                           \
    } while (0)
6548  
6549  static inline void compare_af(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6550                                wr_t *pwt, uint32_t df, int quiet,
6551                                uintptr_t retaddr)
6552  {
6553      wr_t wx, *pwx = &wx;
6554      uint32_t i;
6555  
6556      clear_msacsr_cause(env);
6557  
6558      switch (df) {
6559      case DF_WORD:
6560          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6561              MSA_FLOAT_AF(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
6562          }
6563          break;
6564      case DF_DOUBLE:
6565          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6566              MSA_FLOAT_AF(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
6567          }
6568          break;
6569      default:
6570          g_assert_not_reached();
6571      }
6572  
6573      check_msacsr_cause(env, retaddr);
6574  
6575      msa_move_v(pwd, pwx);
6576  }
6577  
6578  static inline void compare_un(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6579                                wr_t *pwt, uint32_t df, int quiet,
6580                                uintptr_t retaddr)
6581  {
6582      wr_t wx, *pwx = &wx;
6583      uint32_t i;
6584  
6585      clear_msacsr_cause(env);
6586  
6587      switch (df) {
6588      case DF_WORD:
6589          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6590              MSA_FLOAT_COND(pwx->w[i], unordered, pws->w[i], pwt->w[i], 32,
6591                      quiet);
6592          }
6593          break;
6594      case DF_DOUBLE:
6595          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6596              MSA_FLOAT_COND(pwx->d[i], unordered, pws->d[i], pwt->d[i], 64,
6597                      quiet);
6598          }
6599          break;
6600      default:
6601          g_assert_not_reached();
6602      }
6603  
6604      check_msacsr_cause(env, retaddr);
6605  
6606      msa_move_v(pwd, pwx);
6607  }
6608  
6609  static inline void compare_eq(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6610                                wr_t *pwt, uint32_t df, int quiet,
6611                                uintptr_t retaddr)
6612  {
6613      wr_t wx, *pwx = &wx;
6614      uint32_t i;
6615  
6616      clear_msacsr_cause(env);
6617  
6618      switch (df) {
6619      case DF_WORD:
6620          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6621              MSA_FLOAT_COND(pwx->w[i], eq, pws->w[i], pwt->w[i], 32, quiet);
6622          }
6623          break;
6624      case DF_DOUBLE:
6625          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6626              MSA_FLOAT_COND(pwx->d[i], eq, pws->d[i], pwt->d[i], 64, quiet);
6627          }
6628          break;
6629      default:
6630          g_assert_not_reached();
6631      }
6632  
6633      check_msacsr_cause(env, retaddr);
6634  
6635      msa_move_v(pwd, pwx);
6636  }
6637  
6638  static inline void compare_ueq(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6639                                 wr_t *pwt, uint32_t df, int quiet,
6640                                 uintptr_t retaddr)
6641  {
6642      wr_t wx, *pwx = &wx;
6643      uint32_t i;
6644  
6645      clear_msacsr_cause(env);
6646  
6647      switch (df) {
6648      case DF_WORD:
6649          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6650              MSA_FLOAT_UEQ(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
6651          }
6652          break;
6653      case DF_DOUBLE:
6654          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6655              MSA_FLOAT_UEQ(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
6656          }
6657          break;
6658      default:
6659          g_assert_not_reached();
6660      }
6661  
6662      check_msacsr_cause(env, retaddr);
6663  
6664      msa_move_v(pwd, pwx);
6665  }
6666  
6667  static inline void compare_lt(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6668                                wr_t *pwt, uint32_t df, int quiet,
6669                                uintptr_t retaddr)
6670  {
6671      wr_t wx, *pwx = &wx;
6672      uint32_t i;
6673  
6674      clear_msacsr_cause(env);
6675  
6676      switch (df) {
6677      case DF_WORD:
6678          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6679              MSA_FLOAT_COND(pwx->w[i], lt, pws->w[i], pwt->w[i], 32, quiet);
6680          }
6681          break;
6682      case DF_DOUBLE:
6683          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6684              MSA_FLOAT_COND(pwx->d[i], lt, pws->d[i], pwt->d[i], 64, quiet);
6685          }
6686          break;
6687      default:
6688          g_assert_not_reached();
6689      }
6690  
6691      check_msacsr_cause(env, retaddr);
6692  
6693      msa_move_v(pwd, pwx);
6694  }
6695  
6696  static inline void compare_ult(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6697                                 wr_t *pwt, uint32_t df, int quiet,
6698                                 uintptr_t retaddr)
6699  {
6700      wr_t wx, *pwx = &wx;
6701      uint32_t i;
6702  
6703      clear_msacsr_cause(env);
6704  
6705      switch (df) {
6706      case DF_WORD:
6707          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6708              MSA_FLOAT_ULT(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
6709          }
6710          break;
6711      case DF_DOUBLE:
6712          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6713              MSA_FLOAT_ULT(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
6714          }
6715          break;
6716      default:
6717          g_assert_not_reached();
6718      }
6719  
6720      check_msacsr_cause(env, retaddr);
6721  
6722      msa_move_v(pwd, pwx);
6723  }
6724  
6725  static inline void compare_le(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6726                                wr_t *pwt, uint32_t df, int quiet,
6727                                uintptr_t retaddr)
6728  {
6729      wr_t wx, *pwx = &wx;
6730      uint32_t i;
6731  
6732      clear_msacsr_cause(env);
6733  
6734      switch (df) {
6735      case DF_WORD:
6736          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6737              MSA_FLOAT_COND(pwx->w[i], le, pws->w[i], pwt->w[i], 32, quiet);
6738          }
6739          break;
6740      case DF_DOUBLE:
6741          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6742              MSA_FLOAT_COND(pwx->d[i], le, pws->d[i], pwt->d[i], 64, quiet);
6743          }
6744          break;
6745      default:
6746          g_assert_not_reached();
6747      }
6748  
6749      check_msacsr_cause(env, retaddr);
6750  
6751      msa_move_v(pwd, pwx);
6752  }
6753  
6754  static inline void compare_ule(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6755                                 wr_t *pwt, uint32_t df, int quiet,
6756                                 uintptr_t retaddr)
6757  {
6758      wr_t wx, *pwx = &wx;
6759      uint32_t i;
6760  
6761      clear_msacsr_cause(env);
6762  
6763      switch (df) {
6764      case DF_WORD:
6765          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6766              MSA_FLOAT_ULE(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
6767          }
6768          break;
6769      case DF_DOUBLE:
6770          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6771              MSA_FLOAT_ULE(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
6772          }
6773          break;
6774      default:
6775          g_assert_not_reached();
6776      }
6777  
6778      check_msacsr_cause(env, retaddr);
6779  
6780      msa_move_v(pwd, pwx);
6781  }
6782  
6783  static inline void compare_or(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6784                                wr_t *pwt, uint32_t df, int quiet,
6785                                uintptr_t retaddr)
6786  {
6787      wr_t wx, *pwx = &wx;
6788      uint32_t i;
6789  
6790      clear_msacsr_cause(env);
6791  
6792      switch (df) {
6793      case DF_WORD:
6794          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6795              MSA_FLOAT_OR(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
6796          }
6797          break;
6798      case DF_DOUBLE:
6799          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6800              MSA_FLOAT_OR(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
6801          }
6802          break;
6803      default:
6804          g_assert_not_reached();
6805      }
6806  
6807      check_msacsr_cause(env, retaddr);
6808  
6809      msa_move_v(pwd, pwx);
6810  }
6811  
6812  static inline void compare_une(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6813                                 wr_t *pwt, uint32_t df, int quiet,
6814                                 uintptr_t retaddr)
6815  {
6816      wr_t wx, *pwx = &wx;
6817      uint32_t i;
6818  
6819      clear_msacsr_cause(env);
6820  
6821      switch (df) {
6822      case DF_WORD:
6823          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6824              MSA_FLOAT_UNE(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
6825          }
6826          break;
6827      case DF_DOUBLE:
6828          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6829              MSA_FLOAT_UNE(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
6830          }
6831          break;
6832      default:
6833          g_assert_not_reached();
6834      }
6835  
6836      check_msacsr_cause(env, retaddr);
6837  
6838      msa_move_v(pwd, pwx);
6839  }
6840  
6841  static inline void compare_ne(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6842                                wr_t *pwt, uint32_t df, int quiet,
6843                                uintptr_t retaddr)
6844  {
6845      wr_t wx, *pwx = &wx;
6846      uint32_t i;
6847  
6848      clear_msacsr_cause(env);
6849  
6850      switch (df) {
6851      case DF_WORD:
6852          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6853              MSA_FLOAT_NE(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
6854          }
6855          break;
6856      case DF_DOUBLE:
6857          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6858              MSA_FLOAT_NE(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
6859          }
6860          break;
6861      default:
6862          g_assert_not_reached();
6863      }
6864  
6865      check_msacsr_cause(env, retaddr);
6866  
6867      msa_move_v(pwd, pwx);
6868  }
6869  
6870  void helper_msa_fcaf_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6871                          uint32_t ws, uint32_t wt)
6872  {
6873      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6874      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6875      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6876      compare_af(env, pwd, pws, pwt, df, 1, GETPC());
6877  }
6878  
6879  void helper_msa_fcun_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6880                          uint32_t ws, uint32_t wt)
6881  {
6882      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6883      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6884      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6885      compare_un(env, pwd, pws, pwt, df, 1, GETPC());
6886  }
6887  
6888  void helper_msa_fceq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6889                          uint32_t ws, uint32_t wt)
6890  {
6891      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6892      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6893      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6894      compare_eq(env, pwd, pws, pwt, df, 1, GETPC());
6895  }
6896  
6897  void helper_msa_fcueq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6898                           uint32_t ws, uint32_t wt)
6899  {
6900      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6901      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6902      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6903      compare_ueq(env, pwd, pws, pwt, df, 1, GETPC());
6904  }
6905  
6906  void helper_msa_fclt_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6907                          uint32_t ws, uint32_t wt)
6908  {
6909      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6910      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6911      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6912      compare_lt(env, pwd, pws, pwt, df, 1, GETPC());
6913  }
6914  
6915  void helper_msa_fcult_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6916                           uint32_t ws, uint32_t wt)
6917  {
6918      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6919      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6920      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6921      compare_ult(env, pwd, pws, pwt, df, 1, GETPC());
6922  }
6923  
6924  void helper_msa_fcle_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6925                          uint32_t ws, uint32_t wt)
6926  {
6927      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6928      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6929      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6930      compare_le(env, pwd, pws, pwt, df, 1, GETPC());
6931  }
6932  
6933  void helper_msa_fcule_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6934                           uint32_t ws, uint32_t wt)
6935  {
6936      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6937      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6938      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6939      compare_ule(env, pwd, pws, pwt, df, 1, GETPC());
6940  }
6941  
6942  void helper_msa_fsaf_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6943                          uint32_t ws, uint32_t wt)
6944  {
6945      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6946      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6947      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6948      compare_af(env, pwd, pws, pwt, df, 0, GETPC());
6949  }
6950  
6951  void helper_msa_fsun_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6952                          uint32_t ws, uint32_t wt)
6953  {
6954      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6955      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6956      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6957      compare_un(env, pwd, pws, pwt, df, 0, GETPC());
6958  }
6959  
6960  void helper_msa_fseq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6961                          uint32_t ws, uint32_t wt)
6962  {
6963      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6964      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6965      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6966      compare_eq(env, pwd, pws, pwt, df, 0, GETPC());
6967  }
6968  
6969  void helper_msa_fsueq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6970                           uint32_t ws, uint32_t wt)
6971  {
6972      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6973      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6974      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6975      compare_ueq(env, pwd, pws, pwt, df, 0, GETPC());
6976  }
6977  
6978  void helper_msa_fslt_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6979                          uint32_t ws, uint32_t wt)
6980  {
6981      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6982      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6983      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6984      compare_lt(env, pwd, pws, pwt, df, 0, GETPC());
6985  }
6986  
6987  void helper_msa_fsult_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6988                           uint32_t ws, uint32_t wt)
6989  {
6990      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6991      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6992      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6993      compare_ult(env, pwd, pws, pwt, df, 0, GETPC());
6994  }
6995  
6996  void helper_msa_fsle_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6997                          uint32_t ws, uint32_t wt)
6998  {
6999      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7000      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7001      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7002      compare_le(env, pwd, pws, pwt, df, 0, GETPC());
7003  }
7004  
7005  void helper_msa_fsule_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7006                           uint32_t ws, uint32_t wt)
7007  {
7008      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7009      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7010      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7011      compare_ule(env, pwd, pws, pwt, df, 0, GETPC());
7012  }
7013  
7014  void helper_msa_fcor_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7015                          uint32_t ws, uint32_t wt)
7016  {
7017      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7018      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7019      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7020      compare_or(env, pwd, pws, pwt, df, 1, GETPC());
7021  }
7022  
7023  void helper_msa_fcune_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7024                           uint32_t ws, uint32_t wt)
7025  {
7026      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7027      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7028      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7029      compare_une(env, pwd, pws, pwt, df, 1, GETPC());
7030  }
7031  
7032  void helper_msa_fcne_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7033                          uint32_t ws, uint32_t wt)
7034  {
7035      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7036      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7037      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7038      compare_ne(env, pwd, pws, pwt, df, 1, GETPC());
7039  }
7040  
7041  void helper_msa_fsor_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7042                          uint32_t ws, uint32_t wt)
7043  {
7044      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7045      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7046      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7047      compare_or(env, pwd, pws, pwt, df, 0, GETPC());
7048  }
7049  
7050  void helper_msa_fsune_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7051                           uint32_t ws, uint32_t wt)
7052  {
7053      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7054      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7055      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7056      compare_une(env, pwd, pws, pwt, df, 0, GETPC());
7057  }
7058  
7059  void helper_msa_fsne_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7060                          uint32_t ws, uint32_t wt)
7061  {
7062      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7063      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7064      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7065      compare_ne(env, pwd, pws, pwt, df, 0, GETPC());
7066  }
7067  
/*
 * Constant-zero stand-ins for the 16-bit predicates, so that
 * IS_DENORMAL(x, 16) expands to valid code and always yields 0.
 */
#define float16_is_zero(ARG) 0
#define float16_is_zero_or_denormal(ARG) 0

/*
 * 1 when the float<BITS> value ARG is reported as "zero or denormal" but
 * not as zero, 0 otherwise.  The zero test is evaluated first; the second
 * predicate is only evaluated when ARG is not zero.
 */
#define IS_DENORMAL(ARG, BITS)                              \
    (float ## BITS ## _is_zero(ARG)                         \
     ? 0                                                    \
     : (float ## BITS ## _is_zero_or_denormal(ARG) != 0))
7074  
/*
 * Run the two-operand routine float<BITS>_<OP>(ARG1, ARG2, status) and
 * store its result in DEST.
 *
 * Sequence:
 *   1. the exception flags of env->active_tc.msa_fp_status are reset to 0;
 *   2. DEST receives the result of the operation;
 *   3. update_msacsr(env, 0, IS_DENORMAL(DEST, BITS)) is called and its
 *      return value kept in 'c';
 *   4. if get_enabled_exceptions(env, c) is non-zero, DEST is overwritten
 *      with FLOAT_SNAN<BITS>(status) whose low six bits are cleared and
 *      then OR-ed with 'c'.
 *
 * Requires 'env' to be in scope at the expansion site.  The macro declares
 * locals named 'status' and 'c', so arguments must not rely on outer
 * identifiers with those names.  DEST is expanded more than once.
 */
#define MSA_FLOAT_BINOP(DEST, OP, ARG1, ARG2, BITS)                         \
    do {                                                                    \
        float_status *status = &env->active_tc.msa_fp_status;               \
        int c;                                                              \
                                                                            \
        set_float_exception_flags(0, status);                               \
        DEST = float ## BITS ## _ ## OP(ARG1, ARG2, status);                \
        c = update_msacsr(env, 0, IS_DENORMAL(DEST, BITS));                 \
                                                                            \
        if (get_enabled_exceptions(env, c)) {                               \
            DEST = ((FLOAT_SNAN ## BITS(status) >> 6) << 6) | c;            \
        }                                                                   \
    } while (0)
7088  
7089  void helper_msa_fadd_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7090          uint32_t ws, uint32_t wt)
7091  {
7092      wr_t wx, *pwx = &wx;
7093      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7094      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7095      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7096      uint32_t i;
7097  
7098      clear_msacsr_cause(env);
7099  
7100      switch (df) {
7101      case DF_WORD:
7102          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7103              MSA_FLOAT_BINOP(pwx->w[i], add, pws->w[i], pwt->w[i], 32);
7104          }
7105          break;
7106      case DF_DOUBLE:
7107          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7108              MSA_FLOAT_BINOP(pwx->d[i], add, pws->d[i], pwt->d[i], 64);
7109          }
7110          break;
7111      default:
7112          g_assert_not_reached();
7113      }
7114  
7115      check_msacsr_cause(env, GETPC());
7116      msa_move_v(pwd, pwx);
7117  }
7118  
7119  void helper_msa_fsub_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7120          uint32_t ws, uint32_t wt)
7121  {
7122      wr_t wx, *pwx = &wx;
7123      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7124      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7125      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7126      uint32_t i;
7127  
7128      clear_msacsr_cause(env);
7129  
7130      switch (df) {
7131      case DF_WORD:
7132          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7133              MSA_FLOAT_BINOP(pwx->w[i], sub, pws->w[i], pwt->w[i], 32);
7134          }
7135          break;
7136      case DF_DOUBLE:
7137          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7138              MSA_FLOAT_BINOP(pwx->d[i], sub, pws->d[i], pwt->d[i], 64);
7139          }
7140          break;
7141      default:
7142          g_assert_not_reached();
7143      }
7144  
7145      check_msacsr_cause(env, GETPC());
7146      msa_move_v(pwd, pwx);
7147  }
7148  
7149  void helper_msa_fmul_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7150          uint32_t ws, uint32_t wt)
7151  {
7152      wr_t wx, *pwx = &wx;
7153      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7154      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7155      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7156      uint32_t i;
7157  
7158      clear_msacsr_cause(env);
7159  
7160      switch (df) {
7161      case DF_WORD:
7162          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7163              MSA_FLOAT_BINOP(pwx->w[i], mul, pws->w[i], pwt->w[i], 32);
7164          }
7165          break;
7166      case DF_DOUBLE:
7167          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7168              MSA_FLOAT_BINOP(pwx->d[i], mul, pws->d[i], pwt->d[i], 64);
7169          }
7170          break;
7171      default:
7172          g_assert_not_reached();
7173      }
7174  
7175      check_msacsr_cause(env, GETPC());
7176  
7177      msa_move_v(pwd, pwx);
7178  }
7179  
7180  void helper_msa_fdiv_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7181          uint32_t ws, uint32_t wt)
7182  {
7183      wr_t wx, *pwx = &wx;
7184      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7185      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7186      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7187      uint32_t i;
7188  
7189      clear_msacsr_cause(env);
7190  
7191      switch (df) {
7192      case DF_WORD:
7193          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7194              MSA_FLOAT_BINOP(pwx->w[i], div, pws->w[i], pwt->w[i], 32);
7195          }
7196          break;
7197      case DF_DOUBLE:
7198          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7199              MSA_FLOAT_BINOP(pwx->d[i], div, pws->d[i], pwt->d[i], 64);
7200          }
7201          break;
7202      default:
7203          g_assert_not_reached();
7204      }
7205  
7206      check_msacsr_cause(env, GETPC());
7207  
7208      msa_move_v(pwd, pwx);
7209  }
7210  
/*
 * Run float<BITS>_muladd(ARG2, ARG3, ARG1, NEGATE, status) and store its
 * result in DEST.
 *
 * Note the operand order: ARG2 and ARG3 are passed as the first two
 * operands and ARG1 as the third (presumably product operands and addend
 * respectively, per the usual a * b + c convention - confirm against the
 * softfloat API).  NEGATE is forwarded unchanged as the flags argument;
 * the callers in this file pass 0 or float_muladd_negate_product.
 *
 * The surrounding sequence is: reset the exception flags of
 * env->active_tc.msa_fp_status, perform the operation, call
 * update_msacsr(env, 0, IS_DENORMAL(DEST, BITS)) and, if
 * get_enabled_exceptions(env, c) is non-zero, overwrite DEST with
 * FLOAT_SNAN<BITS>(status) whose low six bits are cleared and OR-ed
 * with 'c'.
 *
 * Requires 'env' in scope; declares locals 'status' and 'c'.
 */
#define MSA_FLOAT_MULADD(DEST, ARG1, ARG2, ARG3, NEGATE, BITS)              \
    do {                                                                    \
        float_status *status = &env->active_tc.msa_fp_status;               \
        int c;                                                              \
                                                                            \
        set_float_exception_flags(0, status);                               \
        DEST = float ## BITS ## _muladd(ARG2, ARG3, ARG1, NEGATE, status);  \
        c = update_msacsr(env, 0, IS_DENORMAL(DEST, BITS));                 \
                                                                            \
        if (get_enabled_exceptions(env, c)) {                               \
            DEST = ((FLOAT_SNAN ## BITS(status) >> 6) << 6) | c;            \
        }                                                                   \
    } while (0)
7224  
7225  void helper_msa_fmadd_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7226          uint32_t ws, uint32_t wt)
7227  {
7228      wr_t wx, *pwx = &wx;
7229      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7230      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7231      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7232      uint32_t i;
7233  
7234      clear_msacsr_cause(env);
7235  
7236      switch (df) {
7237      case DF_WORD:
7238          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7239              MSA_FLOAT_MULADD(pwx->w[i], pwd->w[i],
7240                             pws->w[i], pwt->w[i], 0, 32);
7241          }
7242          break;
7243      case DF_DOUBLE:
7244          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7245              MSA_FLOAT_MULADD(pwx->d[i], pwd->d[i],
7246                             pws->d[i], pwt->d[i], 0, 64);
7247          }
7248          break;
7249      default:
7250          g_assert_not_reached();
7251      }
7252  
7253      check_msacsr_cause(env, GETPC());
7254  
7255      msa_move_v(pwd, pwx);
7256  }
7257  
7258  void helper_msa_fmsub_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7259          uint32_t ws, uint32_t wt)
7260  {
7261      wr_t wx, *pwx = &wx;
7262      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7263      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7264      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7265      uint32_t i;
7266  
7267      clear_msacsr_cause(env);
7268  
7269      switch (df) {
7270      case DF_WORD:
7271          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7272              MSA_FLOAT_MULADD(pwx->w[i], pwd->w[i],
7273                             pws->w[i], pwt->w[i],
7274                             float_muladd_negate_product, 32);
7275        }
7276        break;
7277      case DF_DOUBLE:
7278          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7279              MSA_FLOAT_MULADD(pwx->d[i], pwd->d[i],
7280                             pws->d[i], pwt->d[i],
7281                             float_muladd_negate_product, 64);
7282          }
7283          break;
7284      default:
7285          g_assert_not_reached();
7286      }
7287  
7288      check_msacsr_cause(env, GETPC());
7289  
7290      msa_move_v(pwd, pwx);
7291  }
7292  
7293  void helper_msa_fexp2_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7294          uint32_t ws, uint32_t wt)
7295  {
7296      wr_t wx, *pwx = &wx;
7297      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7298      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7299      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7300      uint32_t i;
7301  
7302      clear_msacsr_cause(env);
7303  
7304      switch (df) {
7305      case DF_WORD:
7306          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7307              MSA_FLOAT_BINOP(pwx->w[i], scalbn, pws->w[i],
7308                              pwt->w[i] >  0x200 ?  0x200 :
7309                              pwt->w[i] < -0x200 ? -0x200 : pwt->w[i],
7310                              32);
7311          }
7312          break;
7313      case DF_DOUBLE:
7314          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7315              MSA_FLOAT_BINOP(pwx->d[i], scalbn, pws->d[i],
7316                              pwt->d[i] >  0x1000 ?  0x1000 :
7317                              pwt->d[i] < -0x1000 ? -0x1000 : pwt->d[i],
7318                              64);
7319          }
7320          break;
7321      default:
7322          g_assert_not_reached();
7323      }
7324  
7325      check_msacsr_cause(env, GETPC());
7326  
7327      msa_move_v(pwd, pwx);
7328  }
7329  
/*
 * Run the one-operand routine float<BITS>_<OP>(ARG, status) and store its
 * result in DEST.
 *
 * Sequence:
 *   1. the exception flags of env->active_tc.msa_fp_status are reset to 0;
 *   2. DEST receives the result of the operation;
 *   3. update_msacsr(env, 0, IS_DENORMAL(DEST, BITS)) is called and its
 *      return value kept in 'c';
 *   4. if get_enabled_exceptions(env, c) is non-zero, DEST is overwritten
 *      with FLOAT_SNAN<BITS>(status) whose low six bits are cleared and
 *      then OR-ed with 'c'.
 *
 * BITS names both the routine prefix and the width used for the denormal
 * check and the replacement pattern.  Requires 'env' in scope; declares
 * locals 'status' and 'c'.  DEST is expanded more than once.
 */
#define MSA_FLOAT_UNOP(DEST, OP, ARG, BITS)                                 \
    do {                                                                    \
        float_status *status = &env->active_tc.msa_fp_status;               \
        int c;                                                              \
                                                                            \
        set_float_exception_flags(0, status);                               \
        DEST = float ## BITS ## _ ## OP(ARG, status);                       \
        c = update_msacsr(env, 0, IS_DENORMAL(DEST, BITS));                 \
                                                                            \
        if (get_enabled_exceptions(env, c)) {                               \
            DEST = ((FLOAT_SNAN ## BITS(status) >> 6) << 6) | c;            \
        }                                                                   \
    } while (0)
7343  
7344  void helper_msa_fexdo_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7345                           uint32_t ws, uint32_t wt)
7346  {
7347      wr_t wx, *pwx = &wx;
7348      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7349      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7350      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7351      uint32_t i;
7352  
7353      clear_msacsr_cause(env);
7354  
7355      switch (df) {
7356      case DF_WORD:
7357          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7358              /*
7359               * Half precision floats come in two formats: standard
7360               * IEEE and "ARM" format.  The latter gains extra exponent
7361               * range by omitting the NaN/Inf encodings.
7362               */
7363              bool ieee = true;
7364  
7365              MSA_FLOAT_BINOP(Lh(pwx, i), from_float32, pws->w[i], ieee, 16);
7366              MSA_FLOAT_BINOP(Rh(pwx, i), from_float32, pwt->w[i], ieee, 16);
7367          }
7368          break;
7369      case DF_DOUBLE:
7370          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7371              MSA_FLOAT_UNOP(Lw(pwx, i), from_float64, pws->d[i], 32);
7372              MSA_FLOAT_UNOP(Rw(pwx, i), from_float64, pwt->d[i], 32);
7373          }
7374          break;
7375      default:
7376          g_assert_not_reached();
7377      }
7378  
7379      check_msacsr_cause(env, GETPC());
7380      msa_move_v(pwd, pwx);
7381  }
7382  
/*
 * Variant of the one-operand wrapper for operations whose destination
 * width differs from the source width.
 *
 * Runs float<BITS>_<OP>(ARG, status) after resetting the exception flags
 * of env->active_tc.msa_fp_status, then calls
 * update_msacsr(env, CLEAR_FS_UNDERFLOW, 0).  If
 * get_enabled_exceptions(env, c) is non-zero, DEST is overwritten with
 * FLOAT_SNAN<XBITS>(status) whose low six bits are cleared and OR-ed
 * with 'c'.
 *
 * BITS selects the routine prefix (source format); XBITS selects the
 * width of the replacement pattern (the callers in this file pass the
 * destination element width).  Requires 'env' in scope; declares locals
 * 'status' and 'c'.
 */
#define MSA_FLOAT_UNOP_XD(DEST, OP, ARG, BITS, XBITS)                       \
    do {                                                                    \
        float_status *status = &env->active_tc.msa_fp_status;               \
        int c;                                                              \
                                                                            \
        set_float_exception_flags(0, status);                               \
        DEST = float ## BITS ## _ ## OP(ARG, status);                       \
        c = update_msacsr(env, CLEAR_FS_UNDERFLOW, 0);                      \
                                                                            \
        if (get_enabled_exceptions(env, c)) {                               \
            DEST = ((FLOAT_SNAN ## XBITS(status) >> 6) << 6) | c;           \
        }                                                                   \
    } while (0)
7396  
7397  void helper_msa_ftq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7398                         uint32_t ws, uint32_t wt)
7399  {
7400      wr_t wx, *pwx = &wx;
7401      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7402      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7403      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7404      uint32_t i;
7405  
7406      clear_msacsr_cause(env);
7407  
7408      switch (df) {
7409      case DF_WORD:
7410          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7411              MSA_FLOAT_UNOP_XD(Lh(pwx, i), to_q16, pws->w[i], 32, 16);
7412              MSA_FLOAT_UNOP_XD(Rh(pwx, i), to_q16, pwt->w[i], 32, 16);
7413          }
7414          break;
7415      case DF_DOUBLE:
7416          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7417              MSA_FLOAT_UNOP_XD(Lw(pwx, i), to_q32, pws->d[i], 64, 32);
7418              MSA_FLOAT_UNOP_XD(Rw(pwx, i), to_q32, pwt->d[i], 64, 32);
7419          }
7420          break;
7421      default:
7422          g_assert_not_reached();
7423      }
7424  
7425      check_msacsr_cause(env, GETPC());
7426  
7427      msa_move_v(pwd, pwx);
7428  }
7429  
/*
 * True when ARG1 is not a NaN of any kind and ARG2 is a quiet NaN
 * (both interpreted as float<BITS> values; STATUS is forwarded to the
 * quiet-NaN test).
 *
 * The whole expansion is parenthesized so the macro can be negated or
 * combined with other operators without its '!' and '&&' re-associating
 * with the surrounding expression.
 */
#define NUMBER_QNAN_PAIR(ARG1, ARG2, BITS, STATUS)      \
    (!float ## BITS ## _is_any_nan(ARG1)                \
     && float ## BITS ## _is_quiet_nan(ARG2, STATUS))
7433  
/*
 * Run the two-operand routine float<BITS>_<OP>(ARG1, ARG2, status_) and
 * store its result in DEST.
 *
 * The exception flags of env->active_tc.msa_fp_status are reset to 0
 * first; afterwards update_msacsr(env, 0, 0) is called (no denormal check
 * on the result).  If get_enabled_exceptions(env, c) is non-zero, DEST is
 * overwritten with FLOAT_SNAN<BITS>(status_) whose low six bits are
 * cleared and OR-ed with 'c'.
 *
 * The status local is named 'status_' (with a trailing underscore), so it
 * does not clash with a 'status' variable at the expansion site.
 * Requires 'env' in scope; also declares a local 'c'.
 */
#define MSA_FLOAT_MAXOP(DEST, OP, ARG1, ARG2, BITS)                         \
    do {                                                                    \
        float_status *status_ = &env->active_tc.msa_fp_status;              \
        int c;                                                              \
                                                                            \
        set_float_exception_flags(0, status_);                              \
        DEST = float ## BITS ## _ ## OP(ARG1, ARG2, status_);               \
        c = update_msacsr(env, 0, 0);                                       \
                                                                            \
        if (get_enabled_exceptions(env, c)) {                               \
            DEST = ((FLOAT_SNAN ## BITS(status_) >> 6) << 6) | c;           \
        }                                                                   \
    } while (0)
7447  
/*
 * Select between _S and _T based on their absolute values and store the
 * chosen value in X.  F and G are the two routine suffixes to use: the
 * callers in this file pass (min, max) or (max, min).
 *
 * Steps:
 *   - S and T are local copies of _S and _T, so each argument is
 *     evaluated once.
 *   - If S is not a NaN and T is a quiet NaN, T is replaced by S;
 *     otherwise, if T is not a NaN and S is a quiet NaN, S is replaced
 *     by T.
 *   - as / at are float<BITS>_abs() of S / T.
 *   - xs = F(S, T), xt = G(S, T), xd = F(as, at), each computed through
 *     MSA_FLOAT_MAXOP (so each one resets the exception flags and calls
 *     update_msacsr()).
 *   - X is xs when as and at have the same bit pattern or when xd equals
 *     float<BITS>_abs(xs); otherwise X is xt.
 *
 * Requires 'env' in scope (via MSA_FLOAT_MAXOP).  Declares locals S, T,
 * as, at, xs, xt and xd.
 */
#define FMAXMIN_A(F, G, X, _S, _T, BITS, STATUS)                    \
    do {                                                            \
        uint## BITS ##_t S = _S, T = _T;                            \
        uint## BITS ##_t as, at, xs, xt, xd;                        \
        if (NUMBER_QNAN_PAIR(S, T, BITS, STATUS)) {                 \
            T = S;                                                  \
        }                                                           \
        else if (NUMBER_QNAN_PAIR(T, S, BITS, STATUS)) {            \
            S = T;                                                  \
        }                                                           \
        as = float## BITS ##_abs(S);                                \
        at = float## BITS ##_abs(T);                                \
        MSA_FLOAT_MAXOP(xs, F,  S,  T, BITS);                       \
        MSA_FLOAT_MAXOP(xt, G,  S,  T, BITS);                       \
        MSA_FLOAT_MAXOP(xd, F, as, at, BITS);                       \
        X = (as == at || xd == float## BITS ##_abs(xs)) ? xs : xt;  \
    } while (0)
7465  
7466  void helper_msa_fmin_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7467          uint32_t ws, uint32_t wt)
7468  {
7469      float_status *status = &env->active_tc.msa_fp_status;
7470      wr_t wx, *pwx = &wx;
7471      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7472      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7473      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7474  
7475      clear_msacsr_cause(env);
7476  
7477      if (df == DF_WORD) {
7478  
7479          if (NUMBER_QNAN_PAIR(pws->w[0], pwt->w[0], 32, status)) {
7480              MSA_FLOAT_MAXOP(pwx->w[0], min, pws->w[0], pws->w[0], 32);
7481          } else if (NUMBER_QNAN_PAIR(pwt->w[0], pws->w[0], 32, status)) {
7482              MSA_FLOAT_MAXOP(pwx->w[0], min, pwt->w[0], pwt->w[0], 32);
7483          } else {
7484              MSA_FLOAT_MAXOP(pwx->w[0], min, pws->w[0], pwt->w[0], 32);
7485          }
7486  
7487          if (NUMBER_QNAN_PAIR(pws->w[1], pwt->w[1], 32, status)) {
7488              MSA_FLOAT_MAXOP(pwx->w[1], min, pws->w[1], pws->w[1], 32);
7489          } else if (NUMBER_QNAN_PAIR(pwt->w[1], pws->w[1], 32, status)) {
7490              MSA_FLOAT_MAXOP(pwx->w[1], min, pwt->w[1], pwt->w[1], 32);
7491          } else {
7492              MSA_FLOAT_MAXOP(pwx->w[1], min, pws->w[1], pwt->w[1], 32);
7493          }
7494  
7495          if (NUMBER_QNAN_PAIR(pws->w[2], pwt->w[2], 32, status)) {
7496              MSA_FLOAT_MAXOP(pwx->w[2], min, pws->w[2], pws->w[2], 32);
7497          } else if (NUMBER_QNAN_PAIR(pwt->w[2], pws->w[2], 32, status)) {
7498              MSA_FLOAT_MAXOP(pwx->w[2], min, pwt->w[2], pwt->w[2], 32);
7499          } else {
7500              MSA_FLOAT_MAXOP(pwx->w[2], min, pws->w[2], pwt->w[2], 32);
7501          }
7502  
7503          if (NUMBER_QNAN_PAIR(pws->w[3], pwt->w[3], 32, status)) {
7504              MSA_FLOAT_MAXOP(pwx->w[3], min, pws->w[3], pws->w[3], 32);
7505          } else if (NUMBER_QNAN_PAIR(pwt->w[3], pws->w[3], 32, status)) {
7506              MSA_FLOAT_MAXOP(pwx->w[3], min, pwt->w[3], pwt->w[3], 32);
7507          } else {
7508              MSA_FLOAT_MAXOP(pwx->w[3], min, pws->w[3], pwt->w[3], 32);
7509          }
7510  
7511      } else if (df == DF_DOUBLE) {
7512  
7513          if (NUMBER_QNAN_PAIR(pws->d[0], pwt->d[0], 64, status)) {
7514              MSA_FLOAT_MAXOP(pwx->d[0], min, pws->d[0], pws->d[0], 64);
7515          } else if (NUMBER_QNAN_PAIR(pwt->d[0], pws->d[0], 64, status)) {
7516              MSA_FLOAT_MAXOP(pwx->d[0], min, pwt->d[0], pwt->d[0], 64);
7517          } else {
7518              MSA_FLOAT_MAXOP(pwx->d[0], min, pws->d[0], pwt->d[0], 64);
7519          }
7520  
7521          if (NUMBER_QNAN_PAIR(pws->d[1], pwt->d[1], 64, status)) {
7522              MSA_FLOAT_MAXOP(pwx->d[1], min, pws->d[1], pws->d[1], 64);
7523          } else if (NUMBER_QNAN_PAIR(pwt->d[1], pws->d[1], 64, status)) {
7524              MSA_FLOAT_MAXOP(pwx->d[1], min, pwt->d[1], pwt->d[1], 64);
7525          } else {
7526              MSA_FLOAT_MAXOP(pwx->d[1], min, pws->d[1], pwt->d[1], 64);
7527          }
7528  
7529      } else {
7530  
7531          g_assert_not_reached();
7532  
7533      }
7534  
7535      check_msacsr_cause(env, GETPC());
7536  
7537      msa_move_v(pwd, pwx);
7538  }
7539  
7540  void helper_msa_fmin_a_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7541          uint32_t ws, uint32_t wt)
7542  {
7543      float_status *status = &env->active_tc.msa_fp_status;
7544      wr_t wx, *pwx = &wx;
7545      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7546      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7547      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7548  
7549      clear_msacsr_cause(env);
7550  
7551      if (df == DF_WORD) {
7552          FMAXMIN_A(min, max, pwx->w[0], pws->w[0], pwt->w[0], 32, status);
7553          FMAXMIN_A(min, max, pwx->w[1], pws->w[1], pwt->w[1], 32, status);
7554          FMAXMIN_A(min, max, pwx->w[2], pws->w[2], pwt->w[2], 32, status);
7555          FMAXMIN_A(min, max, pwx->w[3], pws->w[3], pwt->w[3], 32, status);
7556      } else if (df == DF_DOUBLE) {
7557          FMAXMIN_A(min, max, pwx->d[0], pws->d[0], pwt->d[0], 64, status);
7558          FMAXMIN_A(min, max, pwx->d[1], pws->d[1], pwt->d[1], 64, status);
7559      } else {
7560          g_assert_not_reached();
7561      }
7562  
7563      check_msacsr_cause(env, GETPC());
7564  
7565      msa_move_v(pwd, pwx);
7566  }
7567  
7568  void helper_msa_fmax_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7569          uint32_t ws, uint32_t wt)
7570  {
7571       float_status *status = &env->active_tc.msa_fp_status;
7572      wr_t wx, *pwx = &wx;
7573      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7574      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7575      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7576  
7577      clear_msacsr_cause(env);
7578  
7579      if (df == DF_WORD) {
7580  
7581          if (NUMBER_QNAN_PAIR(pws->w[0], pwt->w[0], 32, status)) {
7582              MSA_FLOAT_MAXOP(pwx->w[0], max, pws->w[0], pws->w[0], 32);
7583          } else if (NUMBER_QNAN_PAIR(pwt->w[0], pws->w[0], 32, status)) {
7584              MSA_FLOAT_MAXOP(pwx->w[0], max, pwt->w[0], pwt->w[0], 32);
7585          } else {
7586              MSA_FLOAT_MAXOP(pwx->w[0], max, pws->w[0], pwt->w[0], 32);
7587          }
7588  
7589          if (NUMBER_QNAN_PAIR(pws->w[1], pwt->w[1], 32, status)) {
7590              MSA_FLOAT_MAXOP(pwx->w[1], max, pws->w[1], pws->w[1], 32);
7591          } else if (NUMBER_QNAN_PAIR(pwt->w[1], pws->w[1], 32, status)) {
7592              MSA_FLOAT_MAXOP(pwx->w[1], max, pwt->w[1], pwt->w[1], 32);
7593          } else {
7594              MSA_FLOAT_MAXOP(pwx->w[1], max, pws->w[1], pwt->w[1], 32);
7595          }
7596  
7597          if (NUMBER_QNAN_PAIR(pws->w[2], pwt->w[2], 32, status)) {
7598              MSA_FLOAT_MAXOP(pwx->w[2], max, pws->w[2], pws->w[2], 32);
7599          } else if (NUMBER_QNAN_PAIR(pwt->w[2], pws->w[2], 32, status)) {
7600              MSA_FLOAT_MAXOP(pwx->w[2], max, pwt->w[2], pwt->w[2], 32);
7601          } else {
7602              MSA_FLOAT_MAXOP(pwx->w[2], max, pws->w[2], pwt->w[2], 32);
7603          }
7604  
7605          if (NUMBER_QNAN_PAIR(pws->w[3], pwt->w[3], 32, status)) {
7606              MSA_FLOAT_MAXOP(pwx->w[3], max, pws->w[3], pws->w[3], 32);
7607          } else if (NUMBER_QNAN_PAIR(pwt->w[3], pws->w[3], 32, status)) {
7608              MSA_FLOAT_MAXOP(pwx->w[3], max, pwt->w[3], pwt->w[3], 32);
7609          } else {
7610              MSA_FLOAT_MAXOP(pwx->w[3], max, pws->w[3], pwt->w[3], 32);
7611          }
7612  
7613      } else if (df == DF_DOUBLE) {
7614  
7615          if (NUMBER_QNAN_PAIR(pws->d[0], pwt->d[0], 64, status)) {
7616              MSA_FLOAT_MAXOP(pwx->d[0], max, pws->d[0], pws->d[0], 64);
7617          } else if (NUMBER_QNAN_PAIR(pwt->d[0], pws->d[0], 64, status)) {
7618              MSA_FLOAT_MAXOP(pwx->d[0], max, pwt->d[0], pwt->d[0], 64);
7619          } else {
7620              MSA_FLOAT_MAXOP(pwx->d[0], max, pws->d[0], pwt->d[0], 64);
7621          }
7622  
7623          if (NUMBER_QNAN_PAIR(pws->d[1], pwt->d[1], 64, status)) {
7624              MSA_FLOAT_MAXOP(pwx->d[1], max, pws->d[1], pws->d[1], 64);
7625          } else if (NUMBER_QNAN_PAIR(pwt->d[1], pws->d[1], 64, status)) {
7626              MSA_FLOAT_MAXOP(pwx->d[1], max, pwt->d[1], pwt->d[1], 64);
7627          } else {
7628              MSA_FLOAT_MAXOP(pwx->d[1], max, pws->d[1], pwt->d[1], 64);
7629          }
7630  
7631      } else {
7632  
7633          g_assert_not_reached();
7634  
7635      }
7636  
7637      check_msacsr_cause(env, GETPC());
7638  
7639      msa_move_v(pwd, pwx);
7640  }
7641  
7642  void helper_msa_fmax_a_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7643          uint32_t ws, uint32_t wt)
7644  {
7645      float_status *status = &env->active_tc.msa_fp_status;
7646      wr_t wx, *pwx = &wx;
7647      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7648      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7649      wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7650  
7651      clear_msacsr_cause(env);
7652  
7653      if (df == DF_WORD) {
7654          FMAXMIN_A(max, min, pwx->w[0], pws->w[0], pwt->w[0], 32, status);
7655          FMAXMIN_A(max, min, pwx->w[1], pws->w[1], pwt->w[1], 32, status);
7656          FMAXMIN_A(max, min, pwx->w[2], pws->w[2], pwt->w[2], 32, status);
7657          FMAXMIN_A(max, min, pwx->w[3], pws->w[3], pwt->w[3], 32, status);
7658      } else if (df == DF_DOUBLE) {
7659          FMAXMIN_A(max, min, pwx->d[0], pws->d[0], pwt->d[0], 64, status);
7660          FMAXMIN_A(max, min, pwx->d[1], pws->d[1], pwt->d[1], 64, status);
7661      } else {
7662          g_assert_not_reached();
7663      }
7664  
7665      check_msacsr_cause(env, GETPC());
7666  
7667      msa_move_v(pwd, pwx);
7668  }
7669  
7670  void helper_msa_fclass_df(CPUMIPSState *env, uint32_t df,
7671          uint32_t wd, uint32_t ws)
7672  {
7673      float_status *status = &env->active_tc.msa_fp_status;
7674  
7675      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7676      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7677      if (df == DF_WORD) {
7678          pwd->w[0] = float_class_s(pws->w[0], status);
7679          pwd->w[1] = float_class_s(pws->w[1], status);
7680          pwd->w[2] = float_class_s(pws->w[2], status);
7681          pwd->w[3] = float_class_s(pws->w[3], status);
7682      } else if (df == DF_DOUBLE) {
7683          pwd->d[0] = float_class_d(pws->d[0], status);
7684          pwd->d[1] = float_class_d(pws->d[1], status);
7685      } else {
7686          g_assert_not_reached();
7687      }
7688  }
7689  
/*
 * One-operand wrapper that forces the result to 0 for NaN inputs.
 *
 * Runs float<BITS>_<OP>(ARG, status) after resetting the exception flags
 * of env->active_tc.msa_fp_status, then calls
 * update_msacsr(env, CLEAR_FS_UNDERFLOW, 0).  Afterwards:
 *   - if get_enabled_exceptions(env, c) is non-zero, DEST is overwritten
 *     with FLOAT_SNAN<BITS>(status) whose low six bits are cleared and
 *     OR-ed with 'c';
 *   - otherwise, if ARG is any kind of NaN, DEST is set to 0.
 *
 * ARG is expanded twice, so it should be free of side effects.  Requires
 * 'env' in scope; declares locals 'status' and 'c'.
 */
#define MSA_FLOAT_UNOP0(DEST, OP, ARG, BITS)                                \
    do {                                                                    \
        float_status *status = &env->active_tc.msa_fp_status;               \
        int c;                                                              \
                                                                            \
        set_float_exception_flags(0, status);                               \
        DEST = float ## BITS ## _ ## OP(ARG, status);                       \
        c = update_msacsr(env, CLEAR_FS_UNDERFLOW, 0);                      \
                                                                            \
        if (get_enabled_exceptions(env, c)) {                               \
            DEST = ((FLOAT_SNAN ## BITS(status) >> 6) << 6) | c;            \
        } else if (float ## BITS ## _is_any_nan(ARG)) {                     \
            DEST = 0;                                                       \
        }                                                                   \
    } while (0)
7705  
7706  void helper_msa_ftrunc_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7707                              uint32_t ws)
7708  {
7709      wr_t wx, *pwx = &wx;
7710      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7711      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7712      uint32_t i;
7713  
7714      clear_msacsr_cause(env);
7715  
7716      switch (df) {
7717      case DF_WORD:
7718          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7719              MSA_FLOAT_UNOP0(pwx->w[i], to_int32_round_to_zero, pws->w[i], 32);
7720          }
7721          break;
7722      case DF_DOUBLE:
7723          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7724              MSA_FLOAT_UNOP0(pwx->d[i], to_int64_round_to_zero, pws->d[i], 64);
7725          }
7726          break;
7727      default:
7728          g_assert_not_reached();
7729      }
7730  
7731      check_msacsr_cause(env, GETPC());
7732  
7733      msa_move_v(pwd, pwx);
7734  }
7735  
7736  void helper_msa_ftrunc_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7737                              uint32_t ws)
7738  {
7739      wr_t wx, *pwx = &wx;
7740      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7741      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7742      uint32_t i;
7743  
7744      clear_msacsr_cause(env);
7745  
7746      switch (df) {
7747      case DF_WORD:
7748          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7749              MSA_FLOAT_UNOP0(pwx->w[i], to_uint32_round_to_zero, pws->w[i], 32);
7750          }
7751          break;
7752      case DF_DOUBLE:
7753          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7754              MSA_FLOAT_UNOP0(pwx->d[i], to_uint64_round_to_zero, pws->d[i], 64);
7755          }
7756          break;
7757      default:
7758          g_assert_not_reached();
7759      }
7760  
7761      check_msacsr_cause(env, GETPC());
7762  
7763      msa_move_v(pwd, pwx);
7764  }
7765  
7766  void helper_msa_fsqrt_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7767                           uint32_t ws)
7768  {
7769      wr_t wx, *pwx = &wx;
7770      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7771      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7772      uint32_t i;
7773  
7774      clear_msacsr_cause(env);
7775  
7776      switch (df) {
7777      case DF_WORD:
7778          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7779              MSA_FLOAT_UNOP(pwx->w[i], sqrt, pws->w[i], 32);
7780          }
7781          break;
7782      case DF_DOUBLE:
7783          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7784              MSA_FLOAT_UNOP(pwx->d[i], sqrt, pws->d[i], 64);
7785          }
7786          break;
7787      default:
7788          g_assert_not_reached();
7789      }
7790  
7791      check_msacsr_cause(env, GETPC());
7792  
7793      msa_move_v(pwd, pwx);
7794  }
7795  
/*
 * Store float<BITS>_div(FLOAT_ONE<BITS>, ARG, status) in DEST.
 *
 * ARG is evaluated exactly once, after the exception flags of
 * env->active_tc.msa_fp_status have been reset, and kept in the local
 * 'arg_'.  This matters because a caller may pass an expression that
 * itself performs a softfloat operation (the reciprocal-square-root
 * helper passes a float<BITS>_sqrt() call): such an expression is now
 * computed once instead of twice, and any flags it raises are still seen
 * by update_msacsr().
 *
 * update_msacsr() receives 0 as its action when arg_ is an infinity or
 * DEST is a quiet NaN, and RECIPROCAL_INEXACT otherwise; its third
 * argument is IS_DENORMAL(DEST, BITS).  If get_enabled_exceptions(env, c)
 * is non-zero, DEST is overwritten with FLOAT_SNAN<BITS>(status) whose
 * low six bits are cleared and OR-ed with 'c'.
 *
 * Requires 'env' in scope; declares locals 'status', 'arg_' and 'c'.
 */
#define MSA_FLOAT_RECIPROCAL(DEST, ARG, BITS)                               \
    do {                                                                    \
        float_status *status = &env->active_tc.msa_fp_status;               \
        float ## BITS arg_;                                                 \
        int c;                                                              \
                                                                            \
        set_float_exception_flags(0, status);                               \
        arg_ = (ARG);                                                       \
        DEST = float ## BITS ## _div(FLOAT_ONE ## BITS, arg_, status);      \
        c = update_msacsr(env, float ## BITS ## _is_infinity(arg_) ||       \
                          float ## BITS ## _is_quiet_nan(DEST, status) ?    \
                          0 : RECIPROCAL_INEXACT,                           \
                          IS_DENORMAL(DEST, BITS));                         \
                                                                            \
        if (get_enabled_exceptions(env, c)) {                               \
            DEST = ((FLOAT_SNAN ## BITS(status) >> 6) << 6) | c;            \
        }                                                                   \
    } while (0)
7812  
7813  void helper_msa_frsqrt_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7814                            uint32_t ws)
7815  {
7816      wr_t wx, *pwx = &wx;
7817      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7818      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7819      uint32_t i;
7820  
7821      clear_msacsr_cause(env);
7822  
7823      switch (df) {
7824      case DF_WORD:
7825          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7826              MSA_FLOAT_RECIPROCAL(pwx->w[i], float32_sqrt(pws->w[i],
7827                      &env->active_tc.msa_fp_status), 32);
7828          }
7829          break;
7830      case DF_DOUBLE:
7831          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7832              MSA_FLOAT_RECIPROCAL(pwx->d[i], float64_sqrt(pws->d[i],
7833                      &env->active_tc.msa_fp_status), 64);
7834          }
7835          break;
7836      default:
7837          g_assert_not_reached();
7838      }
7839  
7840      check_msacsr_cause(env, GETPC());
7841  
7842      msa_move_v(pwd, pwx);
7843  }
7844  
7845  void helper_msa_frcp_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7846                          uint32_t ws)
7847  {
7848      wr_t wx, *pwx = &wx;
7849      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7850      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7851      uint32_t i;
7852  
7853      clear_msacsr_cause(env);
7854  
7855      switch (df) {
7856      case DF_WORD:
7857          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7858              MSA_FLOAT_RECIPROCAL(pwx->w[i], pws->w[i], 32);
7859          }
7860          break;
7861      case DF_DOUBLE:
7862          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7863              MSA_FLOAT_RECIPROCAL(pwx->d[i], pws->d[i], 64);
7864          }
7865          break;
7866      default:
7867          g_assert_not_reached();
7868      }
7869  
7870      check_msacsr_cause(env, GETPC());
7871  
7872      msa_move_v(pwd, pwx);
7873  }
7874  
7875  void helper_msa_frint_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7876                           uint32_t ws)
7877  {
7878      wr_t wx, *pwx = &wx;
7879      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7880      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7881      uint32_t i;
7882  
7883      clear_msacsr_cause(env);
7884  
7885      switch (df) {
7886      case DF_WORD:
7887          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7888              MSA_FLOAT_UNOP(pwx->w[i], round_to_int, pws->w[i], 32);
7889          }
7890          break;
7891      case DF_DOUBLE:
7892          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7893              MSA_FLOAT_UNOP(pwx->d[i], round_to_int, pws->d[i], 64);
7894          }
7895          break;
7896      default:
7897          g_assert_not_reached();
7898      }
7899  
7900      check_msacsr_cause(env, GETPC());
7901  
7902      msa_move_v(pwd, pwx);
7903  }
7904  
/*
 * MSA_FLOAT_LOGB(DEST, ARG, BITS): DEST = round_to_int(log2(ARG)) for a
 * BITS-wide float, computed with rounding forced to float_round_down.
 *
 * Steps, in order:
 *  - the exception flags in env->active_tc.msa_fp_status are reset to 0;
 *  - the rounding mode is switched to float_round_down for the
 *    float<BITS>_log2 and float<BITS>_round_to_int calls;
 *  - the rounding mode is set back from the RM field of
 *    env->active_tc.msacsr, translated through ieee_rm[];
 *  - float_flag_inexact is stripped from the accumulated exception flags;
 *  - update_msacsr() is called with 0 and IS_DENORMAL(DEST);
 *  - if get_enabled_exceptions() reports anything for the returned value c,
 *    DEST is replaced by the FLOAT_SNAN<BITS> pattern with its low 6 bits
 *    cleared and c OR-ed in.
 *
 * Expects a variable named "env" in the expanding scope and declares its
 * own "status" and "c" locals.  DEST is expanded several times and must be
 * a plain lvalue.
 */
#define MSA_FLOAT_LOGB(DEST, ARG, BITS)                                     \
    do {                                                                    \
        float_status *status = &env->active_tc.msa_fp_status;               \
        int c;                                                              \
                                                                            \
        set_float_exception_flags(0, status);                               \
        set_float_rounding_mode(float_round_down, status);                  \
        DEST = float ## BITS ## _ ## log2(ARG, status);                     \
        DEST = float ## BITS ## _ ## round_to_int(DEST, status);            \
        set_float_rounding_mode(ieee_rm[(env->active_tc.msacsr &            \
                                         MSACSR_RM_MASK) >> MSACSR_RM],     \
                                status);                                    \
                                                                            \
        set_float_exception_flags(get_float_exception_flags(status) &       \
                                  (~float_flag_inexact),                    \
                                  status);                                  \
                                                                            \
        c = update_msacsr(env, 0, IS_DENORMAL(DEST, BITS));                 \
                                                                            \
        if (get_enabled_exceptions(env, c)) {                               \
            DEST = ((FLOAT_SNAN ## BITS(status) >> 6) << 6) | c;            \
        }                                                                   \
    } while (0)
7928  
7929  void helper_msa_flog2_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7930                           uint32_t ws)
7931  {
7932      wr_t wx, *pwx = &wx;
7933      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7934      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7935      uint32_t i;
7936  
7937      clear_msacsr_cause(env);
7938  
7939      switch (df) {
7940      case DF_WORD:
7941          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7942              MSA_FLOAT_LOGB(pwx->w[i], pws->w[i], 32);
7943          }
7944          break;
7945      case DF_DOUBLE:
7946          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7947              MSA_FLOAT_LOGB(pwx->d[i], pws->d[i], 64);
7948          }
7949          break;
7950      default:
7951          g_assert_not_reached();
7952      }
7953  
7954      check_msacsr_cause(env, GETPC());
7955  
7956      msa_move_v(pwd, pwx);
7957  }
7958  
7959  void helper_msa_fexupl_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7960                            uint32_t ws)
7961  {
7962      wr_t wx, *pwx = &wx;
7963      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7964      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7965      uint32_t i;
7966  
7967      clear_msacsr_cause(env);
7968  
7969      switch (df) {
7970      case DF_WORD:
7971          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7972              /*
7973               * Half precision floats come in two formats: standard
7974               * IEEE and "ARM" format.  The latter gains extra exponent
7975               * range by omitting the NaN/Inf encodings.
7976               */
7977              bool ieee = true;
7978  
7979              MSA_FLOAT_BINOP(pwx->w[i], from_float16, Lh(pws, i), ieee, 32);
7980          }
7981          break;
7982      case DF_DOUBLE:
7983          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7984              MSA_FLOAT_UNOP(pwx->d[i], from_float32, Lw(pws, i), 64);
7985          }
7986          break;
7987      default:
7988          g_assert_not_reached();
7989      }
7990  
7991      check_msacsr_cause(env, GETPC());
7992      msa_move_v(pwd, pwx);
7993  }
7994  
7995  void helper_msa_fexupr_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7996                            uint32_t ws)
7997  {
7998      wr_t wx, *pwx = &wx;
7999      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8000      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
8001      uint32_t i;
8002  
8003      clear_msacsr_cause(env);
8004  
8005      switch (df) {
8006      case DF_WORD:
8007          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
8008              /*
8009               * Half precision floats come in two formats: standard
8010               * IEEE and "ARM" format.  The latter gains extra exponent
8011               * range by omitting the NaN/Inf encodings.
8012               */
8013              bool ieee = true;
8014  
8015              MSA_FLOAT_BINOP(pwx->w[i], from_float16, Rh(pws, i), ieee, 32);
8016          }
8017          break;
8018      case DF_DOUBLE:
8019          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
8020              MSA_FLOAT_UNOP(pwx->d[i], from_float32, Rw(pws, i), 64);
8021          }
8022          break;
8023      default:
8024          g_assert_not_reached();
8025      }
8026  
8027      check_msacsr_cause(env, GETPC());
8028      msa_move_v(pwd, pwx);
8029  }
8030  
8031  void helper_msa_ffql_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
8032                          uint32_t ws)
8033  {
8034      wr_t wx, *pwx = &wx;
8035      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8036      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
8037      uint32_t i;
8038  
8039      switch (df) {
8040      case DF_WORD:
8041          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
8042              MSA_FLOAT_UNOP(pwx->w[i], from_q16, Lh(pws, i), 32);
8043          }
8044          break;
8045      case DF_DOUBLE:
8046          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
8047              MSA_FLOAT_UNOP(pwx->d[i], from_q32, Lw(pws, i), 64);
8048          }
8049          break;
8050      default:
8051          g_assert_not_reached();
8052      }
8053  
8054      msa_move_v(pwd, pwx);
8055  }
8056  
8057  void helper_msa_ffqr_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
8058                          uint32_t ws)
8059  {
8060      wr_t wx, *pwx = &wx;
8061      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8062      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
8063      uint32_t i;
8064  
8065      switch (df) {
8066      case DF_WORD:
8067          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
8068              MSA_FLOAT_UNOP(pwx->w[i], from_q16, Rh(pws, i), 32);
8069          }
8070          break;
8071      case DF_DOUBLE:
8072          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
8073              MSA_FLOAT_UNOP(pwx->d[i], from_q32, Rw(pws, i), 64);
8074          }
8075          break;
8076      default:
8077          g_assert_not_reached();
8078      }
8079  
8080      msa_move_v(pwd, pwx);
8081  }
8082  
8083  void helper_msa_ftint_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
8084                             uint32_t ws)
8085  {
8086      wr_t wx, *pwx = &wx;
8087      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8088      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
8089      uint32_t i;
8090  
8091      clear_msacsr_cause(env);
8092  
8093      switch (df) {
8094      case DF_WORD:
8095          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
8096              MSA_FLOAT_UNOP0(pwx->w[i], to_int32, pws->w[i], 32);
8097          }
8098          break;
8099      case DF_DOUBLE:
8100          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
8101              MSA_FLOAT_UNOP0(pwx->d[i], to_int64, pws->d[i], 64);
8102          }
8103          break;
8104      default:
8105          g_assert_not_reached();
8106      }
8107  
8108      check_msacsr_cause(env, GETPC());
8109  
8110      msa_move_v(pwd, pwx);
8111  }
8112  
8113  void helper_msa_ftint_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
8114                             uint32_t ws)
8115  {
8116      wr_t wx, *pwx = &wx;
8117      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8118      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
8119      uint32_t i;
8120  
8121      clear_msacsr_cause(env);
8122  
8123      switch (df) {
8124      case DF_WORD:
8125          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
8126              MSA_FLOAT_UNOP0(pwx->w[i], to_uint32, pws->w[i], 32);
8127          }
8128          break;
8129      case DF_DOUBLE:
8130          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
8131              MSA_FLOAT_UNOP0(pwx->d[i], to_uint64, pws->d[i], 64);
8132          }
8133          break;
8134      default:
8135          g_assert_not_reached();
8136      }
8137  
8138      check_msacsr_cause(env, GETPC());
8139  
8140      msa_move_v(pwd, pwx);
8141  }
8142  
/*
 * Aliases that spell the integer-to-float conversion routines as
 * float<BITS>_from_<type>.
 * NOTE(review): presumably needed because MSA_FLOAT_UNOP builds its callee
 * name as float<BITS>_<OP>, the same way MSA_FLOAT_RECIPROCAL pastes
 * float<BITS>_div -- confirm against the MSA_FLOAT_UNOP definition.
 */
#define float32_from_int32 int32_to_float32
#define float32_from_uint32 uint32_to_float32

#define float64_from_int64 int64_to_float64
#define float64_from_uint64 uint64_to_float64
8148  
8149  void helper_msa_ffint_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
8150                             uint32_t ws)
8151  {
8152      wr_t wx, *pwx = &wx;
8153      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8154      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
8155      uint32_t i;
8156  
8157      clear_msacsr_cause(env);
8158  
8159      switch (df) {
8160      case DF_WORD:
8161          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
8162              MSA_FLOAT_UNOP(pwx->w[i], from_int32, pws->w[i], 32);
8163          }
8164          break;
8165      case DF_DOUBLE:
8166          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
8167              MSA_FLOAT_UNOP(pwx->d[i], from_int64, pws->d[i], 64);
8168          }
8169          break;
8170      default:
8171          g_assert_not_reached();
8172      }
8173  
8174      check_msacsr_cause(env, GETPC());
8175  
8176      msa_move_v(pwd, pwx);
8177  }
8178  
8179  void helper_msa_ffint_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
8180                             uint32_t ws)
8181  {
8182      wr_t wx, *pwx = &wx;
8183      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8184      wr_t *pws = &(env->active_fpu.fpr[ws].wr);
8185      uint32_t i;
8186  
8187      clear_msacsr_cause(env);
8188  
8189      switch (df) {
8190      case DF_WORD:
8191          for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
8192              MSA_FLOAT_UNOP(pwx->w[i], from_uint32, pws->w[i], 32);
8193          }
8194          break;
8195      case DF_DOUBLE:
8196          for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
8197              MSA_FLOAT_UNOP(pwx->d[i], from_uint64, pws->d[i], 64);
8198          }
8199          break;
8200      default:
8201          g_assert_not_reached();
8202      }
8203  
8204      check_msacsr_cause(env, GETPC());
8205  
8206      msa_move_v(pwd, pwx);
8207  }
8208  
/*
 * Width in bits of one element of data format df, i.e. 8 << df.
 * This repeats, token for token, the DF_BITS definition near the top of
 * this file.
 */
#define DF_BITS(df) (1 << ((df) + 3))

/*
 * Number of df-sized elements in one MSA_WRLEN-wide vector register.
 * NOTE(review): DF_ELEMENTS is already used by helpers earlier in this
 * file, so this is presumably a repeat of an earlier definition as well.
 */
#define DF_ELEMENTS(df) (MSA_WRLEN / DF_BITS(df))
8214  
/*
 * Swap the two bytes inside each of the four 16-bit lanes of x.
 * The lanes themselves stay where they are.
 */
static inline uint64_t bswap16x4(uint64_t x)
{
    const uint64_t low_bytes = 0x00ff00ff00ff00ffull;
    uint64_t moved_up = (x & low_bytes) << 8;
    uint64_t moved_down = (x >> 8) & low_bytes;

    return moved_up | moved_down;
}
8220  
/*
 * Byte-swap each of the two 32-bit words of x: reverse all eight bytes
 * with bswap64(), then rotate by 32 bits with ror64().
 */
static inline uint64_t bswap32x2(uint64_t x)
{
    uint64_t reversed = bswap64(x);

    return ror64(reversed, 32);
}
8225  
8226  void helper_msa_ld_b(CPUMIPSState *env, uint32_t wd,
8227                       target_ulong addr)
8228  {
8229      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8230      uintptr_t ra = GETPC();
8231      uint64_t d0, d1;
8232  
8233      /* Load 8 bytes at a time.  Vector element ordering makes this LE.  */
8234      d0 = cpu_ldq_le_data_ra(env, addr + 0, ra);
8235      d1 = cpu_ldq_le_data_ra(env, addr + 8, ra);
8236      pwd->d[0] = d0;
8237      pwd->d[1] = d1;
8238  }
8239  
8240  void helper_msa_ld_h(CPUMIPSState *env, uint32_t wd,
8241                       target_ulong addr)
8242  {
8243      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8244      uintptr_t ra = GETPC();
8245      uint64_t d0, d1;
8246  
8247      /*
8248       * Load 8 bytes at a time.  Use little-endian load, then for
8249       * big-endian target, we must then swap the four halfwords.
8250       */
8251      d0 = cpu_ldq_le_data_ra(env, addr + 0, ra);
8252      d1 = cpu_ldq_le_data_ra(env, addr + 8, ra);
8253      if (mips_env_is_bigendian(env)) {
8254          d0 = bswap16x4(d0);
8255          d1 = bswap16x4(d1);
8256      }
8257      pwd->d[0] = d0;
8258      pwd->d[1] = d1;
8259  }
8260  
8261  void helper_msa_ld_w(CPUMIPSState *env, uint32_t wd,
8262                       target_ulong addr)
8263  {
8264      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8265      uintptr_t ra = GETPC();
8266      uint64_t d0, d1;
8267  
8268      /*
8269       * Load 8 bytes at a time.  Use little-endian load, then for
8270       * big-endian target, we must then bswap the two words.
8271       */
8272      d0 = cpu_ldq_le_data_ra(env, addr + 0, ra);
8273      d1 = cpu_ldq_le_data_ra(env, addr + 8, ra);
8274      if (mips_env_is_bigendian(env)) {
8275          d0 = bswap32x2(d0);
8276          d1 = bswap32x2(d1);
8277      }
8278      pwd->d[0] = d0;
8279      pwd->d[1] = d1;
8280  }
8281  
8282  void helper_msa_ld_d(CPUMIPSState *env, uint32_t wd,
8283                       target_ulong addr)
8284  {
8285      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8286      uintptr_t ra = GETPC();
8287      uint64_t d0, d1;
8288  
8289      d0 = cpu_ldq_data_ra(env, addr + 0, ra);
8290      d1 = cpu_ldq_data_ra(env, addr + 8, ra);
8291      pwd->d[0] = d0;
8292      pwd->d[1] = d1;
8293  }
8294  
/*
 * True when an access of MSA_WRLEN / 8 bytes starting at address x would
 * run past the end of the page containing x: the offset of x within its
 * page plus the access length minus one reaches TARGET_PAGE_SIZE.
 */
#define MSA_PAGESPAN(x) \
        ((((x) & ~TARGET_PAGE_MASK) + MSA_WRLEN / 8 - 1) >= TARGET_PAGE_SIZE)
8297  
8298  static inline void ensure_writable_pages(CPUMIPSState *env,
8299                                           target_ulong addr,
8300                                           int mmu_idx,
8301                                           uintptr_t retaddr)
8302  {
8303      /* FIXME: Probe the actual accesses (pass and use a size) */
8304      if (unlikely(MSA_PAGESPAN(addr))) {
8305          /* first page */
8306          probe_write(env, addr, 0, mmu_idx, retaddr);
8307          /* second page */
8308          addr = (addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
8309          probe_write(env, addr, 0, mmu_idx, retaddr);
8310      }
8311  }
8312  
8313  void helper_msa_st_b(CPUMIPSState *env, uint32_t wd,
8314                       target_ulong addr)
8315  {
8316      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8317      int mmu_idx = mips_env_mmu_index(env);
8318      uintptr_t ra = GETPC();
8319  
8320      ensure_writable_pages(env, addr, mmu_idx, ra);
8321  
8322      /* Store 8 bytes at a time.  Vector element ordering makes this LE.  */
8323      cpu_stq_le_data_ra(env, addr + 0, pwd->d[0], ra);
8324      cpu_stq_le_data_ra(env, addr + 8, pwd->d[1], ra);
8325  }
8326  
8327  void helper_msa_st_h(CPUMIPSState *env, uint32_t wd,
8328                       target_ulong addr)
8329  {
8330      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8331      int mmu_idx = mips_env_mmu_index(env);
8332      uintptr_t ra = GETPC();
8333      uint64_t d0, d1;
8334  
8335      ensure_writable_pages(env, addr, mmu_idx, ra);
8336  
8337      /* Store 8 bytes at a time.  See helper_msa_ld_h. */
8338      d0 = pwd->d[0];
8339      d1 = pwd->d[1];
8340      if (mips_env_is_bigendian(env)) {
8341          d0 = bswap16x4(d0);
8342          d1 = bswap16x4(d1);
8343      }
8344      cpu_stq_le_data_ra(env, addr + 0, d0, ra);
8345      cpu_stq_le_data_ra(env, addr + 8, d1, ra);
8346  }
8347  
8348  void helper_msa_st_w(CPUMIPSState *env, uint32_t wd,
8349                       target_ulong addr)
8350  {
8351      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8352      int mmu_idx = mips_env_mmu_index(env);
8353      uintptr_t ra = GETPC();
8354      uint64_t d0, d1;
8355  
8356      ensure_writable_pages(env, addr, mmu_idx, ra);
8357  
8358      /* Store 8 bytes at a time.  See helper_msa_ld_w. */
8359      d0 = pwd->d[0];
8360      d1 = pwd->d[1];
8361      if (mips_env_is_bigendian(env)) {
8362          d0 = bswap32x2(d0);
8363          d1 = bswap32x2(d1);
8364      }
8365      cpu_stq_le_data_ra(env, addr + 0, d0, ra);
8366      cpu_stq_le_data_ra(env, addr + 8, d1, ra);
8367  }
8368  
8369  void helper_msa_st_d(CPUMIPSState *env, uint32_t wd,
8370                       target_ulong addr)
8371  {
8372      wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8373      int mmu_idx = mips_env_mmu_index(env);
8374      uintptr_t ra = GETPC();
8375  
8376      ensure_writable_pages(env, addr, mmu_idx, GETPC());
8377  
8378      cpu_stq_data_ra(env, addr + 0, pwd->d[0], ra);
8379      cpu_stq_data_ra(env, addr + 8, pwd->d[1], ra);
8380  }
8381