xref: /qemu/tests/tcg/hexagon/mem_noshuf.c (revision aa09b3d5f8e2819d53a6fd81e655ddb3ef107a47)
1  /*
2   *  Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
3   *
4   *  This program is free software; you can redistribute it and/or modify
5   *  it under the terms of the GNU General Public License as published by
6   *  the Free Software Foundation; either version 2 of the License, or
7   *  (at your option) any later version.
8   *
9   *  This program is distributed in the hope that it will be useful,
10   *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11   *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12   *  GNU General Public License for more details.
13   *
14   *  You should have received a copy of the GNU General Public License
15   *  along with this program; if not, see <http://www.gnu.org/licenses/>.
16   */
17  
18  #include <stdio.h>
19  
20  /*
21   *  Make sure that the :mem_noshuf packet attribute is honored.
22   *  This is important when the addresses overlap.
23   *  The store instruction in slot 1 effectively executes first,
24   *  followed by the load instruction in slot 0.
25   */
26  
/*
 * Generate NAME(p, q, x): a single :mem_noshuf packet that stores x
 * through p with ST_OP and loads through q with LD_OP.  The loaded
 * value comes back as a 32-bit unsigned int.
 */
#define MEM_NOSHUF32(NAME, ST_TYPE, LD_TYPE, ST_OP, LD_OP) \
static inline unsigned int NAME(ST_TYPE *p, LD_TYPE *q, ST_TYPE x) \
{ \
    unsigned int loaded; \
    asm volatile("{\n\t" \
                 "    " #ST_OP "(%[st_addr]) = %[st_val]\n\t" \
                 "    %[ld_val] = " #LD_OP "(%[ld_addr])\n\t" \
                 "}:mem_noshuf\n" \
                 : [ld_val] "=r"(loaded) \
                 : [st_addr] "r"(p), [ld_addr] "r"(q), [st_val] "r"(x) \
                 : "memory"); \
    return loaded; \
}
40  
/*
 * Generate NAME(p, q, x): a single :mem_noshuf packet that stores x
 * through p with ST_OP and loads through q with LD_OP.  The loaded
 * value comes back as a 64-bit unsigned long long.
 */
#define MEM_NOSHUF64(NAME, ST_TYPE, LD_TYPE, ST_OP, LD_OP) \
static inline unsigned long long NAME(ST_TYPE *p, LD_TYPE *q, ST_TYPE x) \
{ \
    unsigned long long loaded; \
    asm volatile("{\n\t" \
                 "    " #ST_OP "(%[st_addr]) = %[st_val]\n\t" \
                 "    %[ld_val] = " #LD_OP "(%[ld_addr])\n\t" \
                 "}:mem_noshuf\n" \
                 : [ld_val] "=r"(loaded) \
                 : [st_addr] "r"(p), [ld_addr] "r"(q), [st_val] "r"(x) \
                 : "memory"); \
    return loaded; \
}
54  
55  /* Store byte combinations */
56  MEM_NOSHUF32(mem_noshuf_sb_lb,  signed char,  signed char,      memb, memb)
57  MEM_NOSHUF32(mem_noshuf_sb_lub, signed char,  unsigned char,    memb, memub)
58  MEM_NOSHUF32(mem_noshuf_sb_lh,  signed char,  signed short,     memb, memh)
59  MEM_NOSHUF32(mem_noshuf_sb_luh, signed char,  unsigned short,   memb, memuh)
60  MEM_NOSHUF32(mem_noshuf_sb_lw,  signed char,  signed int,       memb, memw)
61  MEM_NOSHUF64(mem_noshuf_sb_ld,  signed char,  signed long long, memb, memd)
62  
63  /* Store half combinations */
64  MEM_NOSHUF32(mem_noshuf_sh_lb,  signed short, signed char,      memh, memb)
65  MEM_NOSHUF32(mem_noshuf_sh_lub, signed short, unsigned char,    memh, memub)
66  MEM_NOSHUF32(mem_noshuf_sh_lh,  signed short, signed short,     memh, memh)
67  MEM_NOSHUF32(mem_noshuf_sh_luh, signed short, unsigned short,   memh, memuh)
68  MEM_NOSHUF32(mem_noshuf_sh_lw,  signed short, signed int,       memh, memw)
69  MEM_NOSHUF64(mem_noshuf_sh_ld,  signed short, signed long long, memh, memd)
70  
71  /* Store word combinations */
72  MEM_NOSHUF32(mem_noshuf_sw_lb,  signed int,   signed char,      memw, memb)
73  MEM_NOSHUF32(mem_noshuf_sw_lub, signed int,   unsigned char,    memw, memub)
74  MEM_NOSHUF32(mem_noshuf_sw_lh,  signed int,   signed short,     memw, memh)
75  MEM_NOSHUF32(mem_noshuf_sw_luh, signed int,   unsigned short,   memw, memuh)
76  MEM_NOSHUF32(mem_noshuf_sw_lw,  signed int,   signed int,       memw, memw)
77  MEM_NOSHUF64(mem_noshuf_sw_ld,  signed int,   signed long long, memw, memd)
78  
79  /* Store double combinations */
80  MEM_NOSHUF32(mem_noshuf_sd_lb,  long long,    signed char,      memd, memb)
81  MEM_NOSHUF32(mem_noshuf_sd_lub, long long,    unsigned char,    memd, memub)
82  MEM_NOSHUF32(mem_noshuf_sd_lh,  long long,    signed short,     memd, memh)
83  MEM_NOSHUF32(mem_noshuf_sd_luh, long long,    unsigned short,   memd, memuh)
84  MEM_NOSHUF32(mem_noshuf_sd_lw,  long long,    signed int,       memd, memw)
85  MEM_NOSHUF64(mem_noshuf_sd_ld,  long long,    signed long long, memd, memd)
86  
87  static inline int pred_lw_sw(int pred, int *p, int *q, int x, int y)
88  {
89      int ret;
90      asm volatile("p0 = cmp.eq(%5, #0)\n\t"
91                   "%0 = %3\n\t"
92                   "{\n\t"
93                   "    memw(%1) = %4\n\t"
94                   "    if (!p0) %0 = memw(%2)\n\t"
95                   "}:mem_noshuf\n"
96                   : "=&r"(ret)
97                   : "r"(p), "r"(q), "r"(x), "r"(y), "r"(pred)
98                   : "p0", "memory");
99      return ret;
100  }
101  
102  static inline int pred_lw_sw_pi(int pred, int *p, int *q, int x, int y)
103  {
104      int ret;
105      asm volatile("p0 = cmp.eq(%5, #0)\n\t"
106                   "%0 = %3\n\t"
107                   "r7 = %2\n\t"
108                   "{\n\t"
109                   "    memw(%1) = %4\n\t"
110                   "    if (!p0) %0 = memw(r7++#4)\n\t"
111                   "}:mem_noshuf\n"
112                   : "=&r"(ret)
113                   : "r"(p), "r"(q), "r"(x), "r"(y), "r"(pred)
114                   : "r7", "p0", "memory");
115      return ret;
116  }
117  
118  static inline long long pred_ld_sd(int pred, long long *p, long long *q,
119                                     long long x, long long y)
120  {
121      unsigned long long ret;
122      asm volatile("p0 = cmp.eq(%5, #0)\n\t"
123                   "%0 = %3\n\t"
124                   "{\n\t"
125                   "    memd(%1) = %4\n\t"
126                   "    if (!p0) %0 = memd(%2)\n\t"
127                   "}:mem_noshuf\n"
128                   : "=&r"(ret)
129                   : "r"(p), "r"(q), "r"(x), "r"(y), "r"(pred)
130                   : "p0", "memory");
131      return ret;
132  }
133  
134  static inline long long pred_ld_sd_pi(int pred, long long *p, long long *q,
135                                        long long x, long long y)
136  {
137      long long ret;
138      asm volatile("p0 = cmp.eq(%5, #0)\n\t"
139                   "%0 = %3\n\t"
140                   "r7 = %2\n\t"
141                   "{\n\t"
142                   "    memd(%1) = %4\n\t"
143                   "    if (!p0) %0 = memd(r7++#8)\n\t"
144                   "}:mem_noshuf\n"
145                   : "=&r"(ret)
146                   : "r"(p), "r"(q), "r"(x), "r"(y), "r"(pred)
147                   : "r7", "p0", "memory");
148      return ret;
149  }
150  
151  static inline unsigned int cancel_sw_lb(int pred, int *p, signed char *q, int x)
152  {
153      unsigned int ret;
154      asm volatile("p0 = cmp.eq(%4, #0)\n\t"
155                   "{\n\t"
156                   "    if (!p0) memw(%1) = %3\n\t"
157                   "    %0 = memb(%2)\n\t"
158                   "}:mem_noshuf\n"
159                   : "=r"(ret)
160                   : "r"(p), "r"(q), "r"(x), "r"(pred)
161                   : "p0", "memory");
162      return ret;
163  }
164  
165  static inline
166  unsigned long long cancel_sw_ld(int pred, int *p, long long *q, int x)
167  {
168      long long ret;
169      asm volatile("p0 = cmp.eq(%4, #0)\n\t"
170                   "{\n\t"
171                   "    if (!p0) memw(%1) = %3\n\t"
172                   "    %0 = memd(%2)\n\t"
173                   "}:mem_noshuf\n"
174                   : "=r"(ret)
175                   : "r"(p), "r"(q), "r"(x), "r"(pred)
176                   : "p0", "memory");
177      return ret;
178  }
179  
/*
 * Scratch memory shared by all tests.  Every member overlays the same
 * storage, so a store made through one view can be read back through any
 * other: doublewords (d/ud), words (w/uw), halfwords (h/uh), bytes (b/ub).
 */
typedef union {
    long long d[2];
    unsigned long long ud[2];
    int w[4];
    unsigned int uw[4];
    short h[8];
    unsigned short uh[8];
    signed char b[16];          /* plain char may be unsigned, keep "signed" */
    unsigned char ub[16];
} Memory;
190  
/* Number of failed checks; main() uses it as the exit status. */
int err;

/* Compare a 32-bit result with its expected value, tagging the call site. */
#define check32(n, expect) check32_(n, expect, __LINE__)

/*
 * Print a diagnostic and bump err when n differs from expect.
 *
 * n:      value produced by the test
 * expect: value the test should have produced
 * line:   source line of the check, included in the message
 */
static void check32_(int n, int expect, int line)
{
    if (n != expect) {
        /* %x takes an unsigned int, so convert explicitly */
        printf("ERROR: 0x%08x != 0x%08x, line %d\n",
               (unsigned int)n, (unsigned int)expect, line);
        err++;
    }
}

/* Compare a 64-bit result with its expected value, tagging the call site. */
#define check64(n, expect) check64_(n, expect, __LINE__)

/*
 * Print a diagnostic and bump err when n differs from expect.
 *
 * n:      value produced by the test
 * expect: value the test should have produced
 * line:   source line of the check, included in the message
 */
static void check64_(long long n, long long expect, int line)
{
    if (n != expect) {
        /* 16 hex digits so every 64-bit value prints at full width */
        printf("ERROR: 0x%016llx != 0x%016llx, line %d\n",
               (unsigned long long)n, (unsigned long long)expect, line);
        err++;
    }
}
212  
213  int main()
214  {
215      Memory n;
216      unsigned int res32;
217      unsigned long long res64;
218  
219      /*
220       * Store byte combinations
221       */
222      n.w[0] = ~0;
223      res32 = mem_noshuf_sb_lb(&n.b[0], &n.b[0], 0x87);
224      check32(res32, 0xffffff87);
225  
226      n.w[0] = ~0;
227      res32 = mem_noshuf_sb_lub(&n.b[0], &n.ub[0], 0x87);
228      check32(res32, 0x00000087);
229  
230      n.w[0] = ~0;
231      res32 = mem_noshuf_sb_lh(&n.b[0], &n.h[0], 0x87);
232      check32(res32, 0xffffff87);
233  
234      n.w[0] = ~0;
235      res32 = mem_noshuf_sb_luh(&n.b[0], &n.uh[0], 0x87);
236      check32(res32, 0x0000ff87);
237  
238      n.w[0] = ~0;
239      res32 = mem_noshuf_sb_lw(&n.b[0], &n.w[0], 0x87);
240      check32(res32, 0xffffff87);
241  
242      n.d[0] = ~0LL;
243      res64 = mem_noshuf_sb_ld(&n.b[0], &n.d[0], 0x87);
244      check64(res64, 0xffffffffffffff87LL);
245  
246      /*
247       * Store half combinations
248       */
249      n.w[0] = ~0;
250      res32 = mem_noshuf_sh_lb(&n.h[0], &n.b[0], 0x8787);
251      check32(res32, 0xffffff87);
252  
253      n.w[0] = ~0;
254      res32 = mem_noshuf_sh_lub(&n.h[0], &n.ub[1], 0x8f87);
255      check32(res32, 0x0000008f);
256  
257      n.w[0] = ~0;
258      res32 = mem_noshuf_sh_lh(&n.h[0], &n.h[0], 0x8a87);
259      check32(res32, 0xffff8a87);
260  
261      n.w[0] = ~0;
262      res32 = mem_noshuf_sh_luh(&n.h[0], &n.uh[0], 0x8a87);
263      check32(res32, 0x8a87);
264  
265      n.w[0] = ~0;
266      res32 = mem_noshuf_sh_lw(&n.h[1], &n.w[0], 0x8a87);
267      check32(res32, 0x8a87ffff);
268  
269      n.w[0] = ~0;
270      res64 = mem_noshuf_sh_ld(&n.h[1], &n.d[0], 0x8a87);
271      check64(res64, 0xffffffff8a87ffffLL);
272  
273      /*
274       * Store word combinations
275       */
276      n.w[0] = ~0;
277      res32 = mem_noshuf_sw_lb(&n.w[0], &n.b[0], 0x12345687);
278      check32(res32, 0xffffff87);
279  
280      n.w[0] = ~0;
281      res32 = mem_noshuf_sw_lub(&n.w[0], &n.ub[0], 0x12345687);
282      check32(res32, 0x00000087);
283  
284      n.w[0] = ~0;
285      res32 = mem_noshuf_sw_lh(&n.w[0], &n.h[0], 0x1234f678);
286      check32(res32, 0xfffff678);
287  
288      n.w[0] = ~0;
289      res32 = mem_noshuf_sw_luh(&n.w[0], &n.uh[0], 0x12345678);
290      check32(res32, 0x00005678);
291  
292      n.w[0] = ~0;
293      res32 = mem_noshuf_sw_lw(&n.w[0], &n.w[0], 0x12345678);
294      check32(res32, 0x12345678);
295  
296      n.d[0] = ~0LL;
297      res64 = mem_noshuf_sw_ld(&n.w[0], &n.d[0], 0x12345678);
298      check64(res64, 0xffffffff12345678LL);
299  
300      /*
301       * Store double combinations
302       */
303      n.d[0] = ~0LL;
304      res32 = mem_noshuf_sd_lb(&n.d[0], &n.b[1], 0x123456789abcdef0);
305      check32(res32, 0xffffffde);
306  
307      n.d[0] = ~0LL;
308      res32 = mem_noshuf_sd_lub(&n.d[0], &n.ub[1], 0x123456789abcdef0);
309      check32(res32, 0x000000de);
310  
311      n.d[0] = ~0LL;
312      res32 = mem_noshuf_sd_lh(&n.d[0], &n.h[1], 0x123456789abcdef0);
313      check32(res32, 0xffff9abc);
314  
315      n.d[0] = ~0LL;
316      res32 = mem_noshuf_sd_luh(&n.d[0], &n.uh[1], 0x123456789abcdef0);
317      check32(res32, 0x00009abc);
318  
319      n.d[0] = ~0LL;
320      res32 = mem_noshuf_sd_lw(&n.d[0], &n.w[1], 0x123456789abcdef0);
321      check32(res32, 0x12345678);
322  
323      n.d[0] = ~0LL;
324      res64 = mem_noshuf_sd_ld(&n.d[0], &n.d[0], 0x123456789abcdef0);
325      check64(res64, 0x123456789abcdef0LL);
326  
327      /*
328       * Predicated word stores
329       */
330      n.w[0] = ~0;
331      res32 = cancel_sw_lb(0, &n.w[0], &n.b[0], 0x12345678);
332      check32(res32, 0xffffffff);
333  
334      n.w[0] = ~0;
335      res32 = cancel_sw_lb(1, &n.w[0], &n.b[0], 0x12345687);
336      check32(res32, 0xffffff87);
337  
338      /*
339       * Predicated double stores
340       */
341      n.d[0] = ~0LL;
342      res64 = cancel_sw_ld(0, &n.w[0], &n.d[0], 0x12345678);
343      check64(res64, 0xffffffffffffffffLL);
344  
345      n.d[0] = ~0LL;
346      res64 = cancel_sw_ld(1, &n.w[0], &n.d[0], 0x12345678);
347      check64(res64, 0xffffffff12345678LL);
348  
349      n.d[0] = ~0LL;
350      res64 = cancel_sw_ld(0, &n.w[1], &n.d[0], 0x12345678);
351      check64(res64, 0xffffffffffffffffLL);
352  
353      n.d[0] = ~0LL;
354      res64 = cancel_sw_ld(1, &n.w[1], &n.d[0], 0x12345678);
355      check64(res64, 0x12345678ffffffffLL);
356  
357      /*
358       * No overlap tests
359       */
360      n.w[0] = ~0;
361      res32 = mem_noshuf_sb_lb(&n.b[1], &n.b[0], 0x87);
362      check32(res32, 0xffffffff);
363  
364      n.w[0] = ~0;
365      res32 = mem_noshuf_sb_lb(&n.b[0], &n.b[1], 0x87);
366      check32(res32, 0xffffffff);
367  
368      n.w[0] = ~0;
369      res32 = mem_noshuf_sh_lh(&n.h[1], &n.h[0], 0x8787);
370      check32(res32, 0xffffffff);
371  
372      n.w[0] = ~0;
373      res32 = mem_noshuf_sh_lh(&n.h[0], &n.h[1], 0x8787);
374      check32(res32, 0xffffffff);
375  
376      n.d[0] = ~0LL;
377      res32 = mem_noshuf_sw_lw(&n.w[0], &n.w[1], 0x12345678);
378      check32(res32, 0xffffffff);
379  
380      n.d[0] = ~0LL;
381      res32 = mem_noshuf_sw_lw(&n.w[1], &n.w[0], 0x12345678);
382      check32(res32, 0xffffffff);
383  
384      n.d[0] = ~0LL;
385      n.d[1] = ~0LL;
386      res64 = mem_noshuf_sd_ld(&n.d[1], &n.d[0], 0x123456789abcdef0LL);
387      check64(res64, 0xffffffffffffffffLL);
388  
389      n.d[0] = ~0LL;
390      n.d[1] = ~0LL;
391      res64 = mem_noshuf_sd_ld(&n.d[0], &n.d[1], 0x123456789abcdef0LL);
392      check64(res64, 0xffffffffffffffffLL);
393  
394      n.w[0] = ~0;
395      res32 = pred_lw_sw(0, &n.w[0], &n.w[0], 0x12345678, 0xc0ffeeda);
396      check32(res32, 0x12345678);
397      check32(n.w[0], 0xc0ffeeda);
398  
399      n.w[0] = ~0;
400      res32 = pred_lw_sw(1, &n.w[0], &n.w[0], 0x12345678, 0xc0ffeeda);
401      check32(res32, 0xc0ffeeda);
402      check32(n.w[0], 0xc0ffeeda);
403  
404      n.w[0] = ~0;
405      res32 = pred_lw_sw_pi(0, &n.w[0], &n.w[0], 0x12345678, 0xc0ffeeda);
406      check32(res32, 0x12345678);
407      check32(n.w[0], 0xc0ffeeda);
408  
409      n.w[0] = ~0;
410      res32 = pred_lw_sw_pi(1, &n.w[0], &n.w[0], 0x12345678, 0xc0ffeeda);
411      check32(res32, 0xc0ffeeda);
412      check32(n.w[0], 0xc0ffeeda);
413  
414      n.d[0] = ~0LL;
415      res64 = pred_ld_sd(0, &n.d[0], &n.d[0],
416                         0x1234567812345678LL, 0xc0ffeedac0ffeedaLL);
417      check64(res64, 0x1234567812345678LL);
418      check64(n.d[0], 0xc0ffeedac0ffeedaLL);
419  
420      n.d[0] = ~0LL;
421      res64 = pred_ld_sd(1, &n.d[0], &n.d[0],
422                         0x1234567812345678LL, 0xc0ffeedac0ffeedaLL);
423      check64(res64, 0xc0ffeedac0ffeedaLL);
424      check64(n.d[0], 0xc0ffeedac0ffeedaLL);
425  
426      n.d[0] = ~0LL;
427      res64 = pred_ld_sd_pi(0, &n.d[0], &n.d[0],
428                            0x1234567812345678LL, 0xc0ffeedac0ffeedaLL);
429      check64(res64, 0x1234567812345678LL);
430      check64(n.d[0], 0xc0ffeedac0ffeedaLL);
431  
432      n.d[0] = ~0LL;
433      res64 = pred_ld_sd_pi(1, &n.d[0], &n.d[0],
434                            0x1234567812345678LL, 0xc0ffeedac0ffeedaLL);
435      check64(res64, 0xc0ffeedac0ffeedaLL);
436      check64(n.d[0], 0xc0ffeedac0ffeedaLL);
437  
438      puts(err ? "FAIL" : "PASS");
439      return err;
440  }
441