1 /*
2 * Generic vectorized operation runtime
3 *
4 * Copyright (c) 2018 Linaro
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include "qemu/osdep.h"
21 #include "qemu/host-utils.h"
22 #include "exec/helper-proto-common.h"
23 #include "tcg/tcg-gvec-desc.h"
24
25
/*
 * Zero the part of the destination that lies between the bytes already
 * written (oprsz) and the maximum size recorded in desc (simd_maxsz).
 * The gap is cleared in uint64_t-sized stores; nothing is written when
 * maxsz does not exceed oprsz.
 */
static inline void clear_high(void *d, intptr_t oprsz, uint32_t desc)
{
    const intptr_t max_bytes = simd_maxsz(desc);
    intptr_t off = oprsz;

    if (unlikely(max_bytes > oprsz)) {
        while (off < max_bytes) {
            *(uint64_t *)(d + off) = 0;
            off += sizeof(uint64_t);
        }
    }
}
37
HELPER(gvec_add8)38 void HELPER(gvec_add8)(void *d, void *a, void *b, uint32_t desc)
39 {
40 intptr_t oprsz = simd_oprsz(desc);
41 intptr_t i;
42
43 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
44 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) + *(uint8_t *)(b + i);
45 }
46 clear_high(d, oprsz, desc);
47 }
48
HELPER(gvec_add16)49 void HELPER(gvec_add16)(void *d, void *a, void *b, uint32_t desc)
50 {
51 intptr_t oprsz = simd_oprsz(desc);
52 intptr_t i;
53
54 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
55 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) + *(uint16_t *)(b + i);
56 }
57 clear_high(d, oprsz, desc);
58 }
59
HELPER(gvec_add32)60 void HELPER(gvec_add32)(void *d, void *a, void *b, uint32_t desc)
61 {
62 intptr_t oprsz = simd_oprsz(desc);
63 intptr_t i;
64
65 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
66 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) + *(uint32_t *)(b + i);
67 }
68 clear_high(d, oprsz, desc);
69 }
70
HELPER(gvec_add64)71 void HELPER(gvec_add64)(void *d, void *a, void *b, uint32_t desc)
72 {
73 intptr_t oprsz = simd_oprsz(desc);
74 intptr_t i;
75
76 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
77 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + *(uint64_t *)(b + i);
78 }
79 clear_high(d, oprsz, desc);
80 }
81
HELPER(gvec_adds8)82 void HELPER(gvec_adds8)(void *d, void *a, uint64_t b, uint32_t desc)
83 {
84 intptr_t oprsz = simd_oprsz(desc);
85 intptr_t i;
86
87 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
88 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) + (uint8_t)b;
89 }
90 clear_high(d, oprsz, desc);
91 }
92
HELPER(gvec_adds16)93 void HELPER(gvec_adds16)(void *d, void *a, uint64_t b, uint32_t desc)
94 {
95 intptr_t oprsz = simd_oprsz(desc);
96 intptr_t i;
97
98 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
99 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) + (uint16_t)b;
100 }
101 clear_high(d, oprsz, desc);
102 }
103
HELPER(gvec_adds32)104 void HELPER(gvec_adds32)(void *d, void *a, uint64_t b, uint32_t desc)
105 {
106 intptr_t oprsz = simd_oprsz(desc);
107 intptr_t i;
108
109 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
110 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) + (uint32_t)b;
111 }
112 clear_high(d, oprsz, desc);
113 }
114
HELPER(gvec_adds64)115 void HELPER(gvec_adds64)(void *d, void *a, uint64_t b, uint32_t desc)
116 {
117 intptr_t oprsz = simd_oprsz(desc);
118 intptr_t i;
119
120 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
121 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + b;
122 }
123 clear_high(d, oprsz, desc);
124 }
125
HELPER(gvec_sub8)126 void HELPER(gvec_sub8)(void *d, void *a, void *b, uint32_t desc)
127 {
128 intptr_t oprsz = simd_oprsz(desc);
129 intptr_t i;
130
131 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
132 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) - *(uint8_t *)(b + i);
133 }
134 clear_high(d, oprsz, desc);
135 }
136
HELPER(gvec_sub16)137 void HELPER(gvec_sub16)(void *d, void *a, void *b, uint32_t desc)
138 {
139 intptr_t oprsz = simd_oprsz(desc);
140 intptr_t i;
141
142 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
143 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) - *(uint16_t *)(b + i);
144 }
145 clear_high(d, oprsz, desc);
146 }
147
HELPER(gvec_sub32)148 void HELPER(gvec_sub32)(void *d, void *a, void *b, uint32_t desc)
149 {
150 intptr_t oprsz = simd_oprsz(desc);
151 intptr_t i;
152
153 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
154 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) - *(uint32_t *)(b + i);
155 }
156 clear_high(d, oprsz, desc);
157 }
158
HELPER(gvec_sub64)159 void HELPER(gvec_sub64)(void *d, void *a, void *b, uint32_t desc)
160 {
161 intptr_t oprsz = simd_oprsz(desc);
162 intptr_t i;
163
164 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
165 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) - *(uint64_t *)(b + i);
166 }
167 clear_high(d, oprsz, desc);
168 }
169
HELPER(gvec_subs8)170 void HELPER(gvec_subs8)(void *d, void *a, uint64_t b, uint32_t desc)
171 {
172 intptr_t oprsz = simd_oprsz(desc);
173 intptr_t i;
174
175 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
176 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) - (uint8_t)b;
177 }
178 clear_high(d, oprsz, desc);
179 }
180
HELPER(gvec_subs16)181 void HELPER(gvec_subs16)(void *d, void *a, uint64_t b, uint32_t desc)
182 {
183 intptr_t oprsz = simd_oprsz(desc);
184 intptr_t i;
185
186 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
187 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) - (uint16_t)b;
188 }
189 clear_high(d, oprsz, desc);
190 }
191
HELPER(gvec_subs32)192 void HELPER(gvec_subs32)(void *d, void *a, uint64_t b, uint32_t desc)
193 {
194 intptr_t oprsz = simd_oprsz(desc);
195 intptr_t i;
196
197 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
198 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) - (uint32_t)b;
199 }
200 clear_high(d, oprsz, desc);
201 }
202
HELPER(gvec_subs64)203 void HELPER(gvec_subs64)(void *d, void *a, uint64_t b, uint32_t desc)
204 {
205 intptr_t oprsz = simd_oprsz(desc);
206 intptr_t i;
207
208 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
209 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) - b;
210 }
211 clear_high(d, oprsz, desc);
212 }
213
HELPER(gvec_mul8)214 void HELPER(gvec_mul8)(void *d, void *a, void *b, uint32_t desc)
215 {
216 intptr_t oprsz = simd_oprsz(desc);
217 intptr_t i;
218
219 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
220 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) * *(uint8_t *)(b + i);
221 }
222 clear_high(d, oprsz, desc);
223 }
224
HELPER(gvec_mul16)225 void HELPER(gvec_mul16)(void *d, void *a, void *b, uint32_t desc)
226 {
227 intptr_t oprsz = simd_oprsz(desc);
228 intptr_t i;
229
230 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
231 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) * *(uint16_t *)(b + i);
232 }
233 clear_high(d, oprsz, desc);
234 }
235
HELPER(gvec_mul32)236 void HELPER(gvec_mul32)(void *d, void *a, void *b, uint32_t desc)
237 {
238 intptr_t oprsz = simd_oprsz(desc);
239 intptr_t i;
240
241 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
242 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) * *(uint32_t *)(b + i);
243 }
244 clear_high(d, oprsz, desc);
245 }
246
HELPER(gvec_mul64)247 void HELPER(gvec_mul64)(void *d, void *a, void *b, uint32_t desc)
248 {
249 intptr_t oprsz = simd_oprsz(desc);
250 intptr_t i;
251
252 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
253 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) * *(uint64_t *)(b + i);
254 }
255 clear_high(d, oprsz, desc);
256 }
257
HELPER(gvec_muls8)258 void HELPER(gvec_muls8)(void *d, void *a, uint64_t b, uint32_t desc)
259 {
260 intptr_t oprsz = simd_oprsz(desc);
261 intptr_t i;
262
263 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
264 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) * (uint8_t)b;
265 }
266 clear_high(d, oprsz, desc);
267 }
268
HELPER(gvec_muls16)269 void HELPER(gvec_muls16)(void *d, void *a, uint64_t b, uint32_t desc)
270 {
271 intptr_t oprsz = simd_oprsz(desc);
272 intptr_t i;
273
274 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
275 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) * (uint16_t)b;
276 }
277 clear_high(d, oprsz, desc);
278 }
279
HELPER(gvec_muls32)280 void HELPER(gvec_muls32)(void *d, void *a, uint64_t b, uint32_t desc)
281 {
282 intptr_t oprsz = simd_oprsz(desc);
283 intptr_t i;
284
285 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
286 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) * (uint32_t)b;
287 }
288 clear_high(d, oprsz, desc);
289 }
290
HELPER(gvec_muls64)291 void HELPER(gvec_muls64)(void *d, void *a, uint64_t b, uint32_t desc)
292 {
293 intptr_t oprsz = simd_oprsz(desc);
294 intptr_t i;
295
296 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
297 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) * b;
298 }
299 clear_high(d, oprsz, desc);
300 }
301
HELPER(gvec_neg8)302 void HELPER(gvec_neg8)(void *d, void *a, uint32_t desc)
303 {
304 intptr_t oprsz = simd_oprsz(desc);
305 intptr_t i;
306
307 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
308 *(uint8_t *)(d + i) = -*(uint8_t *)(a + i);
309 }
310 clear_high(d, oprsz, desc);
311 }
312
HELPER(gvec_neg16)313 void HELPER(gvec_neg16)(void *d, void *a, uint32_t desc)
314 {
315 intptr_t oprsz = simd_oprsz(desc);
316 intptr_t i;
317
318 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
319 *(uint16_t *)(d + i) = -*(uint16_t *)(a + i);
320 }
321 clear_high(d, oprsz, desc);
322 }
323
HELPER(gvec_neg32)324 void HELPER(gvec_neg32)(void *d, void *a, uint32_t desc)
325 {
326 intptr_t oprsz = simd_oprsz(desc);
327 intptr_t i;
328
329 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
330 *(uint32_t *)(d + i) = -*(uint32_t *)(a + i);
331 }
332 clear_high(d, oprsz, desc);
333 }
334
HELPER(gvec_neg64)335 void HELPER(gvec_neg64)(void *d, void *a, uint32_t desc)
336 {
337 intptr_t oprsz = simd_oprsz(desc);
338 intptr_t i;
339
340 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
341 *(uint64_t *)(d + i) = -*(uint64_t *)(a + i);
342 }
343 clear_high(d, oprsz, desc);
344 }
345
HELPER(gvec_abs8)346 void HELPER(gvec_abs8)(void *d, void *a, uint32_t desc)
347 {
348 intptr_t oprsz = simd_oprsz(desc);
349 intptr_t i;
350
351 for (i = 0; i < oprsz; i += sizeof(int8_t)) {
352 int8_t aa = *(int8_t *)(a + i);
353 *(int8_t *)(d + i) = aa < 0 ? -aa : aa;
354 }
355 clear_high(d, oprsz, desc);
356 }
357
HELPER(gvec_abs16)358 void HELPER(gvec_abs16)(void *d, void *a, uint32_t desc)
359 {
360 intptr_t oprsz = simd_oprsz(desc);
361 intptr_t i;
362
363 for (i = 0; i < oprsz; i += sizeof(int16_t)) {
364 int16_t aa = *(int16_t *)(a + i);
365 *(int16_t *)(d + i) = aa < 0 ? -aa : aa;
366 }
367 clear_high(d, oprsz, desc);
368 }
369
HELPER(gvec_abs32)370 void HELPER(gvec_abs32)(void *d, void *a, uint32_t desc)
371 {
372 intptr_t oprsz = simd_oprsz(desc);
373 intptr_t i;
374
375 for (i = 0; i < oprsz; i += sizeof(int32_t)) {
376 int32_t aa = *(int32_t *)(a + i);
377 *(int32_t *)(d + i) = aa < 0 ? -aa : aa;
378 }
379 clear_high(d, oprsz, desc);
380 }
381
HELPER(gvec_abs64)382 void HELPER(gvec_abs64)(void *d, void *a, uint32_t desc)
383 {
384 intptr_t oprsz = simd_oprsz(desc);
385 intptr_t i;
386
387 for (i = 0; i < oprsz; i += sizeof(int64_t)) {
388 int64_t aa = *(int64_t *)(a + i);
389 *(int64_t *)(d + i) = aa < 0 ? -aa : aa;
390 }
391 clear_high(d, oprsz, desc);
392 }
393
HELPER(gvec_mov)394 void HELPER(gvec_mov)(void *d, void *a, uint32_t desc)
395 {
396 intptr_t oprsz = simd_oprsz(desc);
397
398 memcpy(d, a, oprsz);
399 clear_high(d, oprsz, desc);
400 }
401
HELPER(gvec_dup64)402 void HELPER(gvec_dup64)(void *d, uint32_t desc, uint64_t c)
403 {
404 intptr_t oprsz = simd_oprsz(desc);
405 intptr_t i;
406
407 if (c == 0) {
408 oprsz = 0;
409 } else {
410 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
411 *(uint64_t *)(d + i) = c;
412 }
413 }
414 clear_high(d, oprsz, desc);
415 }
416
HELPER(gvec_dup32)417 void HELPER(gvec_dup32)(void *d, uint32_t desc, uint32_t c)
418 {
419 intptr_t oprsz = simd_oprsz(desc);
420 intptr_t i;
421
422 if (c == 0) {
423 oprsz = 0;
424 } else {
425 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
426 *(uint32_t *)(d + i) = c;
427 }
428 }
429 clear_high(d, oprsz, desc);
430 }
431
HELPER(gvec_dup16)432 void HELPER(gvec_dup16)(void *d, uint32_t desc, uint32_t c)
433 {
434 HELPER(gvec_dup32)(d, desc, 0x00010001 * (c & 0xffff));
435 }
436
HELPER(gvec_dup8)437 void HELPER(gvec_dup8)(void *d, uint32_t desc, uint32_t c)
438 {
439 HELPER(gvec_dup32)(d, desc, 0x01010101 * (c & 0xff));
440 }
441
HELPER(gvec_not)442 void HELPER(gvec_not)(void *d, void *a, uint32_t desc)
443 {
444 intptr_t oprsz = simd_oprsz(desc);
445 intptr_t i;
446
447 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
448 *(uint64_t *)(d + i) = ~*(uint64_t *)(a + i);
449 }
450 clear_high(d, oprsz, desc);
451 }
452
HELPER(gvec_and)453 void HELPER(gvec_and)(void *d, void *a, void *b, uint32_t desc)
454 {
455 intptr_t oprsz = simd_oprsz(desc);
456 intptr_t i;
457
458 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
459 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & *(uint64_t *)(b + i);
460 }
461 clear_high(d, oprsz, desc);
462 }
463
HELPER(gvec_or)464 void HELPER(gvec_or)(void *d, void *a, void *b, uint32_t desc)
465 {
466 intptr_t oprsz = simd_oprsz(desc);
467 intptr_t i;
468
469 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
470 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) | *(uint64_t *)(b + i);
471 }
472 clear_high(d, oprsz, desc);
473 }
474
HELPER(gvec_xor)475 void HELPER(gvec_xor)(void *d, void *a, void *b, uint32_t desc)
476 {
477 intptr_t oprsz = simd_oprsz(desc);
478 intptr_t i;
479
480 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
481 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) ^ *(uint64_t *)(b + i);
482 }
483 clear_high(d, oprsz, desc);
484 }
485
HELPER(gvec_andc)486 void HELPER(gvec_andc)(void *d, void *a, void *b, uint32_t desc)
487 {
488 intptr_t oprsz = simd_oprsz(desc);
489 intptr_t i;
490
491 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
492 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) &~ *(uint64_t *)(b + i);
493 }
494 clear_high(d, oprsz, desc);
495 }
496
HELPER(gvec_orc)497 void HELPER(gvec_orc)(void *d, void *a, void *b, uint32_t desc)
498 {
499 intptr_t oprsz = simd_oprsz(desc);
500 intptr_t i;
501
502 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
503 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) |~ *(uint64_t *)(b + i);
504 }
505 clear_high(d, oprsz, desc);
506 }
507
HELPER(gvec_nand)508 void HELPER(gvec_nand)(void *d, void *a, void *b, uint32_t desc)
509 {
510 intptr_t oprsz = simd_oprsz(desc);
511 intptr_t i;
512
513 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
514 *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) & *(uint64_t *)(b + i));
515 }
516 clear_high(d, oprsz, desc);
517 }
518
HELPER(gvec_nor)519 void HELPER(gvec_nor)(void *d, void *a, void *b, uint32_t desc)
520 {
521 intptr_t oprsz = simd_oprsz(desc);
522 intptr_t i;
523
524 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
525 *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) | *(uint64_t *)(b + i));
526 }
527 clear_high(d, oprsz, desc);
528 }
529
HELPER(gvec_eqv)530 void HELPER(gvec_eqv)(void *d, void *a, void *b, uint32_t desc)
531 {
532 intptr_t oprsz = simd_oprsz(desc);
533 intptr_t i;
534
535 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
536 *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) ^ *(uint64_t *)(b + i));
537 }
538 clear_high(d, oprsz, desc);
539 }
540
HELPER(gvec_ands)541 void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc)
542 {
543 intptr_t oprsz = simd_oprsz(desc);
544 intptr_t i;
545
546 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
547 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & b;
548 }
549 clear_high(d, oprsz, desc);
550 }
551
HELPER(gvec_andcs)552 void HELPER(gvec_andcs)(void *d, void *a, uint64_t b, uint32_t desc)
553 {
554 intptr_t oprsz = simd_oprsz(desc);
555 intptr_t i;
556
557 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
558 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & ~b;
559 }
560 clear_high(d, oprsz, desc);
561 }
562
HELPER(gvec_xors)563 void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc)
564 {
565 intptr_t oprsz = simd_oprsz(desc);
566 intptr_t i;
567
568 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
569 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) ^ b;
570 }
571 clear_high(d, oprsz, desc);
572 }
573
HELPER(gvec_ors)574 void HELPER(gvec_ors)(void *d, void *a, uint64_t b, uint32_t desc)
575 {
576 intptr_t oprsz = simd_oprsz(desc);
577 intptr_t i;
578
579 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
580 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) | b;
581 }
582 clear_high(d, oprsz, desc);
583 }
584
HELPER(gvec_shl8i)585 void HELPER(gvec_shl8i)(void *d, void *a, uint32_t desc)
586 {
587 intptr_t oprsz = simd_oprsz(desc);
588 int shift = simd_data(desc);
589 intptr_t i;
590
591 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
592 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << shift;
593 }
594 clear_high(d, oprsz, desc);
595 }
596
HELPER(gvec_shl16i)597 void HELPER(gvec_shl16i)(void *d, void *a, uint32_t desc)
598 {
599 intptr_t oprsz = simd_oprsz(desc);
600 int shift = simd_data(desc);
601 intptr_t i;
602
603 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
604 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << shift;
605 }
606 clear_high(d, oprsz, desc);
607 }
608
HELPER(gvec_shl32i)609 void HELPER(gvec_shl32i)(void *d, void *a, uint32_t desc)
610 {
611 intptr_t oprsz = simd_oprsz(desc);
612 int shift = simd_data(desc);
613 intptr_t i;
614
615 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
616 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << shift;
617 }
618 clear_high(d, oprsz, desc);
619 }
620
HELPER(gvec_shl64i)621 void HELPER(gvec_shl64i)(void *d, void *a, uint32_t desc)
622 {
623 intptr_t oprsz = simd_oprsz(desc);
624 int shift = simd_data(desc);
625 intptr_t i;
626
627 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
628 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << shift;
629 }
630 clear_high(d, oprsz, desc);
631 }
632
HELPER(gvec_shr8i)633 void HELPER(gvec_shr8i)(void *d, void *a, uint32_t desc)
634 {
635 intptr_t oprsz = simd_oprsz(desc);
636 int shift = simd_data(desc);
637 intptr_t i;
638
639 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
640 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> shift;
641 }
642 clear_high(d, oprsz, desc);
643 }
644
HELPER(gvec_shr16i)645 void HELPER(gvec_shr16i)(void *d, void *a, uint32_t desc)
646 {
647 intptr_t oprsz = simd_oprsz(desc);
648 int shift = simd_data(desc);
649 intptr_t i;
650
651 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
652 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> shift;
653 }
654 clear_high(d, oprsz, desc);
655 }
656
HELPER(gvec_shr32i)657 void HELPER(gvec_shr32i)(void *d, void *a, uint32_t desc)
658 {
659 intptr_t oprsz = simd_oprsz(desc);
660 int shift = simd_data(desc);
661 intptr_t i;
662
663 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
664 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> shift;
665 }
666 clear_high(d, oprsz, desc);
667 }
668
HELPER(gvec_shr64i)669 void HELPER(gvec_shr64i)(void *d, void *a, uint32_t desc)
670 {
671 intptr_t oprsz = simd_oprsz(desc);
672 int shift = simd_data(desc);
673 intptr_t i;
674
675 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
676 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> shift;
677 }
678 clear_high(d, oprsz, desc);
679 }
680
HELPER(gvec_sar8i)681 void HELPER(gvec_sar8i)(void *d, void *a, uint32_t desc)
682 {
683 intptr_t oprsz = simd_oprsz(desc);
684 int shift = simd_data(desc);
685 intptr_t i;
686
687 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
688 *(int8_t *)(d + i) = *(int8_t *)(a + i) >> shift;
689 }
690 clear_high(d, oprsz, desc);
691 }
692
HELPER(gvec_sar16i)693 void HELPER(gvec_sar16i)(void *d, void *a, uint32_t desc)
694 {
695 intptr_t oprsz = simd_oprsz(desc);
696 int shift = simd_data(desc);
697 intptr_t i;
698
699 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
700 *(int16_t *)(d + i) = *(int16_t *)(a + i) >> shift;
701 }
702 clear_high(d, oprsz, desc);
703 }
704
HELPER(gvec_sar32i)705 void HELPER(gvec_sar32i)(void *d, void *a, uint32_t desc)
706 {
707 intptr_t oprsz = simd_oprsz(desc);
708 int shift = simd_data(desc);
709 intptr_t i;
710
711 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
712 *(int32_t *)(d + i) = *(int32_t *)(a + i) >> shift;
713 }
714 clear_high(d, oprsz, desc);
715 }
716
HELPER(gvec_sar64i)717 void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc)
718 {
719 intptr_t oprsz = simd_oprsz(desc);
720 int shift = simd_data(desc);
721 intptr_t i;
722
723 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
724 *(int64_t *)(d + i) = *(int64_t *)(a + i) >> shift;
725 }
726 clear_high(d, oprsz, desc);
727 }
728
HELPER(gvec_rotl8i)729 void HELPER(gvec_rotl8i)(void *d, void *a, uint32_t desc)
730 {
731 intptr_t oprsz = simd_oprsz(desc);
732 int shift = simd_data(desc);
733 intptr_t i;
734
735 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
736 *(uint8_t *)(d + i) = rol8(*(uint8_t *)(a + i), shift);
737 }
738 clear_high(d, oprsz, desc);
739 }
740
HELPER(gvec_rotl16i)741 void HELPER(gvec_rotl16i)(void *d, void *a, uint32_t desc)
742 {
743 intptr_t oprsz = simd_oprsz(desc);
744 int shift = simd_data(desc);
745 intptr_t i;
746
747 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
748 *(uint16_t *)(d + i) = rol16(*(uint16_t *)(a + i), shift);
749 }
750 clear_high(d, oprsz, desc);
751 }
752
HELPER(gvec_rotl32i)753 void HELPER(gvec_rotl32i)(void *d, void *a, uint32_t desc)
754 {
755 intptr_t oprsz = simd_oprsz(desc);
756 int shift = simd_data(desc);
757 intptr_t i;
758
759 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
760 *(uint32_t *)(d + i) = rol32(*(uint32_t *)(a + i), shift);
761 }
762 clear_high(d, oprsz, desc);
763 }
764
HELPER(gvec_rotl64i)765 void HELPER(gvec_rotl64i)(void *d, void *a, uint32_t desc)
766 {
767 intptr_t oprsz = simd_oprsz(desc);
768 int shift = simd_data(desc);
769 intptr_t i;
770
771 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
772 *(uint64_t *)(d + i) = rol64(*(uint64_t *)(a + i), shift);
773 }
774 clear_high(d, oprsz, desc);
775 }
776
HELPER(gvec_shl8v)777 void HELPER(gvec_shl8v)(void *d, void *a, void *b, uint32_t desc)
778 {
779 intptr_t oprsz = simd_oprsz(desc);
780 intptr_t i;
781
782 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
783 uint8_t sh = *(uint8_t *)(b + i) & 7;
784 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << sh;
785 }
786 clear_high(d, oprsz, desc);
787 }
788
HELPER(gvec_shl16v)789 void HELPER(gvec_shl16v)(void *d, void *a, void *b, uint32_t desc)
790 {
791 intptr_t oprsz = simd_oprsz(desc);
792 intptr_t i;
793
794 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
795 uint8_t sh = *(uint16_t *)(b + i) & 15;
796 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << sh;
797 }
798 clear_high(d, oprsz, desc);
799 }
800
HELPER(gvec_shl32v)801 void HELPER(gvec_shl32v)(void *d, void *a, void *b, uint32_t desc)
802 {
803 intptr_t oprsz = simd_oprsz(desc);
804 intptr_t i;
805
806 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
807 uint8_t sh = *(uint32_t *)(b + i) & 31;
808 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << sh;
809 }
810 clear_high(d, oprsz, desc);
811 }
812
HELPER(gvec_shl64v)813 void HELPER(gvec_shl64v)(void *d, void *a, void *b, uint32_t desc)
814 {
815 intptr_t oprsz = simd_oprsz(desc);
816 intptr_t i;
817
818 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
819 uint8_t sh = *(uint64_t *)(b + i) & 63;
820 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << sh;
821 }
822 clear_high(d, oprsz, desc);
823 }
824
HELPER(gvec_shr8v)825 void HELPER(gvec_shr8v)(void *d, void *a, void *b, uint32_t desc)
826 {
827 intptr_t oprsz = simd_oprsz(desc);
828 intptr_t i;
829
830 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
831 uint8_t sh = *(uint8_t *)(b + i) & 7;
832 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> sh;
833 }
834 clear_high(d, oprsz, desc);
835 }
836
HELPER(gvec_shr16v)837 void HELPER(gvec_shr16v)(void *d, void *a, void *b, uint32_t desc)
838 {
839 intptr_t oprsz = simd_oprsz(desc);
840 intptr_t i;
841
842 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
843 uint8_t sh = *(uint16_t *)(b + i) & 15;
844 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> sh;
845 }
846 clear_high(d, oprsz, desc);
847 }
848
HELPER(gvec_shr32v)849 void HELPER(gvec_shr32v)(void *d, void *a, void *b, uint32_t desc)
850 {
851 intptr_t oprsz = simd_oprsz(desc);
852 intptr_t i;
853
854 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
855 uint8_t sh = *(uint32_t *)(b + i) & 31;
856 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> sh;
857 }
858 clear_high(d, oprsz, desc);
859 }
860
HELPER(gvec_shr64v)861 void HELPER(gvec_shr64v)(void *d, void *a, void *b, uint32_t desc)
862 {
863 intptr_t oprsz = simd_oprsz(desc);
864 intptr_t i;
865
866 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
867 uint8_t sh = *(uint64_t *)(b + i) & 63;
868 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> sh;
869 }
870 clear_high(d, oprsz, desc);
871 }
872
HELPER(gvec_sar8v)873 void HELPER(gvec_sar8v)(void *d, void *a, void *b, uint32_t desc)
874 {
875 intptr_t oprsz = simd_oprsz(desc);
876 intptr_t i;
877
878 for (i = 0; i < oprsz; i += sizeof(int8_t)) {
879 uint8_t sh = *(uint8_t *)(b + i) & 7;
880 *(int8_t *)(d + i) = *(int8_t *)(a + i) >> sh;
881 }
882 clear_high(d, oprsz, desc);
883 }
884
HELPER(gvec_sar16v)885 void HELPER(gvec_sar16v)(void *d, void *a, void *b, uint32_t desc)
886 {
887 intptr_t oprsz = simd_oprsz(desc);
888 intptr_t i;
889
890 for (i = 0; i < oprsz; i += sizeof(int16_t)) {
891 uint8_t sh = *(uint16_t *)(b + i) & 15;
892 *(int16_t *)(d + i) = *(int16_t *)(a + i) >> sh;
893 }
894 clear_high(d, oprsz, desc);
895 }
896
HELPER(gvec_sar32v)897 void HELPER(gvec_sar32v)(void *d, void *a, void *b, uint32_t desc)
898 {
899 intptr_t oprsz = simd_oprsz(desc);
900 intptr_t i;
901
902 for (i = 0; i < oprsz; i += sizeof(int32_t)) {
903 uint8_t sh = *(uint32_t *)(b + i) & 31;
904 *(int32_t *)(d + i) = *(int32_t *)(a + i) >> sh;
905 }
906 clear_high(d, oprsz, desc);
907 }
908
HELPER(gvec_sar64v)909 void HELPER(gvec_sar64v)(void *d, void *a, void *b, uint32_t desc)
910 {
911 intptr_t oprsz = simd_oprsz(desc);
912 intptr_t i;
913
914 for (i = 0; i < oprsz; i += sizeof(int64_t)) {
915 uint8_t sh = *(uint64_t *)(b + i) & 63;
916 *(int64_t *)(d + i) = *(int64_t *)(a + i) >> sh;
917 }
918 clear_high(d, oprsz, desc);
919 }
920
HELPER(gvec_rotl8v)921 void HELPER(gvec_rotl8v)(void *d, void *a, void *b, uint32_t desc)
922 {
923 intptr_t oprsz = simd_oprsz(desc);
924 intptr_t i;
925
926 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
927 uint8_t sh = *(uint8_t *)(b + i) & 7;
928 *(uint8_t *)(d + i) = rol8(*(uint8_t *)(a + i), sh);
929 }
930 clear_high(d, oprsz, desc);
931 }
932
HELPER(gvec_rotl16v)933 void HELPER(gvec_rotl16v)(void *d, void *a, void *b, uint32_t desc)
934 {
935 intptr_t oprsz = simd_oprsz(desc);
936 intptr_t i;
937
938 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
939 uint8_t sh = *(uint16_t *)(b + i) & 15;
940 *(uint16_t *)(d + i) = rol16(*(uint16_t *)(a + i), sh);
941 }
942 clear_high(d, oprsz, desc);
943 }
944
HELPER(gvec_rotl32v)945 void HELPER(gvec_rotl32v)(void *d, void *a, void *b, uint32_t desc)
946 {
947 intptr_t oprsz = simd_oprsz(desc);
948 intptr_t i;
949
950 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
951 uint8_t sh = *(uint32_t *)(b + i) & 31;
952 *(uint32_t *)(d + i) = rol32(*(uint32_t *)(a + i), sh);
953 }
954 clear_high(d, oprsz, desc);
955 }
956
HELPER(gvec_rotl64v)957 void HELPER(gvec_rotl64v)(void *d, void *a, void *b, uint32_t desc)
958 {
959 intptr_t oprsz = simd_oprsz(desc);
960 intptr_t i;
961
962 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
963 uint8_t sh = *(uint64_t *)(b + i) & 63;
964 *(uint64_t *)(d + i) = rol64(*(uint64_t *)(a + i), sh);
965 }
966 clear_high(d, oprsz, desc);
967 }
968
HELPER(gvec_rotr8v)969 void HELPER(gvec_rotr8v)(void *d, void *a, void *b, uint32_t desc)
970 {
971 intptr_t oprsz = simd_oprsz(desc);
972 intptr_t i;
973
974 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
975 uint8_t sh = *(uint8_t *)(b + i) & 7;
976 *(uint8_t *)(d + i) = ror8(*(uint8_t *)(a + i), sh);
977 }
978 clear_high(d, oprsz, desc);
979 }
980
HELPER(gvec_rotr16v)981 void HELPER(gvec_rotr16v)(void *d, void *a, void *b, uint32_t desc)
982 {
983 intptr_t oprsz = simd_oprsz(desc);
984 intptr_t i;
985
986 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
987 uint8_t sh = *(uint16_t *)(b + i) & 15;
988 *(uint16_t *)(d + i) = ror16(*(uint16_t *)(a + i), sh);
989 }
990 clear_high(d, oprsz, desc);
991 }
992
HELPER(gvec_rotr32v)993 void HELPER(gvec_rotr32v)(void *d, void *a, void *b, uint32_t desc)
994 {
995 intptr_t oprsz = simd_oprsz(desc);
996 intptr_t i;
997
998 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
999 uint8_t sh = *(uint32_t *)(b + i) & 31;
1000 *(uint32_t *)(d + i) = ror32(*(uint32_t *)(a + i), sh);
1001 }
1002 clear_high(d, oprsz, desc);
1003 }
1004
HELPER(gvec_rotr64v)1005 void HELPER(gvec_rotr64v)(void *d, void *a, void *b, uint32_t desc)
1006 {
1007 intptr_t oprsz = simd_oprsz(desc);
1008 intptr_t i;
1009
1010 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1011 uint8_t sh = *(uint64_t *)(b + i) & 63;
1012 *(uint64_t *)(d + i) = ror64(*(uint64_t *)(a + i), sh);
1013 }
1014 clear_high(d, oprsz, desc);
1015 }
1016
1017 #define DO_CMP1(NAME, TYPE, OP) \
1018 void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc) \
1019 { \
1020 intptr_t oprsz = simd_oprsz(desc); \
1021 intptr_t i; \
1022 for (i = 0; i < oprsz; i += sizeof(TYPE)) { \
1023 *(TYPE *)(d + i) = -(*(TYPE *)(a + i) OP *(TYPE *)(b + i)); \
1024 } \
1025 clear_high(d, oprsz, desc); \
1026 }
1027
1028 #define DO_CMP2(SZ) \
1029 DO_CMP1(gvec_eq##SZ, uint##SZ##_t, ==) \
1030 DO_CMP1(gvec_ne##SZ, uint##SZ##_t, !=) \
1031 DO_CMP1(gvec_lt##SZ, int##SZ##_t, <) \
1032 DO_CMP1(gvec_le##SZ, int##SZ##_t, <=) \
1033 DO_CMP1(gvec_ltu##SZ, uint##SZ##_t, <) \
1034 DO_CMP1(gvec_leu##SZ, uint##SZ##_t, <=)
1035
1036 DO_CMP2(8)
1037 DO_CMP2(16)
1038 DO_CMP2(32)
1039 DO_CMP2(64)
1040
1041 #undef DO_CMP1
1042 #undef DO_CMP2
1043
1044 #define DO_CMP1(NAME, TYPE, OP) \
1045 void HELPER(NAME)(void *d, void *a, uint64_t b64, uint32_t desc) \
1046 { \
1047 intptr_t oprsz = simd_oprsz(desc); \
1048 TYPE inv = simd_data(desc), b = b64; \
1049 for (intptr_t i = 0; i < oprsz; i += sizeof(TYPE)) { \
1050 *(TYPE *)(d + i) = -((*(TYPE *)(a + i) OP b) ^ inv); \
1051 } \
1052 clear_high(d, oprsz, desc); \
1053 }
1054
1055 #define DO_CMP2(SZ) \
1056 DO_CMP1(gvec_eqs##SZ, uint##SZ##_t, ==) \
1057 DO_CMP1(gvec_lts##SZ, int##SZ##_t, <) \
1058 DO_CMP1(gvec_les##SZ, int##SZ##_t, <=) \
1059 DO_CMP1(gvec_ltus##SZ, uint##SZ##_t, <) \
1060 DO_CMP1(gvec_leus##SZ, uint##SZ##_t, <=)
1061
1062 DO_CMP2(8)
1063 DO_CMP2(16)
1064 DO_CMP2(32)
1065 DO_CMP2(64)
1066
1067 #undef DO_CMP1
1068 #undef DO_CMP2
1069
HELPER(gvec_ssadd8)1070 void HELPER(gvec_ssadd8)(void *d, void *a, void *b, uint32_t desc)
1071 {
1072 intptr_t oprsz = simd_oprsz(desc);
1073 intptr_t i;
1074
1075 for (i = 0; i < oprsz; i += sizeof(int8_t)) {
1076 int r = *(int8_t *)(a + i) + *(int8_t *)(b + i);
1077 if (r > INT8_MAX) {
1078 r = INT8_MAX;
1079 } else if (r < INT8_MIN) {
1080 r = INT8_MIN;
1081 }
1082 *(int8_t *)(d + i) = r;
1083 }
1084 clear_high(d, oprsz, desc);
1085 }
1086
HELPER(gvec_ssadd16)1087 void HELPER(gvec_ssadd16)(void *d, void *a, void *b, uint32_t desc)
1088 {
1089 intptr_t oprsz = simd_oprsz(desc);
1090 intptr_t i;
1091
1092 for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1093 int r = *(int16_t *)(a + i) + *(int16_t *)(b + i);
1094 if (r > INT16_MAX) {
1095 r = INT16_MAX;
1096 } else if (r < INT16_MIN) {
1097 r = INT16_MIN;
1098 }
1099 *(int16_t *)(d + i) = r;
1100 }
1101 clear_high(d, oprsz, desc);
1102 }
1103
HELPER(gvec_ssadd32)1104 void HELPER(gvec_ssadd32)(void *d, void *a, void *b, uint32_t desc)
1105 {
1106 intptr_t oprsz = simd_oprsz(desc);
1107 intptr_t i;
1108
1109 for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1110 int32_t ai = *(int32_t *)(a + i);
1111 int32_t bi = *(int32_t *)(b + i);
1112 int32_t di;
1113 if (sadd32_overflow(ai, bi, &di)) {
1114 di = (di < 0 ? INT32_MAX : INT32_MIN);
1115 }
1116 *(int32_t *)(d + i) = di;
1117 }
1118 clear_high(d, oprsz, desc);
1119 }
1120
HELPER(gvec_ssadd64)1121 void HELPER(gvec_ssadd64)(void *d, void *a, void *b, uint32_t desc)
1122 {
1123 intptr_t oprsz = simd_oprsz(desc);
1124 intptr_t i;
1125
1126 for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1127 int64_t ai = *(int64_t *)(a + i);
1128 int64_t bi = *(int64_t *)(b + i);
1129 int64_t di;
1130 if (sadd64_overflow(ai, bi, &di)) {
1131 di = (di < 0 ? INT64_MAX : INT64_MIN);
1132 }
1133 *(int64_t *)(d + i) = di;
1134 }
1135 clear_high(d, oprsz, desc);
1136 }
1137
HELPER(gvec_sssub8)1138 void HELPER(gvec_sssub8)(void *d, void *a, void *b, uint32_t desc)
1139 {
1140 intptr_t oprsz = simd_oprsz(desc);
1141 intptr_t i;
1142
1143 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1144 int r = *(int8_t *)(a + i) - *(int8_t *)(b + i);
1145 if (r > INT8_MAX) {
1146 r = INT8_MAX;
1147 } else if (r < INT8_MIN) {
1148 r = INT8_MIN;
1149 }
1150 *(uint8_t *)(d + i) = r;
1151 }
1152 clear_high(d, oprsz, desc);
1153 }
1154
HELPER(gvec_sssub16)1155 void HELPER(gvec_sssub16)(void *d, void *a, void *b, uint32_t desc)
1156 {
1157 intptr_t oprsz = simd_oprsz(desc);
1158 intptr_t i;
1159
1160 for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1161 int r = *(int16_t *)(a + i) - *(int16_t *)(b + i);
1162 if (r > INT16_MAX) {
1163 r = INT16_MAX;
1164 } else if (r < INT16_MIN) {
1165 r = INT16_MIN;
1166 }
1167 *(int16_t *)(d + i) = r;
1168 }
1169 clear_high(d, oprsz, desc);
1170 }
1171
HELPER(gvec_sssub32)1172 void HELPER(gvec_sssub32)(void *d, void *a, void *b, uint32_t desc)
1173 {
1174 intptr_t oprsz = simd_oprsz(desc);
1175 intptr_t i;
1176
1177 for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1178 int32_t ai = *(int32_t *)(a + i);
1179 int32_t bi = *(int32_t *)(b + i);
1180 int32_t di;
1181 if (ssub32_overflow(ai, bi, &di)) {
1182 di = (di < 0 ? INT32_MAX : INT32_MIN);
1183 }
1184 *(int32_t *)(d + i) = di;
1185 }
1186 clear_high(d, oprsz, desc);
1187 }
1188
HELPER(gvec_sssub64)1189 void HELPER(gvec_sssub64)(void *d, void *a, void *b, uint32_t desc)
1190 {
1191 intptr_t oprsz = simd_oprsz(desc);
1192 intptr_t i;
1193
1194 for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1195 int64_t ai = *(int64_t *)(a + i);
1196 int64_t bi = *(int64_t *)(b + i);
1197 int64_t di;
1198 if (ssub64_overflow(ai, bi, &di)) {
1199 di = (di < 0 ? INT64_MAX : INT64_MIN);
1200 }
1201 *(int64_t *)(d + i) = di;
1202 }
1203 clear_high(d, oprsz, desc);
1204 }
1205
HELPER(gvec_usadd8)1206 void HELPER(gvec_usadd8)(void *d, void *a, void *b, uint32_t desc)
1207 {
1208 intptr_t oprsz = simd_oprsz(desc);
1209 intptr_t i;
1210
1211 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1212 unsigned r = *(uint8_t *)(a + i) + *(uint8_t *)(b + i);
1213 if (r > UINT8_MAX) {
1214 r = UINT8_MAX;
1215 }
1216 *(uint8_t *)(d + i) = r;
1217 }
1218 clear_high(d, oprsz, desc);
1219 }
1220
HELPER(gvec_usadd16)1221 void HELPER(gvec_usadd16)(void *d, void *a, void *b, uint32_t desc)
1222 {
1223 intptr_t oprsz = simd_oprsz(desc);
1224 intptr_t i;
1225
1226 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1227 unsigned r = *(uint16_t *)(a + i) + *(uint16_t *)(b + i);
1228 if (r > UINT16_MAX) {
1229 r = UINT16_MAX;
1230 }
1231 *(uint16_t *)(d + i) = r;
1232 }
1233 clear_high(d, oprsz, desc);
1234 }
1235
HELPER(gvec_usadd32)1236 void HELPER(gvec_usadd32)(void *d, void *a, void *b, uint32_t desc)
1237 {
1238 intptr_t oprsz = simd_oprsz(desc);
1239 intptr_t i;
1240
1241 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1242 uint32_t ai = *(uint32_t *)(a + i);
1243 uint32_t bi = *(uint32_t *)(b + i);
1244 uint32_t di;
1245 if (uadd32_overflow(ai, bi, &di)) {
1246 di = UINT32_MAX;
1247 }
1248 *(uint32_t *)(d + i) = di;
1249 }
1250 clear_high(d, oprsz, desc);
1251 }
1252
HELPER(gvec_usadd64)1253 void HELPER(gvec_usadd64)(void *d, void *a, void *b, uint32_t desc)
1254 {
1255 intptr_t oprsz = simd_oprsz(desc);
1256 intptr_t i;
1257
1258 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1259 uint64_t ai = *(uint64_t *)(a + i);
1260 uint64_t bi = *(uint64_t *)(b + i);
1261 uint64_t di;
1262 if (uadd64_overflow(ai, bi, &di)) {
1263 di = UINT64_MAX;
1264 }
1265 *(uint64_t *)(d + i) = di;
1266 }
1267 clear_high(d, oprsz, desc);
1268 }
1269
HELPER(gvec_ussub8)1270 void HELPER(gvec_ussub8)(void *d, void *a, void *b, uint32_t desc)
1271 {
1272 intptr_t oprsz = simd_oprsz(desc);
1273 intptr_t i;
1274
1275 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1276 int r = *(uint8_t *)(a + i) - *(uint8_t *)(b + i);
1277 if (r < 0) {
1278 r = 0;
1279 }
1280 *(uint8_t *)(d + i) = r;
1281 }
1282 clear_high(d, oprsz, desc);
1283 }
1284
HELPER(gvec_ussub16)1285 void HELPER(gvec_ussub16)(void *d, void *a, void *b, uint32_t desc)
1286 {
1287 intptr_t oprsz = simd_oprsz(desc);
1288 intptr_t i;
1289
1290 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1291 int r = *(uint16_t *)(a + i) - *(uint16_t *)(b + i);
1292 if (r < 0) {
1293 r = 0;
1294 }
1295 *(uint16_t *)(d + i) = r;
1296 }
1297 clear_high(d, oprsz, desc);
1298 }
1299
HELPER(gvec_ussub32)1300 void HELPER(gvec_ussub32)(void *d, void *a, void *b, uint32_t desc)
1301 {
1302 intptr_t oprsz = simd_oprsz(desc);
1303 intptr_t i;
1304
1305 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1306 uint32_t ai = *(uint32_t *)(a + i);
1307 uint32_t bi = *(uint32_t *)(b + i);
1308 uint32_t di;
1309 if (usub32_overflow(ai, bi, &di)) {
1310 di = 0;
1311 }
1312 *(uint32_t *)(d + i) = di;
1313 }
1314 clear_high(d, oprsz, desc);
1315 }
1316
HELPER(gvec_ussub64)1317 void HELPER(gvec_ussub64)(void *d, void *a, void *b, uint32_t desc)
1318 {
1319 intptr_t oprsz = simd_oprsz(desc);
1320 intptr_t i;
1321
1322 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1323 uint64_t ai = *(uint64_t *)(a + i);
1324 uint64_t bi = *(uint64_t *)(b + i);
1325 uint64_t di;
1326 if (usub64_overflow(ai, bi, &di)) {
1327 di = 0;
1328 }
1329 *(uint64_t *)(d + i) = di;
1330 }
1331 clear_high(d, oprsz, desc);
1332 }
1333
HELPER(gvec_smin8)1334 void HELPER(gvec_smin8)(void *d, void *a, void *b, uint32_t desc)
1335 {
1336 intptr_t oprsz = simd_oprsz(desc);
1337 intptr_t i;
1338
1339 for (i = 0; i < oprsz; i += sizeof(int8_t)) {
1340 int8_t aa = *(int8_t *)(a + i);
1341 int8_t bb = *(int8_t *)(b + i);
1342 int8_t dd = aa < bb ? aa : bb;
1343 *(int8_t *)(d + i) = dd;
1344 }
1345 clear_high(d, oprsz, desc);
1346 }
1347
HELPER(gvec_smin16)1348 void HELPER(gvec_smin16)(void *d, void *a, void *b, uint32_t desc)
1349 {
1350 intptr_t oprsz = simd_oprsz(desc);
1351 intptr_t i;
1352
1353 for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1354 int16_t aa = *(int16_t *)(a + i);
1355 int16_t bb = *(int16_t *)(b + i);
1356 int16_t dd = aa < bb ? aa : bb;
1357 *(int16_t *)(d + i) = dd;
1358 }
1359 clear_high(d, oprsz, desc);
1360 }
1361
HELPER(gvec_smin32)1362 void HELPER(gvec_smin32)(void *d, void *a, void *b, uint32_t desc)
1363 {
1364 intptr_t oprsz = simd_oprsz(desc);
1365 intptr_t i;
1366
1367 for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1368 int32_t aa = *(int32_t *)(a + i);
1369 int32_t bb = *(int32_t *)(b + i);
1370 int32_t dd = aa < bb ? aa : bb;
1371 *(int32_t *)(d + i) = dd;
1372 }
1373 clear_high(d, oprsz, desc);
1374 }
1375
HELPER(gvec_smin64)1376 void HELPER(gvec_smin64)(void *d, void *a, void *b, uint32_t desc)
1377 {
1378 intptr_t oprsz = simd_oprsz(desc);
1379 intptr_t i;
1380
1381 for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1382 int64_t aa = *(int64_t *)(a + i);
1383 int64_t bb = *(int64_t *)(b + i);
1384 int64_t dd = aa < bb ? aa : bb;
1385 *(int64_t *)(d + i) = dd;
1386 }
1387 clear_high(d, oprsz, desc);
1388 }
1389
HELPER(gvec_smax8)1390 void HELPER(gvec_smax8)(void *d, void *a, void *b, uint32_t desc)
1391 {
1392 intptr_t oprsz = simd_oprsz(desc);
1393 intptr_t i;
1394
1395 for (i = 0; i < oprsz; i += sizeof(int8_t)) {
1396 int8_t aa = *(int8_t *)(a + i);
1397 int8_t bb = *(int8_t *)(b + i);
1398 int8_t dd = aa > bb ? aa : bb;
1399 *(int8_t *)(d + i) = dd;
1400 }
1401 clear_high(d, oprsz, desc);
1402 }
1403
HELPER(gvec_smax16)1404 void HELPER(gvec_smax16)(void *d, void *a, void *b, uint32_t desc)
1405 {
1406 intptr_t oprsz = simd_oprsz(desc);
1407 intptr_t i;
1408
1409 for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1410 int16_t aa = *(int16_t *)(a + i);
1411 int16_t bb = *(int16_t *)(b + i);
1412 int16_t dd = aa > bb ? aa : bb;
1413 *(int16_t *)(d + i) = dd;
1414 }
1415 clear_high(d, oprsz, desc);
1416 }
1417
HELPER(gvec_smax32)1418 void HELPER(gvec_smax32)(void *d, void *a, void *b, uint32_t desc)
1419 {
1420 intptr_t oprsz = simd_oprsz(desc);
1421 intptr_t i;
1422
1423 for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1424 int32_t aa = *(int32_t *)(a + i);
1425 int32_t bb = *(int32_t *)(b + i);
1426 int32_t dd = aa > bb ? aa : bb;
1427 *(int32_t *)(d + i) = dd;
1428 }
1429 clear_high(d, oprsz, desc);
1430 }
1431
HELPER(gvec_smax64)1432 void HELPER(gvec_smax64)(void *d, void *a, void *b, uint32_t desc)
1433 {
1434 intptr_t oprsz = simd_oprsz(desc);
1435 intptr_t i;
1436
1437 for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1438 int64_t aa = *(int64_t *)(a + i);
1439 int64_t bb = *(int64_t *)(b + i);
1440 int64_t dd = aa > bb ? aa : bb;
1441 *(int64_t *)(d + i) = dd;
1442 }
1443 clear_high(d, oprsz, desc);
1444 }
1445
HELPER(gvec_umin8)1446 void HELPER(gvec_umin8)(void *d, void *a, void *b, uint32_t desc)
1447 {
1448 intptr_t oprsz = simd_oprsz(desc);
1449 intptr_t i;
1450
1451 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1452 uint8_t aa = *(uint8_t *)(a + i);
1453 uint8_t bb = *(uint8_t *)(b + i);
1454 uint8_t dd = aa < bb ? aa : bb;
1455 *(uint8_t *)(d + i) = dd;
1456 }
1457 clear_high(d, oprsz, desc);
1458 }
1459
HELPER(gvec_umin16)1460 void HELPER(gvec_umin16)(void *d, void *a, void *b, uint32_t desc)
1461 {
1462 intptr_t oprsz = simd_oprsz(desc);
1463 intptr_t i;
1464
1465 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1466 uint16_t aa = *(uint16_t *)(a + i);
1467 uint16_t bb = *(uint16_t *)(b + i);
1468 uint16_t dd = aa < bb ? aa : bb;
1469 *(uint16_t *)(d + i) = dd;
1470 }
1471 clear_high(d, oprsz, desc);
1472 }
1473
HELPER(gvec_umin32)1474 void HELPER(gvec_umin32)(void *d, void *a, void *b, uint32_t desc)
1475 {
1476 intptr_t oprsz = simd_oprsz(desc);
1477 intptr_t i;
1478
1479 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1480 uint32_t aa = *(uint32_t *)(a + i);
1481 uint32_t bb = *(uint32_t *)(b + i);
1482 uint32_t dd = aa < bb ? aa : bb;
1483 *(uint32_t *)(d + i) = dd;
1484 }
1485 clear_high(d, oprsz, desc);
1486 }
1487
HELPER(gvec_umin64)1488 void HELPER(gvec_umin64)(void *d, void *a, void *b, uint32_t desc)
1489 {
1490 intptr_t oprsz = simd_oprsz(desc);
1491 intptr_t i;
1492
1493 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1494 uint64_t aa = *(uint64_t *)(a + i);
1495 uint64_t bb = *(uint64_t *)(b + i);
1496 uint64_t dd = aa < bb ? aa : bb;
1497 *(uint64_t *)(d + i) = dd;
1498 }
1499 clear_high(d, oprsz, desc);
1500 }
1501
HELPER(gvec_umax8)1502 void HELPER(gvec_umax8)(void *d, void *a, void *b, uint32_t desc)
1503 {
1504 intptr_t oprsz = simd_oprsz(desc);
1505 intptr_t i;
1506
1507 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1508 uint8_t aa = *(uint8_t *)(a + i);
1509 uint8_t bb = *(uint8_t *)(b + i);
1510 uint8_t dd = aa > bb ? aa : bb;
1511 *(uint8_t *)(d + i) = dd;
1512 }
1513 clear_high(d, oprsz, desc);
1514 }
1515
HELPER(gvec_umax16)1516 void HELPER(gvec_umax16)(void *d, void *a, void *b, uint32_t desc)
1517 {
1518 intptr_t oprsz = simd_oprsz(desc);
1519 intptr_t i;
1520
1521 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1522 uint16_t aa = *(uint16_t *)(a + i);
1523 uint16_t bb = *(uint16_t *)(b + i);
1524 uint16_t dd = aa > bb ? aa : bb;
1525 *(uint16_t *)(d + i) = dd;
1526 }
1527 clear_high(d, oprsz, desc);
1528 }
1529
HELPER(gvec_umax32)1530 void HELPER(gvec_umax32)(void *d, void *a, void *b, uint32_t desc)
1531 {
1532 intptr_t oprsz = simd_oprsz(desc);
1533 intptr_t i;
1534
1535 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1536 uint32_t aa = *(uint32_t *)(a + i);
1537 uint32_t bb = *(uint32_t *)(b + i);
1538 uint32_t dd = aa > bb ? aa : bb;
1539 *(uint32_t *)(d + i) = dd;
1540 }
1541 clear_high(d, oprsz, desc);
1542 }
1543
HELPER(gvec_umax64)1544 void HELPER(gvec_umax64)(void *d, void *a, void *b, uint32_t desc)
1545 {
1546 intptr_t oprsz = simd_oprsz(desc);
1547 intptr_t i;
1548
1549 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1550 uint64_t aa = *(uint64_t *)(a + i);
1551 uint64_t bb = *(uint64_t *)(b + i);
1552 uint64_t dd = aa > bb ? aa : bb;
1553 *(uint64_t *)(d + i) = dd;
1554 }
1555 clear_high(d, oprsz, desc);
1556 }
1557
HELPER(gvec_bitsel)1558 void HELPER(gvec_bitsel)(void *d, void *a, void *b, void *c, uint32_t desc)
1559 {
1560 intptr_t oprsz = simd_oprsz(desc);
1561 intptr_t i;
1562
1563 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1564 uint64_t aa = *(uint64_t *)(a + i);
1565 uint64_t bb = *(uint64_t *)(b + i);
1566 uint64_t cc = *(uint64_t *)(c + i);
1567 *(uint64_t *)(d + i) = (bb & aa) | (cc & ~aa);
1568 }
1569 clear_high(d, oprsz, desc);
1570 }
1571