1 /* 2 * Generic vectorized operation runtime 3 * 4 * Copyright (c) 2018 Linaro 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 #include "qemu/host-utils.h" 22 #include "exec/helper-proto-common.h" 23 #include "tcg/tcg-gvec-desc.h" 24 25 26 static inline void clear_high(void *d, intptr_t oprsz, uint32_t desc) 27 { 28 intptr_t maxsz = simd_maxsz(desc); 29 intptr_t i; 30 31 if (unlikely(maxsz > oprsz)) { 32 for (i = oprsz; i < maxsz; i += sizeof(uint64_t)) { 33 *(uint64_t *)(d + i) = 0; 34 } 35 } 36 } 37 38 void HELPER(gvec_add8)(void *d, void *a, void *b, uint32_t desc) 39 { 40 intptr_t oprsz = simd_oprsz(desc); 41 intptr_t i; 42 43 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 44 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) + *(uint8_t *)(b + i); 45 } 46 clear_high(d, oprsz, desc); 47 } 48 49 void HELPER(gvec_add16)(void *d, void *a, void *b, uint32_t desc) 50 { 51 intptr_t oprsz = simd_oprsz(desc); 52 intptr_t i; 53 54 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 55 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) + *(uint16_t *)(b + i); 56 } 57 clear_high(d, oprsz, desc); 58 } 59 60 void HELPER(gvec_add32)(void *d, void *a, void *b, uint32_t desc) 61 { 62 intptr_t oprsz = simd_oprsz(desc); 63 intptr_t i; 64 65 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 66 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) + *(uint32_t *)(b + i); 67 } 68 clear_high(d, oprsz, desc); 69 } 70 71 void HELPER(gvec_add64)(void *d, void *a, void *b, uint32_t desc) 72 { 73 intptr_t oprsz = simd_oprsz(desc); 74 intptr_t i; 75 76 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 77 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + *(uint64_t *)(b + i); 78 } 79 clear_high(d, oprsz, desc); 80 } 81 82 void HELPER(gvec_adds8)(void *d, void *a, uint64_t b, uint32_t desc) 83 { 84 intptr_t oprsz = simd_oprsz(desc); 85 intptr_t i; 86 87 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 88 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) + (uint8_t)b; 89 } 90 clear_high(d, oprsz, desc); 91 } 92 93 void HELPER(gvec_adds16)(void *d, void *a, uint64_t b, uint32_t desc) 94 { 95 intptr_t oprsz = simd_oprsz(desc); 96 intptr_t i; 97 98 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 99 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) + (uint16_t)b; 100 } 101 clear_high(d, oprsz, desc); 102 } 103 104 void HELPER(gvec_adds32)(void *d, void *a, uint64_t b, uint32_t desc) 105 { 106 intptr_t oprsz = simd_oprsz(desc); 107 intptr_t i; 108 109 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 110 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) + (uint32_t)b; 111 } 112 clear_high(d, oprsz, desc); 113 } 114 115 void HELPER(gvec_adds64)(void *d, void *a, uint64_t b, uint32_t desc) 116 { 117 intptr_t oprsz = simd_oprsz(desc); 118 intptr_t i; 119 120 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 121 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + b; 122 } 123 clear_high(d, oprsz, desc); 124 } 125 126 void HELPER(gvec_sub8)(void *d, void *a, void *b, uint32_t desc) 127 { 128 intptr_t oprsz = simd_oprsz(desc); 129 intptr_t i; 130 131 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 132 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) - *(uint8_t *)(b + i); 133 } 134 clear_high(d, oprsz, desc); 135 } 136 137 void HELPER(gvec_sub16)(void *d, void *a, void *b, uint32_t desc) 138 { 139 intptr_t oprsz = simd_oprsz(desc); 140 intptr_t i; 141 142 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 143 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) - *(uint16_t *)(b + i); 144 } 145 clear_high(d, oprsz, desc); 146 } 147 148 void HELPER(gvec_sub32)(void *d, void *a, void *b, uint32_t desc) 149 { 150 intptr_t oprsz = simd_oprsz(desc); 151 intptr_t i; 152 153 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 154 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) - *(uint32_t *)(b + i); 155 } 156 clear_high(d, oprsz, desc); 157 } 158 159 void HELPER(gvec_sub64)(void *d, void *a, void *b, uint32_t desc) 160 { 161 intptr_t oprsz = simd_oprsz(desc); 162 intptr_t i; 163 164 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 165 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) - *(uint64_t *)(b + i); 166 } 167 clear_high(d, oprsz, desc); 168 } 169 170 void HELPER(gvec_subs8)(void *d, void *a, uint64_t b, uint32_t desc) 171 { 172 intptr_t oprsz = simd_oprsz(desc); 173 intptr_t i; 174 175 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 176 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) - (uint8_t)b; 177 } 178 clear_high(d, oprsz, desc); 179 } 180 181 void HELPER(gvec_subs16)(void *d, void *a, uint64_t b, uint32_t desc) 182 { 183 intptr_t oprsz = simd_oprsz(desc); 184 intptr_t i; 185 186 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 187 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) - (uint16_t)b; 188 } 189 clear_high(d, oprsz, desc); 190 } 191 192 void HELPER(gvec_subs32)(void *d, void *a, uint64_t b, uint32_t desc) 193 { 194 intptr_t oprsz = simd_oprsz(desc); 195 intptr_t i; 196 197 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 198 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) - (uint32_t)b; 199 } 200 clear_high(d, oprsz, desc); 201 } 202 203 void HELPER(gvec_subs64)(void *d, void *a, uint64_t b, uint32_t desc) 204 { 205 intptr_t oprsz = simd_oprsz(desc); 206 intptr_t i; 207 208 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 209 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) - b; 210 } 211 clear_high(d, oprsz, desc); 212 } 213 214 void HELPER(gvec_mul8)(void *d, void *a, void *b, uint32_t desc) 215 { 216 intptr_t oprsz = simd_oprsz(desc); 217 intptr_t i; 218 219 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 220 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) * *(uint8_t *)(b + i); 221 } 222 clear_high(d, oprsz, desc); 223 } 224 225 void HELPER(gvec_mul16)(void *d, void *a, void *b, uint32_t desc) 226 { 227 intptr_t oprsz = simd_oprsz(desc); 228 intptr_t i; 229 230 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 231 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) * *(uint16_t *)(b + i); 232 } 233 clear_high(d, oprsz, desc); 234 } 235 236 void HELPER(gvec_mul32)(void *d, void *a, void *b, uint32_t desc) 237 { 238 intptr_t oprsz = simd_oprsz(desc); 239 intptr_t i; 240 241 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 242 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) * *(uint32_t *)(b + i); 243 } 244 clear_high(d, oprsz, desc); 245 } 246 247 void HELPER(gvec_mul64)(void *d, void *a, void *b, uint32_t desc) 248 { 249 intptr_t oprsz = simd_oprsz(desc); 250 intptr_t i; 251 252 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 253 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) * *(uint64_t *)(b + i); 254 } 255 clear_high(d, oprsz, desc); 256 } 257 258 void HELPER(gvec_muls8)(void *d, void *a, uint64_t b, uint32_t desc) 259 { 260 intptr_t oprsz = simd_oprsz(desc); 261 intptr_t i; 262 263 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 264 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) * (uint8_t)b; 265 } 266 clear_high(d, oprsz, desc); 267 } 268 269 void HELPER(gvec_muls16)(void *d, void *a, uint64_t b, uint32_t desc) 270 { 271 intptr_t oprsz = simd_oprsz(desc); 272 intptr_t i; 273 274 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 275 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) * (uint16_t)b; 276 } 277 clear_high(d, oprsz, desc); 278 } 279 280 void HELPER(gvec_muls32)(void *d, void *a, uint64_t b, uint32_t desc) 281 { 282 intptr_t oprsz = simd_oprsz(desc); 283 intptr_t i; 284 285 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 286 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) * (uint32_t)b; 287 } 288 clear_high(d, oprsz, desc); 289 } 290 291 void HELPER(gvec_muls64)(void *d, void *a, uint64_t b, uint32_t desc) 292 { 293 intptr_t oprsz = simd_oprsz(desc); 294 intptr_t i; 295 296 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 297 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) * b; 298 } 299 clear_high(d, oprsz, desc); 300 } 301 302 void HELPER(gvec_neg8)(void *d, void *a, uint32_t desc) 303 { 304 intptr_t oprsz = simd_oprsz(desc); 305 intptr_t i; 306 307 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 308 *(uint8_t *)(d + i) = -*(uint8_t *)(a + i); 309 } 310 clear_high(d, oprsz, desc); 311 } 312 313 void HELPER(gvec_neg16)(void *d, void *a, uint32_t desc) 314 { 315 intptr_t oprsz = simd_oprsz(desc); 316 intptr_t i; 317 318 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 319 *(uint16_t *)(d + i) = -*(uint16_t *)(a + i); 320 } 321 clear_high(d, oprsz, desc); 322 } 323 324 void HELPER(gvec_neg32)(void *d, void *a, uint32_t desc) 325 { 326 intptr_t oprsz = simd_oprsz(desc); 327 intptr_t i; 328 329 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 330 *(uint32_t *)(d + i) = -*(uint32_t *)(a + i); 331 } 332 clear_high(d, oprsz, desc); 333 } 334 335 void HELPER(gvec_neg64)(void *d, void *a, uint32_t desc) 336 { 337 intptr_t oprsz = simd_oprsz(desc); 338 intptr_t i; 339 340 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 341 *(uint64_t *)(d + i) = -*(uint64_t *)(a + i); 342 } 343 clear_high(d, oprsz, desc); 344 } 345 346 void HELPER(gvec_abs8)(void *d, void *a, uint32_t desc) 347 { 348 intptr_t oprsz = simd_oprsz(desc); 349 intptr_t i; 350 351 for (i = 0; i < oprsz; i += sizeof(int8_t)) { 352 int8_t aa = *(int8_t *)(a + i); 353 *(int8_t *)(d + i) = aa < 0 ? -aa : aa; 354 } 355 clear_high(d, oprsz, desc); 356 } 357 358 void HELPER(gvec_abs16)(void *d, void *a, uint32_t desc) 359 { 360 intptr_t oprsz = simd_oprsz(desc); 361 intptr_t i; 362 363 for (i = 0; i < oprsz; i += sizeof(int16_t)) { 364 int16_t aa = *(int16_t *)(a + i); 365 *(int16_t *)(d + i) = aa < 0 ? -aa : aa; 366 } 367 clear_high(d, oprsz, desc); 368 } 369 370 void HELPER(gvec_abs32)(void *d, void *a, uint32_t desc) 371 { 372 intptr_t oprsz = simd_oprsz(desc); 373 intptr_t i; 374 375 for (i = 0; i < oprsz; i += sizeof(int32_t)) { 376 int32_t aa = *(int32_t *)(a + i); 377 *(int32_t *)(d + i) = aa < 0 ? -aa : aa; 378 } 379 clear_high(d, oprsz, desc); 380 } 381 382 void HELPER(gvec_abs64)(void *d, void *a, uint32_t desc) 383 { 384 intptr_t oprsz = simd_oprsz(desc); 385 intptr_t i; 386 387 for (i = 0; i < oprsz; i += sizeof(int64_t)) { 388 int64_t aa = *(int64_t *)(a + i); 389 *(int64_t *)(d + i) = aa < 0 ? -aa : aa; 390 } 391 clear_high(d, oprsz, desc); 392 } 393 394 void HELPER(gvec_mov)(void *d, void *a, uint32_t desc) 395 { 396 intptr_t oprsz = simd_oprsz(desc); 397 398 memcpy(d, a, oprsz); 399 clear_high(d, oprsz, desc); 400 } 401 402 void HELPER(gvec_dup64)(void *d, uint32_t desc, uint64_t c) 403 { 404 intptr_t oprsz = simd_oprsz(desc); 405 intptr_t i; 406 407 if (c == 0) { 408 oprsz = 0; 409 } else { 410 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 411 *(uint64_t *)(d + i) = c; 412 } 413 } 414 clear_high(d, oprsz, desc); 415 } 416 417 void HELPER(gvec_dup32)(void *d, uint32_t desc, uint32_t c) 418 { 419 intptr_t oprsz = simd_oprsz(desc); 420 intptr_t i; 421 422 if (c == 0) { 423 oprsz = 0; 424 } else { 425 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 426 *(uint32_t *)(d + i) = c; 427 } 428 } 429 clear_high(d, oprsz, desc); 430 } 431 432 void HELPER(gvec_dup16)(void *d, uint32_t desc, uint32_t c) 433 { 434 HELPER(gvec_dup32)(d, desc, 0x00010001 * (c & 0xffff)); 435 } 436 437 void HELPER(gvec_dup8)(void *d, uint32_t desc, uint32_t c) 438 { 439 HELPER(gvec_dup32)(d, desc, 0x01010101 * (c & 0xff)); 440 } 441 442 void HELPER(gvec_not)(void *d, void *a, uint32_t desc) 443 { 444 intptr_t oprsz = simd_oprsz(desc); 445 intptr_t i; 446 447 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 448 *(uint64_t *)(d + i) = ~*(uint64_t *)(a + i); 449 } 450 clear_high(d, oprsz, desc); 451 } 452 453 void HELPER(gvec_and)(void *d, void *a, void *b, uint32_t desc) 454 { 455 intptr_t oprsz = simd_oprsz(desc); 456 intptr_t i; 457 458 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 459 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & *(uint64_t *)(b + i); 460 } 461 clear_high(d, oprsz, desc); 462 } 463 464 void HELPER(gvec_or)(void *d, void *a, void *b, uint32_t desc) 465 { 466 intptr_t oprsz = simd_oprsz(desc); 467 intptr_t i; 468 469 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 470 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) | *(uint64_t *)(b + i); 471 } 472 clear_high(d, oprsz, desc); 473 } 474 475 void HELPER(gvec_xor)(void *d, void *a, void *b, uint32_t desc) 476 { 477 intptr_t oprsz = simd_oprsz(desc); 478 intptr_t i; 479 480 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 481 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) ^ *(uint64_t *)(b + i); 482 } 483 clear_high(d, oprsz, desc); 484 } 485 486 void HELPER(gvec_andc)(void *d, void *a, void *b, uint32_t desc) 487 { 488 intptr_t oprsz = simd_oprsz(desc); 489 intptr_t i; 490 491 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 492 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) &~ *(uint64_t *)(b + i); 493 } 494 clear_high(d, oprsz, desc); 495 } 496 497 void HELPER(gvec_orc)(void *d, void *a, void *b, uint32_t desc) 498 { 499 intptr_t oprsz = simd_oprsz(desc); 500 intptr_t i; 501 502 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 503 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) |~ *(uint64_t *)(b + i); 504 } 505 clear_high(d, oprsz, desc); 506 } 507 508 void HELPER(gvec_nand)(void *d, void *a, void *b, uint32_t desc) 509 { 510 intptr_t oprsz = simd_oprsz(desc); 511 intptr_t i; 512 513 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 514 *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) & *(uint64_t *)(b + i)); 515 } 516 clear_high(d, oprsz, desc); 517 } 518 519 void HELPER(gvec_nor)(void *d, void *a, void *b, uint32_t desc) 520 { 521 intptr_t oprsz = simd_oprsz(desc); 522 intptr_t i; 523 524 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 525 *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) | *(uint64_t *)(b + i)); 526 } 527 clear_high(d, oprsz, desc); 528 } 529 530 void HELPER(gvec_eqv)(void *d, void *a, void *b, uint32_t desc) 531 { 532 intptr_t oprsz = simd_oprsz(desc); 533 intptr_t i; 534 535 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 536 *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) ^ *(uint64_t *)(b + i)); 537 } 538 clear_high(d, oprsz, desc); 539 } 540 541 void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc) 542 { 543 intptr_t oprsz = simd_oprsz(desc); 544 intptr_t i; 545 546 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 547 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & b; 548 } 549 clear_high(d, oprsz, desc); 550 } 551 552 void HELPER(gvec_andcs)(void *d, void *a, uint64_t b, uint32_t desc) 553 { 554 intptr_t oprsz = simd_oprsz(desc); 555 intptr_t i; 556 557 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 558 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & ~b; 559 } 560 clear_high(d, oprsz, desc); 561 } 562 563 void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc) 564 { 565 intptr_t oprsz = simd_oprsz(desc); 566 intptr_t i; 567 568 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 569 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) ^ b; 570 } 571 clear_high(d, oprsz, desc); 572 } 573 574 void HELPER(gvec_ors)(void *d, void *a, uint64_t b, uint32_t desc) 575 { 576 intptr_t oprsz = simd_oprsz(desc); 577 intptr_t i; 578 579 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 580 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) | b; 581 } 582 clear_high(d, oprsz, desc); 583 } 584 585 void HELPER(gvec_shl8i)(void *d, void *a, uint32_t desc) 586 { 587 intptr_t oprsz = simd_oprsz(desc); 588 int shift = simd_data(desc); 589 intptr_t i; 590 591 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 592 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << shift; 593 } 594 clear_high(d, oprsz, desc); 595 } 596 597 void HELPER(gvec_shl16i)(void *d, void *a, uint32_t desc) 598 { 599 intptr_t oprsz = simd_oprsz(desc); 600 int shift = simd_data(desc); 601 intptr_t i; 602 603 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 604 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << shift; 605 } 606 clear_high(d, oprsz, desc); 607 } 608 609 void HELPER(gvec_shl32i)(void *d, void *a, uint32_t desc) 610 { 611 intptr_t oprsz = simd_oprsz(desc); 612 int shift = simd_data(desc); 613 intptr_t i; 614 615 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 616 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << shift; 617 } 618 clear_high(d, oprsz, desc); 619 } 620 621 void HELPER(gvec_shl64i)(void *d, void *a, uint32_t desc) 622 { 623 intptr_t oprsz = simd_oprsz(desc); 624 int shift = simd_data(desc); 625 intptr_t i; 626 627 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 628 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << shift; 629 } 630 clear_high(d, oprsz, desc); 631 } 632 633 void HELPER(gvec_shr8i)(void *d, void *a, uint32_t desc) 634 { 635 intptr_t oprsz = simd_oprsz(desc); 636 int shift = simd_data(desc); 637 intptr_t i; 638 639 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 640 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> shift; 641 } 642 clear_high(d, oprsz, desc); 643 } 644 645 void HELPER(gvec_shr16i)(void *d, void *a, uint32_t desc) 646 { 647 intptr_t oprsz = simd_oprsz(desc); 648 int shift = simd_data(desc); 649 intptr_t i; 650 651 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 652 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> shift; 653 } 654 clear_high(d, oprsz, desc); 655 } 656 657 void HELPER(gvec_shr32i)(void *d, void *a, uint32_t desc) 658 { 659 intptr_t oprsz = simd_oprsz(desc); 660 int shift = simd_data(desc); 661 intptr_t i; 662 663 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 664 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> shift; 665 } 666 clear_high(d, oprsz, desc); 667 } 668 669 void HELPER(gvec_shr64i)(void *d, void *a, uint32_t desc) 670 { 671 intptr_t oprsz = simd_oprsz(desc); 672 int shift = simd_data(desc); 673 intptr_t i; 674 675 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 676 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> shift; 677 } 678 clear_high(d, oprsz, desc); 679 } 680 681 void HELPER(gvec_sar8i)(void *d, void *a, uint32_t desc) 682 { 683 intptr_t oprsz = simd_oprsz(desc); 684 int shift = simd_data(desc); 685 intptr_t i; 686 687 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 688 *(int8_t *)(d + i) = *(int8_t *)(a + i) >> shift; 689 } 690 clear_high(d, oprsz, desc); 691 } 692 693 void HELPER(gvec_sar16i)(void *d, void *a, uint32_t desc) 694 { 695 intptr_t oprsz = simd_oprsz(desc); 696 int shift = simd_data(desc); 697 intptr_t i; 698 699 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 700 *(int16_t *)(d + i) = *(int16_t *)(a + i) >> shift; 701 } 702 clear_high(d, oprsz, desc); 703 } 704 705 void HELPER(gvec_sar32i)(void *d, void *a, uint32_t desc) 706 { 707 intptr_t oprsz = simd_oprsz(desc); 708 int shift = simd_data(desc); 709 intptr_t i; 710 711 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 712 *(int32_t *)(d + i) = *(int32_t *)(a + i) >> shift; 713 } 714 clear_high(d, oprsz, desc); 715 } 716 717 void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc) 718 { 719 intptr_t oprsz = simd_oprsz(desc); 720 int shift = simd_data(desc); 721 intptr_t i; 722 723 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 724 *(int64_t *)(d + i) = *(int64_t *)(a + i) >> shift; 725 } 726 clear_high(d, oprsz, desc); 727 } 728 729 void HELPER(gvec_rotl8i)(void *d, void *a, uint32_t desc) 730 { 731 intptr_t oprsz = simd_oprsz(desc); 732 int shift = simd_data(desc); 733 intptr_t i; 734 735 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 736 *(uint8_t *)(d + i) = rol8(*(uint8_t *)(a + i), shift); 737 } 738 clear_high(d, oprsz, desc); 739 } 740 741 void HELPER(gvec_rotl16i)(void *d, void *a, uint32_t desc) 742 { 743 intptr_t oprsz = simd_oprsz(desc); 744 int shift = simd_data(desc); 745 intptr_t i; 746 747 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 748 *(uint16_t *)(d + i) = rol16(*(uint16_t *)(a + i), shift); 749 } 750 clear_high(d, oprsz, desc); 751 } 752 753 void HELPER(gvec_rotl32i)(void *d, void *a, uint32_t desc) 754 { 755 intptr_t oprsz = simd_oprsz(desc); 756 int shift = simd_data(desc); 757 intptr_t i; 758 759 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 760 *(uint32_t *)(d + i) = rol32(*(uint32_t *)(a + i), shift); 761 } 762 clear_high(d, oprsz, desc); 763 } 764 765 void HELPER(gvec_rotl64i)(void *d, void *a, uint32_t desc) 766 { 767 intptr_t oprsz = simd_oprsz(desc); 768 int shift = simd_data(desc); 769 intptr_t i; 770 771 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 772 *(uint64_t *)(d + i) = rol64(*(uint64_t *)(a + i), shift); 773 } 774 clear_high(d, oprsz, desc); 775 } 776 777 void HELPER(gvec_shl8v)(void *d, void *a, void *b, uint32_t desc) 778 { 779 intptr_t oprsz = simd_oprsz(desc); 780 intptr_t i; 781 782 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 783 uint8_t sh = *(uint8_t *)(b + i) & 7; 784 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << sh; 785 } 786 clear_high(d, oprsz, desc); 787 } 788 789 void HELPER(gvec_shl16v)(void *d, void *a, void *b, uint32_t desc) 790 { 791 intptr_t oprsz = simd_oprsz(desc); 792 intptr_t i; 793 794 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 795 uint8_t sh = *(uint16_t *)(b + i) & 15; 796 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << sh; 797 } 798 clear_high(d, oprsz, desc); 799 } 800 801 void HELPER(gvec_shl32v)(void *d, void *a, void *b, uint32_t desc) 802 { 803 intptr_t oprsz = simd_oprsz(desc); 804 intptr_t i; 805 806 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 807 uint8_t sh = *(uint32_t *)(b + i) & 31; 808 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << sh; 809 } 810 clear_high(d, oprsz, desc); 811 } 812 813 void HELPER(gvec_shl64v)(void *d, void *a, void *b, uint32_t desc) 814 { 815 intptr_t oprsz = simd_oprsz(desc); 816 intptr_t i; 817 818 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 819 uint8_t sh = *(uint64_t *)(b + i) & 63; 820 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << sh; 821 } 822 clear_high(d, oprsz, desc); 823 } 824 825 void HELPER(gvec_shr8v)(void *d, void *a, void *b, uint32_t desc) 826 { 827 intptr_t oprsz = simd_oprsz(desc); 828 intptr_t i; 829 830 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 831 uint8_t sh = *(uint8_t *)(b + i) & 7; 832 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> sh; 833 } 834 clear_high(d, oprsz, desc); 835 } 836 837 void HELPER(gvec_shr16v)(void *d, void *a, void *b, uint32_t desc) 838 { 839 intptr_t oprsz = simd_oprsz(desc); 840 intptr_t i; 841 842 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 843 uint8_t sh = *(uint16_t *)(b + i) & 15; 844 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> sh; 845 } 846 clear_high(d, oprsz, desc); 847 } 848 849 void HELPER(gvec_shr32v)(void *d, void *a, void *b, uint32_t desc) 850 { 851 intptr_t oprsz = simd_oprsz(desc); 852 intptr_t i; 853 854 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 855 uint8_t sh = *(uint32_t *)(b + i) & 31; 856 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> sh; 857 } 858 clear_high(d, oprsz, desc); 859 } 860 861 void HELPER(gvec_shr64v)(void *d, void *a, void *b, uint32_t desc) 862 { 863 intptr_t oprsz = simd_oprsz(desc); 864 intptr_t i; 865 866 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 867 uint8_t sh = *(uint64_t *)(b + i) & 63; 868 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> sh; 869 } 870 clear_high(d, oprsz, desc); 871 } 872 873 void HELPER(gvec_sar8v)(void *d, void *a, void *b, uint32_t desc) 874 { 875 intptr_t oprsz = simd_oprsz(desc); 876 intptr_t i; 877 878 for (i = 0; i < oprsz; i += sizeof(int8_t)) { 879 uint8_t sh = *(uint8_t *)(b + i) & 7; 880 *(int8_t *)(d + i) = *(int8_t *)(a + i) >> sh; 881 } 882 clear_high(d, oprsz, desc); 883 } 884 885 void HELPER(gvec_sar16v)(void *d, void *a, void *b, uint32_t desc) 886 { 887 intptr_t oprsz = simd_oprsz(desc); 888 intptr_t i; 889 890 for (i = 0; i < oprsz; i += sizeof(int16_t)) { 891 uint8_t sh = *(uint16_t *)(b + i) & 15; 892 *(int16_t *)(d + i) = *(int16_t *)(a + i) >> sh; 893 } 894 clear_high(d, oprsz, desc); 895 } 896 897 void HELPER(gvec_sar32v)(void *d, void *a, void *b, uint32_t desc) 898 { 899 intptr_t oprsz = simd_oprsz(desc); 900 intptr_t i; 901 902 for (i = 0; i < oprsz; i += sizeof(int32_t)) { 903 uint8_t sh = *(uint32_t *)(b + i) & 31; 904 *(int32_t *)(d + i) = *(int32_t *)(a + i) >> sh; 905 } 906 clear_high(d, oprsz, desc); 907 } 908 909 void HELPER(gvec_sar64v)(void *d, void *a, void *b, uint32_t desc) 910 { 911 intptr_t oprsz = simd_oprsz(desc); 912 intptr_t i; 913 914 for (i = 0; i < oprsz; i += sizeof(int64_t)) { 915 uint8_t sh = *(uint64_t *)(b + i) & 63; 916 *(int64_t *)(d + i) = *(int64_t *)(a + i) >> sh; 917 } 918 clear_high(d, oprsz, desc); 919 } 920 921 void HELPER(gvec_rotl8v)(void *d, void *a, void *b, uint32_t desc) 922 { 923 intptr_t oprsz = simd_oprsz(desc); 924 intptr_t i; 925 926 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 927 uint8_t sh = *(uint8_t *)(b + i) & 7; 928 *(uint8_t *)(d + i) = rol8(*(uint8_t *)(a + i), sh); 929 } 930 clear_high(d, oprsz, desc); 931 } 932 933 void HELPER(gvec_rotl16v)(void *d, void *a, void *b, uint32_t desc) 934 { 935 intptr_t oprsz = simd_oprsz(desc); 936 intptr_t i; 937 938 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 939 uint8_t sh = *(uint16_t *)(b + i) & 15; 940 *(uint16_t *)(d + i) = rol16(*(uint16_t *)(a + i), sh); 941 } 942 clear_high(d, oprsz, desc); 943 } 944 945 void HELPER(gvec_rotl32v)(void *d, void *a, void *b, uint32_t desc) 946 { 947 intptr_t oprsz = simd_oprsz(desc); 948 intptr_t i; 949 950 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 951 uint8_t sh = *(uint32_t *)(b + i) & 31; 952 *(uint32_t *)(d + i) = rol32(*(uint32_t *)(a + i), sh); 953 } 954 clear_high(d, oprsz, desc); 955 } 956 957 void HELPER(gvec_rotl64v)(void *d, void *a, void *b, uint32_t desc) 958 { 959 intptr_t oprsz = simd_oprsz(desc); 960 intptr_t i; 961 962 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 963 uint8_t sh = *(uint64_t *)(b + i) & 63; 964 *(uint64_t *)(d + i) = rol64(*(uint64_t *)(a + i), sh); 965 } 966 clear_high(d, oprsz, desc); 967 } 968 969 void HELPER(gvec_rotr8v)(void *d, void *a, void *b, uint32_t desc) 970 { 971 intptr_t oprsz = simd_oprsz(desc); 972 intptr_t i; 973 974 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 975 uint8_t sh = *(uint8_t *)(b + i) & 7; 976 *(uint8_t *)(d + i) = ror8(*(uint8_t *)(a + i), sh); 977 } 978 clear_high(d, oprsz, desc); 979 } 980 981 void HELPER(gvec_rotr16v)(void *d, void *a, void *b, uint32_t desc) 982 { 983 intptr_t oprsz = simd_oprsz(desc); 984 intptr_t i; 985 986 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 987 uint8_t sh = *(uint16_t *)(b + i) & 15; 988 *(uint16_t *)(d + i) = ror16(*(uint16_t *)(a + i), sh); 989 } 990 clear_high(d, oprsz, desc); 991 } 992 993 void HELPER(gvec_rotr32v)(void *d, void *a, void *b, uint32_t desc) 994 { 995 intptr_t oprsz = simd_oprsz(desc); 996 intptr_t i; 997 998 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 999 uint8_t sh = *(uint32_t *)(b + i) & 31; 1000 *(uint32_t *)(d + i) = ror32(*(uint32_t *)(a + i), sh); 1001 } 1002 clear_high(d, oprsz, desc); 1003 } 1004 1005 void HELPER(gvec_rotr64v)(void *d, void *a, void *b, uint32_t desc) 1006 { 1007 intptr_t oprsz = simd_oprsz(desc); 1008 intptr_t i; 1009 1010 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1011 uint8_t sh = *(uint64_t *)(b + i) & 63; 1012 *(uint64_t *)(d + i) = ror64(*(uint64_t *)(a + i), sh); 1013 } 1014 clear_high(d, oprsz, desc); 1015 } 1016 1017 #define DO_CMP1(NAME, TYPE, OP) \ 1018 void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc) \ 1019 { \ 1020 intptr_t oprsz = simd_oprsz(desc); \ 1021 intptr_t i; \ 1022 for (i = 0; i < oprsz; i += sizeof(TYPE)) { \ 1023 *(TYPE *)(d + i) = -(*(TYPE *)(a + i) OP *(TYPE *)(b + i)); \ 1024 } \ 1025 clear_high(d, oprsz, desc); \ 1026 } 1027 1028 #define DO_CMP2(SZ) \ 1029 DO_CMP1(gvec_eq##SZ, uint##SZ##_t, ==) \ 1030 DO_CMP1(gvec_ne##SZ, uint##SZ##_t, !=) \ 1031 DO_CMP1(gvec_lt##SZ, int##SZ##_t, <) \ 1032 DO_CMP1(gvec_le##SZ, int##SZ##_t, <=) \ 1033 DO_CMP1(gvec_ltu##SZ, uint##SZ##_t, <) \ 1034 DO_CMP1(gvec_leu##SZ, uint##SZ##_t, <=) 1035 1036 DO_CMP2(8) 1037 DO_CMP2(16) 1038 DO_CMP2(32) 1039 DO_CMP2(64) 1040 1041 #undef DO_CMP1 1042 #undef DO_CMP2 1043 1044 #define DO_CMP1(NAME, TYPE, OP) \ 1045 void HELPER(NAME)(void *d, void *a, uint64_t b64, uint32_t desc) \ 1046 { \ 1047 intptr_t oprsz = simd_oprsz(desc); \ 1048 TYPE inv = simd_data(desc), b = b64; \ 1049 for (intptr_t i = 0; i < oprsz; i += sizeof(TYPE)) { \ 1050 *(TYPE *)(d + i) = -((*(TYPE *)(a + i) OP b) ^ inv); \ 1051 } \ 1052 clear_high(d, oprsz, desc); \ 1053 } 1054 1055 #define DO_CMP2(SZ) \ 1056 DO_CMP1(gvec_eqs##SZ, uint##SZ##_t, ==) \ 1057 DO_CMP1(gvec_lts##SZ, int##SZ##_t, <) \ 1058 DO_CMP1(gvec_les##SZ, int##SZ##_t, <=) \ 1059 DO_CMP1(gvec_ltus##SZ, uint##SZ##_t, <) \ 1060 DO_CMP1(gvec_leus##SZ, uint##SZ##_t, <=) 1061 1062 DO_CMP2(8) 1063 DO_CMP2(16) 1064 DO_CMP2(32) 1065 DO_CMP2(64) 1066 1067 #undef DO_CMP1 1068 #undef DO_CMP2 1069 1070 void HELPER(gvec_ssadd8)(void *d, void *a, void *b, uint32_t desc) 1071 { 1072 intptr_t oprsz = simd_oprsz(desc); 1073 intptr_t i; 1074 1075 for (i = 0; i < oprsz; i += sizeof(int8_t)) { 1076 int r = *(int8_t *)(a + i) + *(int8_t *)(b + i); 1077 if (r > INT8_MAX) { 1078 r = INT8_MAX; 1079 } else if (r < INT8_MIN) { 1080 r = INT8_MIN; 1081 } 1082 *(int8_t *)(d + i) = r; 1083 } 1084 clear_high(d, oprsz, desc); 1085 } 1086 1087 void HELPER(gvec_ssadd16)(void *d, void *a, void *b, uint32_t desc) 1088 { 1089 intptr_t oprsz = simd_oprsz(desc); 1090 intptr_t i; 1091 1092 for (i = 0; i < oprsz; i += sizeof(int16_t)) { 1093 int r = *(int16_t *)(a + i) + *(int16_t *)(b + i); 1094 if (r > INT16_MAX) { 1095 r = INT16_MAX; 1096 } else if (r < INT16_MIN) { 1097 r = INT16_MIN; 1098 } 1099 *(int16_t *)(d + i) = r; 1100 } 1101 clear_high(d, oprsz, desc); 1102 } 1103 1104 void HELPER(gvec_ssadd32)(void *d, void *a, void *b, uint32_t desc) 1105 { 1106 intptr_t oprsz = simd_oprsz(desc); 1107 intptr_t i; 1108 1109 for (i = 0; i < oprsz; i += sizeof(int32_t)) { 1110 int32_t ai = *(int32_t *)(a + i); 1111 int32_t bi = *(int32_t *)(b + i); 1112 int32_t di; 1113 if (sadd32_overflow(ai, bi, &di)) { 1114 di = (di < 0 ? INT32_MAX : INT32_MIN); 1115 } 1116 *(int32_t *)(d + i) = di; 1117 } 1118 clear_high(d, oprsz, desc); 1119 } 1120 1121 void HELPER(gvec_ssadd64)(void *d, void *a, void *b, uint32_t desc) 1122 { 1123 intptr_t oprsz = simd_oprsz(desc); 1124 intptr_t i; 1125 1126 for (i = 0; i < oprsz; i += sizeof(int64_t)) { 1127 int64_t ai = *(int64_t *)(a + i); 1128 int64_t bi = *(int64_t *)(b + i); 1129 int64_t di; 1130 if (sadd64_overflow(ai, bi, &di)) { 1131 di = (di < 0 ? INT64_MAX : INT64_MIN); 1132 } 1133 *(int64_t *)(d + i) = di; 1134 } 1135 clear_high(d, oprsz, desc); 1136 } 1137 1138 void HELPER(gvec_sssub8)(void *d, void *a, void *b, uint32_t desc) 1139 { 1140 intptr_t oprsz = simd_oprsz(desc); 1141 intptr_t i; 1142 1143 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1144 int r = *(int8_t *)(a + i) - *(int8_t *)(b + i); 1145 if (r > INT8_MAX) { 1146 r = INT8_MAX; 1147 } else if (r < INT8_MIN) { 1148 r = INT8_MIN; 1149 } 1150 *(uint8_t *)(d + i) = r; 1151 } 1152 clear_high(d, oprsz, desc); 1153 } 1154 1155 void HELPER(gvec_sssub16)(void *d, void *a, void *b, uint32_t desc) 1156 { 1157 intptr_t oprsz = simd_oprsz(desc); 1158 intptr_t i; 1159 1160 for (i = 0; i < oprsz; i += sizeof(int16_t)) { 1161 int r = *(int16_t *)(a + i) - *(int16_t *)(b + i); 1162 if (r > INT16_MAX) { 1163 r = INT16_MAX; 1164 } else if (r < INT16_MIN) { 1165 r = INT16_MIN; 1166 } 1167 *(int16_t *)(d + i) = r; 1168 } 1169 clear_high(d, oprsz, desc); 1170 } 1171 1172 void HELPER(gvec_sssub32)(void *d, void *a, void *b, uint32_t desc) 1173 { 1174 intptr_t oprsz = simd_oprsz(desc); 1175 intptr_t i; 1176 1177 for (i = 0; i < oprsz; i += sizeof(int32_t)) { 1178 int32_t ai = *(int32_t *)(a + i); 1179 int32_t bi = *(int32_t *)(b + i); 1180 int32_t di; 1181 if (ssub32_overflow(ai, bi, &di)) { 1182 di = (di < 0 ? INT32_MAX : INT32_MIN); 1183 } 1184 *(int32_t *)(d + i) = di; 1185 } 1186 clear_high(d, oprsz, desc); 1187 } 1188 1189 void HELPER(gvec_sssub64)(void *d, void *a, void *b, uint32_t desc) 1190 { 1191 intptr_t oprsz = simd_oprsz(desc); 1192 intptr_t i; 1193 1194 for (i = 0; i < oprsz; i += sizeof(int64_t)) { 1195 int64_t ai = *(int64_t *)(a + i); 1196 int64_t bi = *(int64_t *)(b + i); 1197 int64_t di; 1198 if (ssub64_overflow(ai, bi, &di)) { 1199 di = (di < 0 ? INT64_MAX : INT64_MIN); 1200 } 1201 *(int64_t *)(d + i) = di; 1202 } 1203 clear_high(d, oprsz, desc); 1204 } 1205 1206 void HELPER(gvec_usadd8)(void *d, void *a, void *b, uint32_t desc) 1207 { 1208 intptr_t oprsz = simd_oprsz(desc); 1209 intptr_t i; 1210 1211 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1212 unsigned r = *(uint8_t *)(a + i) + *(uint8_t *)(b + i); 1213 if (r > UINT8_MAX) { 1214 r = UINT8_MAX; 1215 } 1216 *(uint8_t *)(d + i) = r; 1217 } 1218 clear_high(d, oprsz, desc); 1219 } 1220 1221 void HELPER(gvec_usadd16)(void *d, void *a, void *b, uint32_t desc) 1222 { 1223 intptr_t oprsz = simd_oprsz(desc); 1224 intptr_t i; 1225 1226 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 1227 unsigned r = *(uint16_t *)(a + i) + *(uint16_t *)(b + i); 1228 if (r > UINT16_MAX) { 1229 r = UINT16_MAX; 1230 } 1231 *(uint16_t *)(d + i) = r; 1232 } 1233 clear_high(d, oprsz, desc); 1234 } 1235 1236 void HELPER(gvec_usadd32)(void *d, void *a, void *b, uint32_t desc) 1237 { 1238 intptr_t oprsz = simd_oprsz(desc); 1239 intptr_t i; 1240 1241 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1242 uint32_t ai = *(uint32_t *)(a + i); 1243 uint32_t bi = *(uint32_t *)(b + i); 1244 uint32_t di; 1245 if (uadd32_overflow(ai, bi, &di)) { 1246 di = UINT32_MAX; 1247 } 1248 *(uint32_t *)(d + i) = di; 1249 } 1250 clear_high(d, oprsz, desc); 1251 } 1252 1253 void HELPER(gvec_usadd64)(void *d, void *a, void *b, uint32_t desc) 1254 { 1255 intptr_t oprsz = simd_oprsz(desc); 1256 intptr_t i; 1257 1258 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1259 uint64_t ai = *(uint64_t *)(a + i); 1260 uint64_t bi = *(uint64_t *)(b + i); 1261 uint64_t di; 1262 if (uadd64_overflow(ai, bi, &di)) { 1263 di = UINT64_MAX; 1264 } 1265 *(uint64_t *)(d + i) = di; 1266 } 1267 clear_high(d, oprsz, desc); 1268 } 1269 1270 void HELPER(gvec_ussub8)(void *d, void *a, void *b, uint32_t desc) 1271 { 1272 intptr_t oprsz = simd_oprsz(desc); 1273 intptr_t i; 1274 1275 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1276 int r = *(uint8_t *)(a + i) - *(uint8_t *)(b + i); 1277 if (r < 0) { 1278 r = 0; 1279 } 1280 *(uint8_t *)(d + i) = r; 1281 } 1282 clear_high(d, oprsz, desc); 1283 } 1284 1285 void HELPER(gvec_ussub16)(void *d, void *a, void *b, uint32_t desc) 1286 { 1287 intptr_t oprsz = simd_oprsz(desc); 1288 intptr_t i; 1289 1290 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 1291 int r = *(uint16_t *)(a + i) - *(uint16_t *)(b + i); 1292 if (r < 0) { 1293 r = 0; 1294 } 1295 *(uint16_t *)(d + i) = r; 1296 } 1297 clear_high(d, oprsz, desc); 1298 } 1299 1300 void HELPER(gvec_ussub32)(void *d, void *a, void *b, uint32_t desc) 1301 { 1302 intptr_t oprsz = simd_oprsz(desc); 1303 intptr_t i; 1304 1305 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1306 uint32_t ai = *(uint32_t *)(a + i); 1307 uint32_t bi = *(uint32_t *)(b + i); 1308 uint32_t di; 1309 if (usub32_overflow(ai, bi, &di)) { 1310 di = 0; 1311 } 1312 *(uint32_t *)(d + i) = di; 1313 } 1314 clear_high(d, oprsz, desc); 1315 } 1316 1317 void HELPER(gvec_ussub64)(void *d, void *a, void *b, uint32_t desc) 1318 { 1319 intptr_t oprsz = simd_oprsz(desc); 1320 intptr_t i; 1321 1322 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1323 uint64_t ai = *(uint64_t *)(a + i); 1324 uint64_t bi = *(uint64_t *)(b + i); 1325 uint64_t di; 1326 if (usub64_overflow(ai, bi, &di)) { 1327 di = 0; 1328 } 1329 *(uint64_t *)(d + i) = di; 1330 } 1331 clear_high(d, oprsz, desc); 1332 } 1333 1334 void HELPER(gvec_smin8)(void *d, void *a, void *b, uint32_t desc) 1335 { 1336 intptr_t oprsz = simd_oprsz(desc); 1337 intptr_t i; 1338 1339 for (i = 0; i < oprsz; i += sizeof(int8_t)) { 1340 int8_t aa = *(int8_t *)(a + i); 1341 int8_t bb = *(int8_t *)(b + i); 1342 int8_t dd = aa < bb ? aa : bb; 1343 *(int8_t *)(d + i) = dd; 1344 } 1345 clear_high(d, oprsz, desc); 1346 } 1347 1348 void HELPER(gvec_smin16)(void *d, void *a, void *b, uint32_t desc) 1349 { 1350 intptr_t oprsz = simd_oprsz(desc); 1351 intptr_t i; 1352 1353 for (i = 0; i < oprsz; i += sizeof(int16_t)) { 1354 int16_t aa = *(int16_t *)(a + i); 1355 int16_t bb = *(int16_t *)(b + i); 1356 int16_t dd = aa < bb ? aa : bb; 1357 *(int16_t *)(d + i) = dd; 1358 } 1359 clear_high(d, oprsz, desc); 1360 } 1361 1362 void HELPER(gvec_smin32)(void *d, void *a, void *b, uint32_t desc) 1363 { 1364 intptr_t oprsz = simd_oprsz(desc); 1365 intptr_t i; 1366 1367 for (i = 0; i < oprsz; i += sizeof(int32_t)) { 1368 int32_t aa = *(int32_t *)(a + i); 1369 int32_t bb = *(int32_t *)(b + i); 1370 int32_t dd = aa < bb ? aa : bb; 1371 *(int32_t *)(d + i) = dd; 1372 } 1373 clear_high(d, oprsz, desc); 1374 } 1375 1376 void HELPER(gvec_smin64)(void *d, void *a, void *b, uint32_t desc) 1377 { 1378 intptr_t oprsz = simd_oprsz(desc); 1379 intptr_t i; 1380 1381 for (i = 0; i < oprsz; i += sizeof(int64_t)) { 1382 int64_t aa = *(int64_t *)(a + i); 1383 int64_t bb = *(int64_t *)(b + i); 1384 int64_t dd = aa < bb ? aa : bb; 1385 *(int64_t *)(d + i) = dd; 1386 } 1387 clear_high(d, oprsz, desc); 1388 } 1389 1390 void HELPER(gvec_smax8)(void *d, void *a, void *b, uint32_t desc) 1391 { 1392 intptr_t oprsz = simd_oprsz(desc); 1393 intptr_t i; 1394 1395 for (i = 0; i < oprsz; i += sizeof(int8_t)) { 1396 int8_t aa = *(int8_t *)(a + i); 1397 int8_t bb = *(int8_t *)(b + i); 1398 int8_t dd = aa > bb ? aa : bb; 1399 *(int8_t *)(d + i) = dd; 1400 } 1401 clear_high(d, oprsz, desc); 1402 } 1403 1404 void HELPER(gvec_smax16)(void *d, void *a, void *b, uint32_t desc) 1405 { 1406 intptr_t oprsz = simd_oprsz(desc); 1407 intptr_t i; 1408 1409 for (i = 0; i < oprsz; i += sizeof(int16_t)) { 1410 int16_t aa = *(int16_t *)(a + i); 1411 int16_t bb = *(int16_t *)(b + i); 1412 int16_t dd = aa > bb ? aa : bb; 1413 *(int16_t *)(d + i) = dd; 1414 } 1415 clear_high(d, oprsz, desc); 1416 } 1417 1418 void HELPER(gvec_smax32)(void *d, void *a, void *b, uint32_t desc) 1419 { 1420 intptr_t oprsz = simd_oprsz(desc); 1421 intptr_t i; 1422 1423 for (i = 0; i < oprsz; i += sizeof(int32_t)) { 1424 int32_t aa = *(int32_t *)(a + i); 1425 int32_t bb = *(int32_t *)(b + i); 1426 int32_t dd = aa > bb ? aa : bb; 1427 *(int32_t *)(d + i) = dd; 1428 } 1429 clear_high(d, oprsz, desc); 1430 } 1431 1432 void HELPER(gvec_smax64)(void *d, void *a, void *b, uint32_t desc) 1433 { 1434 intptr_t oprsz = simd_oprsz(desc); 1435 intptr_t i; 1436 1437 for (i = 0; i < oprsz; i += sizeof(int64_t)) { 1438 int64_t aa = *(int64_t *)(a + i); 1439 int64_t bb = *(int64_t *)(b + i); 1440 int64_t dd = aa > bb ? aa : bb; 1441 *(int64_t *)(d + i) = dd; 1442 } 1443 clear_high(d, oprsz, desc); 1444 } 1445 1446 void HELPER(gvec_umin8)(void *d, void *a, void *b, uint32_t desc) 1447 { 1448 intptr_t oprsz = simd_oprsz(desc); 1449 intptr_t i; 1450 1451 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1452 uint8_t aa = *(uint8_t *)(a + i); 1453 uint8_t bb = *(uint8_t *)(b + i); 1454 uint8_t dd = aa < bb ? aa : bb; 1455 *(uint8_t *)(d + i) = dd; 1456 } 1457 clear_high(d, oprsz, desc); 1458 } 1459 1460 void HELPER(gvec_umin16)(void *d, void *a, void *b, uint32_t desc) 1461 { 1462 intptr_t oprsz = simd_oprsz(desc); 1463 intptr_t i; 1464 1465 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 1466 uint16_t aa = *(uint16_t *)(a + i); 1467 uint16_t bb = *(uint16_t *)(b + i); 1468 uint16_t dd = aa < bb ? aa : bb; 1469 *(uint16_t *)(d + i) = dd; 1470 } 1471 clear_high(d, oprsz, desc); 1472 } 1473 1474 void HELPER(gvec_umin32)(void *d, void *a, void *b, uint32_t desc) 1475 { 1476 intptr_t oprsz = simd_oprsz(desc); 1477 intptr_t i; 1478 1479 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1480 uint32_t aa = *(uint32_t *)(a + i); 1481 uint32_t bb = *(uint32_t *)(b + i); 1482 uint32_t dd = aa < bb ? aa : bb; 1483 *(uint32_t *)(d + i) = dd; 1484 } 1485 clear_high(d, oprsz, desc); 1486 } 1487 1488 void HELPER(gvec_umin64)(void *d, void *a, void *b, uint32_t desc) 1489 { 1490 intptr_t oprsz = simd_oprsz(desc); 1491 intptr_t i; 1492 1493 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1494 uint64_t aa = *(uint64_t *)(a + i); 1495 uint64_t bb = *(uint64_t *)(b + i); 1496 uint64_t dd = aa < bb ? aa : bb; 1497 *(uint64_t *)(d + i) = dd; 1498 } 1499 clear_high(d, oprsz, desc); 1500 } 1501 1502 void HELPER(gvec_umax8)(void *d, void *a, void *b, uint32_t desc) 1503 { 1504 intptr_t oprsz = simd_oprsz(desc); 1505 intptr_t i; 1506 1507 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1508 uint8_t aa = *(uint8_t *)(a + i); 1509 uint8_t bb = *(uint8_t *)(b + i); 1510 uint8_t dd = aa > bb ? aa : bb; 1511 *(uint8_t *)(d + i) = dd; 1512 } 1513 clear_high(d, oprsz, desc); 1514 } 1515 1516 void HELPER(gvec_umax16)(void *d, void *a, void *b, uint32_t desc) 1517 { 1518 intptr_t oprsz = simd_oprsz(desc); 1519 intptr_t i; 1520 1521 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 1522 uint16_t aa = *(uint16_t *)(a + i); 1523 uint16_t bb = *(uint16_t *)(b + i); 1524 uint16_t dd = aa > bb ? aa : bb; 1525 *(uint16_t *)(d + i) = dd; 1526 } 1527 clear_high(d, oprsz, desc); 1528 } 1529 1530 void HELPER(gvec_umax32)(void *d, void *a, void *b, uint32_t desc) 1531 { 1532 intptr_t oprsz = simd_oprsz(desc); 1533 intptr_t i; 1534 1535 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1536 uint32_t aa = *(uint32_t *)(a + i); 1537 uint32_t bb = *(uint32_t *)(b + i); 1538 uint32_t dd = aa > bb ? aa : bb; 1539 *(uint32_t *)(d + i) = dd; 1540 } 1541 clear_high(d, oprsz, desc); 1542 } 1543 1544 void HELPER(gvec_umax64)(void *d, void *a, void *b, uint32_t desc) 1545 { 1546 intptr_t oprsz = simd_oprsz(desc); 1547 intptr_t i; 1548 1549 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1550 uint64_t aa = *(uint64_t *)(a + i); 1551 uint64_t bb = *(uint64_t *)(b + i); 1552 uint64_t dd = aa > bb ? aa : bb; 1553 *(uint64_t *)(d + i) = dd; 1554 } 1555 clear_high(d, oprsz, desc); 1556 } 1557 1558 void HELPER(gvec_bitsel)(void *d, void *a, void *b, void *c, uint32_t desc) 1559 { 1560 intptr_t oprsz = simd_oprsz(desc); 1561 intptr_t i; 1562 1563 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1564 uint64_t aa = *(uint64_t *)(a + i); 1565 uint64_t bb = *(uint64_t *)(b + i); 1566 uint64_t cc = *(uint64_t *)(c + i); 1567 *(uint64_t *)(d + i) = (bb & aa) | (cc & ~aa); 1568 } 1569 clear_high(d, oprsz, desc); 1570 } 1571