/*
 * Generic vectorized operation runtime
 *
 * Copyright (c) 2018 Linaro
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "cpu.h"
#include "exec/helper-proto.h"
#include "tcg-gvec-desc.h"


/* Virtually all hosts support 16-byte vectors.  Those that don't can emulate
 * them via GCC's generic vector extension.  This turns out to be simpler and
 * more reliable than getting the compiler to autovectorize.
 *
 * In tcg-op-gvec.c, we asserted that both the size and alignment of the data
 * are multiples of 16.
 *
 * When the compiler does not support all of the operations we require, the
 * loops are written so that we can always fall back on the base types.
 */
#ifdef CONFIG_VECTOR16
typedef uint8_t vec8 __attribute__((vector_size(16)));
typedef uint16_t vec16 __attribute__((vector_size(16)));
typedef uint32_t vec32 __attribute__((vector_size(16)));
typedef uint64_t vec64 __attribute__((vector_size(16)));

typedef int8_t svec8 __attribute__((vector_size(16)));
typedef int16_t svec16 __attribute__((vector_size(16)));
typedef int32_t svec32 __attribute__((vector_size(16)));
typedef int64_t svec64 __attribute__((vector_size(16)));

#define DUP16(X) { X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X }
#define DUP8(X)  { X, X, X, X, X, X, X, X }
#define DUP4(X)  { X, X, X, X }
#define DUP2(X)  { X, X }
#else
typedef uint8_t vec8;
typedef uint16_t vec16;
typedef uint32_t vec32;
typedef uint64_t vec64;

typedef int8_t svec8;
typedef int16_t svec16;
typedef int32_t svec32;
typedef int64_t svec64;

#define DUP16(X) X
#define DUP8(X)  X
#define DUP4(X)  X
#define DUP2(X)  X
#endif /* CONFIG_VECTOR16 */
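/*
 * Illustration only (not required by the helpers below): with the DUP
 * macros, a constant vector can be written identically in both
 * configurations, e.g.
 *
 *     vec16 twos = DUP8(2);
 *
 * With CONFIG_VECTOR16 this initializes eight 16-bit lanes; in the
 * fallback configuration vec16 is plain uint16_t and DUP8(2) is just
 * the scalar 2, so the element loops below still process one lane at
 * a time.
 */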
/* Zero the bytes between the operation size and the maximum size.
 * simd_oprsz() is the number of bytes actually operated on and
 * simd_maxsz() is the full size of the vector register; e.g. with
 * oprsz = 16 and maxsz = 32, bytes 16..31 of d are cleared.
 */
static inline void clear_high(void *d, intptr_t oprsz, uint32_t desc)
{
    intptr_t maxsz = simd_maxsz(desc);
    intptr_t i;

    if (unlikely(maxsz > oprsz)) {
        for (i = oprsz; i < maxsz; i += sizeof(uint64_t)) {
            *(uint64_t *)(d + i) = 0;
        }
    }
}

void HELPER(gvec_add8)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec8)) {
        *(vec8 *)(d + i) = *(vec8 *)(a + i) + *(vec8 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_add16)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec16)) {
        *(vec16 *)(d + i) = *(vec16 *)(a + i) + *(vec16 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_add32)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec32)) {
        *(vec32 *)(d + i) = *(vec32 *)(a + i) + *(vec32 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_add64)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec64)) {
        *(vec64 *)(d + i) = *(vec64 *)(a + i) + *(vec64 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_sub8)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec8)) {
        *(vec8 *)(d + i) = *(vec8 *)(a + i) - *(vec8 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_sub16)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec16)) {
        *(vec16 *)(d + i) = *(vec16 *)(a + i) - *(vec16 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_sub32)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec32)) {
        *(vec32 *)(d + i) = *(vec32 *)(a + i) - *(vec32 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_sub64)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec64)) {
        *(vec64 *)(d + i) = *(vec64 *)(a + i) - *(vec64 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_mul8)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec8)) {
        *(vec8 *)(d + i) = *(vec8 *)(a + i) * *(vec8 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_mul16)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec16)) {
        *(vec16 *)(d + i) = *(vec16 *)(a + i) * *(vec16 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_mul32)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec32)) {
        *(vec32 *)(d + i) = *(vec32 *)(a + i) * *(vec32 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_mul64)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec64)) {
        *(vec64 *)(d + i) = *(vec64 *)(a + i) * *(vec64 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_neg8)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec8)) {
        *(vec8 *)(d + i) = -*(vec8 *)(a + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_neg16)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec16)) {
        *(vec16 *)(d + i) = -*(vec16 *)(a + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_neg32)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec32)) {
        *(vec32 *)(d + i) = -*(vec32 *)(a + i);
    }
    clear_high(d, oprsz, desc);
}
void HELPER(gvec_neg64)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec64)) {
        *(vec64 *)(d + i) = -*(vec64 *)(a + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_mov)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);

    memcpy(d, a, oprsz);
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_dup64)(void *d, uint32_t desc, uint64_t c)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    if (c == 0) {
        /* Duplicating zero: let clear_high zero the entire vector.  */
        oprsz = 0;
    } else {
        for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
            *(uint64_t *)(d + i) = c;
        }
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_dup32)(void *d, uint32_t desc, uint32_t c)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    if (c == 0) {
        /* Duplicating zero: let clear_high zero the entire vector.  */
        oprsz = 0;
    } else {
        for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
            *(uint32_t *)(d + i) = c;
        }
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_dup16)(void *d, uint32_t desc, uint32_t c)
{
    /* Replicate the 16-bit value into both halves of a 32-bit word.  */
    HELPER(gvec_dup32)(d, desc, 0x00010001 * (c & 0xffff));
}

void HELPER(gvec_dup8)(void *d, uint32_t desc, uint32_t c)
{
    /* Replicate the 8-bit value into all four bytes of a 32-bit word.  */
    HELPER(gvec_dup32)(d, desc, 0x01010101 * (c & 0xff));
}

void HELPER(gvec_not)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec64)) {
        *(vec64 *)(d + i) = ~*(vec64 *)(a + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_and)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec64)) {
        *(vec64 *)(d + i) = *(vec64 *)(a + i) & *(vec64 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_or)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec64)) {
        *(vec64 *)(d + i) = *(vec64 *)(a + i) | *(vec64 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_xor)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec64)) {
        *(vec64 *)(d + i) = *(vec64 *)(a + i) ^ *(vec64 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_andc)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec64)) {
        *(vec64 *)(d + i) = *(vec64 *)(a + i) &~ *(vec64 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_orc)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec64)) {
        *(vec64 *)(d + i) = *(vec64 *)(a + i) |~ *(vec64 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_shl8i)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    int shift = simd_data(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec8)) {
        *(vec8 *)(d + i) = *(vec8 *)(a + i) << shift;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_shl16i)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    int shift = simd_data(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec16)) {
        *(vec16 *)(d + i) = *(vec16 *)(a + i) << shift;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_shl32i)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    int shift = simd_data(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec32)) {
        *(vec32 *)(d + i) = *(vec32 *)(a + i) << shift;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_shl64i)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    int shift = simd_data(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec64)) {
        *(vec64 *)(d + i) = *(vec64 *)(a + i) << shift;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_shr8i)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    int shift = simd_data(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec8)) {
        *(vec8 *)(d + i) = *(vec8 *)(a + i) >> shift;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_shr16i)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    int shift = simd_data(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec16)) {
        *(vec16 *)(d + i) = *(vec16 *)(a + i) >> shift;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_shr32i)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    int shift = simd_data(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec32)) {
        *(vec32 *)(d + i) = *(vec32 *)(a + i) >> shift;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_shr64i)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    int shift = simd_data(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec64)) {
        *(vec64 *)(d + i) = *(vec64 *)(a + i) >> shift;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_sar8i)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    int shift = simd_data(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec8)) {
        *(svec8 *)(d + i) = *(svec8 *)(a + i) >> shift;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_sar16i)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    int shift = simd_data(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec16)) {
        *(svec16 *)(d + i) = *(svec16 *)(a + i) >> shift;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_sar32i)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    int shift = simd_data(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec32)) {
        *(svec32 *)(d + i) = *(svec32 *)(a + i) >> shift;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    int shift = simd_data(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec64)) {
        *(svec64 *)(d + i) = *(svec64 *)(a + i) >> shift;
    }
    clear_high(d, oprsz, desc);
}

/* If vectors are enabled, the compiler fills in -1 for true.
   Otherwise, we must take care of this by hand.  */
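/*
 * For example (a sketch of the two cases): with 16-byte vectors, "a == b"
 * on vec8 operands yields 0xff in every lane where the bytes are equal and
 * 0x00 elsewhere.  In the fallback configuration the same expression is a
 * scalar comparison yielding 1 or 0, so DO_CMP0 negates it to produce the
 * expected -1 (all ones) or 0.
 */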
#ifdef CONFIG_VECTOR16
# define DO_CMP0(X)  X
#else
# define DO_CMP0(X)  -(X)
#endif

#define DO_CMP1(NAME, TYPE, OP)                                            \
void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc)                \
{                                                                          \
    intptr_t oprsz = simd_oprsz(desc);                                     \
    intptr_t i;                                                            \
    for (i = 0; i < oprsz; i += sizeof(TYPE)) {                            \
        *(TYPE *)(d + i) = DO_CMP0(*(TYPE *)(a + i) OP *(TYPE *)(b + i));  \
    }                                                                      \
    clear_high(d, oprsz, desc);                                            \
}

#define DO_CMP2(SZ)                          \
    DO_CMP1(gvec_eq##SZ, vec##SZ, ==)        \
    DO_CMP1(gvec_ne##SZ, vec##SZ, !=)        \
    DO_CMP1(gvec_lt##SZ, svec##SZ, <)        \
    DO_CMP1(gvec_le##SZ, svec##SZ, <=)       \
    DO_CMP1(gvec_ltu##SZ, vec##SZ, <)        \
    DO_CMP1(gvec_leu##SZ, vec##SZ, <=)

DO_CMP2(8)
DO_CMP2(16)
DO_CMP2(32)
DO_CMP2(64)

#undef DO_CMP0
#undef DO_CMP1
#undef DO_CMP2

void HELPER(gvec_ssadd8)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(int8_t)) {
        int r = *(int8_t *)(a + i) + *(int8_t *)(b + i);
        if (r > INT8_MAX) {
            r = INT8_MAX;
        } else if (r < INT8_MIN) {
            r = INT8_MIN;
        }
        *(int8_t *)(d + i) = r;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_ssadd16)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(int16_t)) {
        int r = *(int16_t *)(a + i) + *(int16_t *)(b + i);
        if (r > INT16_MAX) {
            r = INT16_MAX;
        } else if (r < INT16_MIN) {
            r = INT16_MIN;
        }
        *(int16_t *)(d + i) = r;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_ssadd32)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(int32_t)) {
        int32_t ai = *(int32_t *)(a + i);
        int32_t bi = *(int32_t *)(b + i);
        int32_t di = ai + bi;
        if (((di ^ ai) &~ (ai ^ bi)) < 0) {
            /* Signed overflow: the addends have the same sign and the
               result's sign differs, so saturate toward that sign.  */
            di = (di < 0 ? INT32_MAX : INT32_MIN);
        }
        *(int32_t *)(d + i) = di;
    }
    clear_high(d, oprsz, desc);
}
void HELPER(gvec_ssadd64)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(int64_t)) {
        int64_t ai = *(int64_t *)(a + i);
        int64_t bi = *(int64_t *)(b + i);
        int64_t di = ai + bi;
        if (((di ^ ai) &~ (ai ^ bi)) < 0) {
            /* Signed overflow. */
            di = (di < 0 ? INT64_MAX : INT64_MIN);
        }
        *(int64_t *)(d + i) = di;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_sssub8)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
        int r = *(int8_t *)(a + i) - *(int8_t *)(b + i);
        if (r > INT8_MAX) {
            r = INT8_MAX;
        } else if (r < INT8_MIN) {
            r = INT8_MIN;
        }
        *(uint8_t *)(d + i) = r;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_sssub16)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(int16_t)) {
        int r = *(int16_t *)(a + i) - *(int16_t *)(b + i);
        if (r > INT16_MAX) {
            r = INT16_MAX;
        } else if (r < INT16_MIN) {
            r = INT16_MIN;
        }
        *(int16_t *)(d + i) = r;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_sssub32)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(int32_t)) {
        int32_t ai = *(int32_t *)(a + i);
        int32_t bi = *(int32_t *)(b + i);
        int32_t di = ai - bi;
        if (((di ^ ai) & (ai ^ bi)) < 0) {
            /* Signed overflow: the operands have differing signs and the
               result's sign differs from the minuend, so saturate.  */
            di = (di < 0 ? INT32_MAX : INT32_MIN);
        }
        *(int32_t *)(d + i) = di;
    }
    clear_high(d, oprsz, desc);
}
void HELPER(gvec_sssub64)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(int64_t)) {
        int64_t ai = *(int64_t *)(a + i);
        int64_t bi = *(int64_t *)(b + i);
        int64_t di = ai - bi;
        if (((di ^ ai) & (ai ^ bi)) < 0) {
            /* Signed overflow. */
            di = (di < 0 ? INT64_MAX : INT64_MIN);
        }
        *(int64_t *)(d + i) = di;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_usadd8)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
        unsigned r = *(uint8_t *)(a + i) + *(uint8_t *)(b + i);
        if (r > UINT8_MAX) {
            r = UINT8_MAX;
        }
        *(uint8_t *)(d + i) = r;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_usadd16)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
        unsigned r = *(uint16_t *)(a + i) + *(uint16_t *)(b + i);
        if (r > UINT16_MAX) {
            r = UINT16_MAX;
        }
        *(uint16_t *)(d + i) = r;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_usadd32)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
        uint32_t ai = *(uint32_t *)(a + i);
        uint32_t bi = *(uint32_t *)(b + i);
        uint32_t di = ai + bi;
        if (di < ai) {
            di = UINT32_MAX;
        }
        *(uint32_t *)(d + i) = di;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_usadd64)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
        uint64_t ai = *(uint64_t *)(a + i);
        uint64_t bi = *(uint64_t *)(b + i);
        uint64_t di = ai + bi;
        if (di < ai) {
            di = UINT64_MAX;
        }
        *(uint64_t *)(d + i) = di;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_ussub8)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
        int r = *(uint8_t *)(a + i) - *(uint8_t *)(b + i);
        if (r < 0) {
            r = 0;
        }
        *(uint8_t *)(d + i) = r;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_ussub16)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
        int r = *(uint16_t *)(a + i) - *(uint16_t *)(b + i);
        if (r < 0) {
            r = 0;
        }
        *(uint16_t *)(d + i) = r;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_ussub32)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
        uint32_t ai = *(uint32_t *)(a + i);
        uint32_t bi = *(uint32_t *)(b + i);
        uint32_t di = ai - bi;
        if (ai < bi) {
            di = 0;
        }
        *(uint32_t *)(d + i) = di;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_ussub64)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
        uint64_t ai = *(uint64_t *)(a + i);
        uint64_t bi = *(uint64_t *)(b + i);
        uint64_t di = ai - bi;
        if (ai < bi) {
            di = 0;
        }
        *(uint64_t *)(d + i) = di;
    }
    clear_high(d, oprsz, desc);
}
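/*
 * For reference (a sketch, not a definitive list): helpers of this shape
 * are declared to TCG via the runtime helper header, along the lines of
 *
 *     DEF_HELPER_FLAGS_4(gvec_add8, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 *     DEF_HELPER_FLAGS_3(gvec_dup64, TCG_CALL_NO_RWG, void, ptr, i32, i64)
 *
 * The authoritative declarations live in the tcg runtime header.
 */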