Lines Matching full:i
32 * for (i = 0; i < 256; ++i) {
35 * if ((i >> j) & 1) {
133 * for (i = 0; i < 256; ++i) {
135 * if (i & 0xaa) {
139 * if ((i >> j) & 1) {
143 * printf("[0x%x] = 0x%016lx,\n", i, m);
182 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
185 for (i = 0; i < opr_sz; ++i) { in HELPER()
186 d[i] = do_sqrdmlah_b(n[i], m[i], a[i], false, true); in HELPER()
193 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
196 for (i = 0; i < opr_sz; ++i) { in HELPER()
197 d[i] = do_sqrdmlah_b(n[i], m[i], a[i], true, true); in HELPER()
203 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
206 for (i = 0; i < opr_sz; ++i) { in HELPER()
207 d[i] = do_sqrdmlah_b(n[i], m[i], 0, false, false); in HELPER()
213 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
216 for (i = 0; i < opr_sz; ++i) { in HELPER()
217 d[i] = do_sqrdmlah_b(n[i], m[i], 0, false, true); in HELPER()
257 uintptr_t i; in HELPER() local
259 for (i = 0; i < opr_sz / 2; ++i) { in HELPER()
260 d[i] = do_sqrdmlah_h(n[i], m[i], d[i], false, true, vq); in HELPER()
282 uintptr_t i; in HELPER() local
284 for (i = 0; i < opr_sz / 2; ++i) { in HELPER()
285 d[i] = do_sqrdmlah_h(n[i], m[i], d[i], true, true, vq); in HELPER()
293 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
296 for (i = 0; i < opr_sz / 2; ++i) { in HELPER()
297 d[i] = do_sqrdmlah_h(n[i], m[i], 0, false, false, vq); in HELPER()
305 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
308 for (i = 0; i < opr_sz / 2; ++i) { in HELPER()
309 d[i] = do_sqrdmlah_h(n[i], m[i], 0, false, true, vq); in HELPER()
317 intptr_t i, j, opr_sz = simd_oprsz(desc); in HELPER() local
323 for (i = 0; i < elements; i += 16 / 2) { in HELPER()
324 int16_t mm = m[i]; in HELPER()
326 d[i + j] = do_sqrdmlah_h(n[i + j], mm, 0, false, false, vq); in HELPER()
335 intptr_t i, j, opr_sz = simd_oprsz(desc); in HELPER() local
341 for (i = 0; i < elements; i += 16 / 2) { in HELPER()
342 int16_t mm = m[i]; in HELPER()
344 d[i + j] = do_sqrdmlah_h(n[i + j], mm, 0, false, true, vq); in HELPER()
353 intptr_t i, j, opr_sz = simd_oprsz(desc); in HELPER() local
359 for (i = 0; i < elements; i += 16 / 2) { in HELPER()
360 int16_t mm = m[i]; in HELPER()
362 d[i + j] = do_sqrdmlah_h(n[i + j], mm, d[i + j], false, true, vq); in HELPER()
371 intptr_t i, j, opr_sz = simd_oprsz(desc); in HELPER() local
377 for (i = 0; i < elements; i += 16 / 2) { in HELPER()
378 int16_t mm = m[i]; in HELPER()
380 d[i + j] = do_sqrdmlah_h(n[i + j], mm, d[i + j], true, true, vq); in HELPER()
389 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
393 for (i = 0; i < opr_sz / 2; ++i) { in HELPER()
394 d[i] = do_sqrdmlah_h(n[i], m[i], a[i], false, true, &discard); in HELPER()
401 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
405 for (i = 0; i < opr_sz / 2; ++i) { in HELPER()
406 d[i] = do_sqrdmlah_h(n[i], m[i], a[i], true, true, &discard); in HELPER()
412 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
416 for (i = 0; i < opr_sz / 2; ++i) { in HELPER()
417 d[i] = do_sqrdmlah_h(n[i], m[i], 0, false, false, &discard); in HELPER()
423 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
427 for (i = 0; i < opr_sz / 2; ++i) { in HELPER()
428 d[i] = do_sqrdmlah_h(n[i], m[i], 0, false, true, &discard); in HELPER()
434 intptr_t i, j, opr_sz = simd_oprsz(desc); in HELPER() local
439 for (i = 0; i < opr_sz / 2; i += 16 / 2) { in HELPER()
440 int16_t mm = m[i]; in HELPER()
442 d[i + j] = do_sqrdmlah_h(n[i + j], mm, 0, false, false, &discard); in HELPER()
449 intptr_t i, j, opr_sz = simd_oprsz(desc); in HELPER() local
454 for (i = 0; i < opr_sz / 2; i += 16 / 2) { in HELPER()
455 int16_t mm = m[i]; in HELPER()
457 d[i + j] = do_sqrdmlah_h(n[i + j], mm, 0, false, true, &discard); in HELPER()
495 uintptr_t i; in HELPER() local
497 for (i = 0; i < opr_sz / 4; ++i) { in HELPER()
498 d[i] = do_sqrdmlah_s(n[i], m[i], d[i], false, true, vq); in HELPER()
517 uintptr_t i; in HELPER() local
519 for (i = 0; i < opr_sz / 4; ++i) { in HELPER()
520 d[i] = do_sqrdmlah_s(n[i], m[i], d[i], true, true, vq); in HELPER()
528 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
531 for (i = 0; i < opr_sz / 4; ++i) { in HELPER()
532 d[i] = do_sqrdmlah_s(n[i], m[i], 0, false, false, vq); in HELPER()
540 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
543 for (i = 0; i < opr_sz / 4; ++i) { in HELPER()
544 d[i] = do_sqrdmlah_s(n[i], m[i], 0, false, true, vq); in HELPER()
552 intptr_t i, j, opr_sz = simd_oprsz(desc); in HELPER() local
558 for (i = 0; i < elements; i += 16 / 4) { in HELPER()
559 int32_t mm = m[i]; in HELPER()
561 d[i + j] = do_sqrdmlah_s(n[i + j], mm, 0, false, false, vq); in HELPER()
570 intptr_t i, j, opr_sz = simd_oprsz(desc); in HELPER() local
576 for (i = 0; i < elements; i += 16 / 4) { in HELPER()
577 int32_t mm = m[i]; in HELPER()
579 d[i + j] = do_sqrdmlah_s(n[i + j], mm, 0, false, true, vq); in HELPER()
588 intptr_t i, j, opr_sz = simd_oprsz(desc); in HELPER() local
594 for (i = 0; i < elements; i += 16 / 4) { in HELPER()
595 int32_t mm = m[i]; in HELPER()
597 d[i + j] = do_sqrdmlah_s(n[i + j], mm, d[i + j], false, true, vq); in HELPER()
606 intptr_t i, j, opr_sz = simd_oprsz(desc); in HELPER() local
612 for (i = 0; i < elements; i += 16 / 4) { in HELPER()
613 int32_t mm = m[i]; in HELPER()
615 d[i + j] = do_sqrdmlah_s(n[i + j], mm, d[i + j], true, true, vq); in HELPER()
624 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
628 for (i = 0; i < opr_sz / 4; ++i) { in HELPER()
629 d[i] = do_sqrdmlah_s(n[i], m[i], a[i], false, true, &discard); in HELPER()
636 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
640 for (i = 0; i < opr_sz / 4; ++i) { in HELPER()
641 d[i] = do_sqrdmlah_s(n[i], m[i], a[i], true, true, &discard); in HELPER()
647 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
651 for (i = 0; i < opr_sz / 4; ++i) { in HELPER()
652 d[i] = do_sqrdmlah_s(n[i], m[i], 0, false, false, &discard); in HELPER()
658 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
662 for (i = 0; i < opr_sz / 4; ++i) { in HELPER()
663 d[i] = do_sqrdmlah_s(n[i], m[i], 0, false, true, &discard); in HELPER()
669 intptr_t i, j, opr_sz = simd_oprsz(desc); in HELPER() local
674 for (i = 0; i < opr_sz / 4; i += 16 / 4) { in HELPER()
675 int32_t mm = m[i]; in HELPER()
677 d[i + j] = do_sqrdmlah_s(n[i + j], mm, 0, false, false, &discard); in HELPER()
684 intptr_t i, j, opr_sz = simd_oprsz(desc); in HELPER() local
689 for (i = 0; i < opr_sz / 4; i += 16 / 4) { in HELPER()
690 int32_t mm = m[i]; in HELPER()
692 d[i + j] = do_sqrdmlah_s(n[i + j], mm, 0, false, true, &discard); in HELPER()
737 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
740 for (i = 0; i < opr_sz / 8; ++i) { in HELPER()
741 d[i] = do_sqrdmlah_d(n[i], m[i], a[i], false, true); in HELPER()
748 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
751 for (i = 0; i < opr_sz / 8; ++i) { in HELPER()
752 d[i] = do_sqrdmlah_d(n[i], m[i], a[i], true, true); in HELPER()
758 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
761 for (i = 0; i < opr_sz / 8; ++i) { in HELPER()
762 d[i] = do_sqrdmlah_d(n[i], m[i], 0, false, false); in HELPER()
768 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
771 for (i = 0; i < opr_sz / 8; ++i) { in HELPER()
772 d[i] = do_sqrdmlah_d(n[i], m[i], 0, false, true); in HELPER()
778 intptr_t i, j, opr_sz = simd_oprsz(desc); in HELPER() local
782 for (i = 0; i < opr_sz / 8; i += 16 / 8) { in HELPER()
783 int64_t mm = m[i]; in HELPER()
785 d[i + j] = do_sqrdmlah_d(n[i + j], mm, 0, false, false); in HELPER()
792 intptr_t i, j, opr_sz = simd_oprsz(desc); in HELPER() local
796 for (i = 0; i < opr_sz / 8; i += 16 / 8) { in HELPER()
797 int64_t mm = m[i]; in HELPER()
799 d[i + j] = do_sqrdmlah_d(n[i + j], mm, 0, false, true); in HELPER()
814 intptr_t i, opr_sz = simd_oprsz(desc); \
818 for (i = 0; i < opr_sz / sizeof(TYPED); ++i) { \
819 d[i] = (a[i] + \
820 (TYPED)n[i * 4 + 0] * m[i * 4 + 0] + \
821 (TYPED)n[i * 4 + 1] * m[i * 4 + 1] + \
822 (TYPED)n[i * 4 + 2] * m[i * 4 + 2] + \
823 (TYPED)n[i * 4 + 3] * m[i * 4 + 3]); \
837 intptr_t i = 0, opr_sz = simd_oprsz(desc); \ in DO_DOT()
852 TYPED m0 = m_indexed[i * 4 + 0]; \ in DO_DOT()
853 TYPED m1 = m_indexed[i * 4 + 1]; \ in DO_DOT()
854 TYPED m2 = m_indexed[i * 4 + 2]; \ in DO_DOT()
855 TYPED m3 = m_indexed[i * 4 + 3]; \ in DO_DOT()
857 d[i] = (a[i] + \ in DO_DOT()
858 n[i * 4 + 0] * m0 + \ in DO_DOT()
859 n[i * 4 + 1] * m1 + \ in DO_DOT()
860 n[i * 4 + 2] * m2 + \ in DO_DOT()
861 n[i * 4 + 3] * m3); \ in DO_DOT()
862 } while (++i < segend); \ in DO_DOT()
863 segend = i + (16 / sizeof(TYPED)); \ in DO_DOT()
864 } while (i < opr_sz_n); \ in DO_DOT()
884 uintptr_t i; local
886 for (i = 0; i < opr_sz / 2; i += 2) {
887 float16 e0 = n[H2(i)];
888 float16 e1 = m[H2(i + 1)];
889 float16 e2 = n[H2(i + 1)];
890 float16 e3 = m[H2(i)];
898 d[H2(i)] = float16_add(e0, e1, fpst);
899 d[H2(i + 1)] = float16_add(e2, e3, fpst);
913 uintptr_t i; in HELPER() local
915 for (i = 0; i < opr_sz / 4; i += 2) { in HELPER()
916 float32 e0 = n[H4(i)]; in HELPER()
917 float32 e1 = m[H4(i + 1)]; in HELPER()
918 float32 e2 = n[H4(i + 1)]; in HELPER()
919 float32 e3 = m[H4(i)]; in HELPER()
927 d[H4(i)] = float32_add(e0, e1, fpst); in HELPER()
928 d[H4(i + 1)] = float32_add(e2, e3, fpst); in HELPER()
942 uintptr_t i; in HELPER() local
944 for (i = 0; i < opr_sz / 8; i += 2) { in HELPER()
945 float64 e0 = n[i]; in HELPER()
946 float64 e1 = m[i + 1]; in HELPER()
947 float64 e2 = n[i + 1]; in HELPER()
948 float64 e3 = m[i]; in HELPER()
956 d[i] = float64_add(e0, e1, fpst); in HELPER()
957 d[i + 1] = float64_add(e2, e3, fpst); in HELPER()
972 uintptr_t i; in HELPER() local
980 for (i = 0; i < opr_sz / 2; i += 2) { in HELPER()
981 float16 e2 = n[H2(i + flip)]; in HELPER()
982 float16 e1 = m[H2(i + flip)] ^ negx_real; in HELPER()
984 float16 e3 = m[H2(i + 1 - flip)] ^ negx_imag; in HELPER()
986 d[H2(i)] = float16_muladd(e2, e1, a[H2(i)], negf_real, fpst); in HELPER()
987 d[H2(i + 1)] = float16_muladd(e4, e3, a[H2(i + 1)], negf_imag, fpst); in HELPER()
1005 intptr_t i, j; in HELPER() local
1013 for (i = 0; i < elements; i += eltspersegment) { in HELPER()
1014 float16 mr = m[H2(i + 2 * index + 0)]; in HELPER()
1015 float16 mi = m[H2(i + 2 * index + 1)]; in HELPER()
1019 for (j = i; j < i + eltspersegment; j += 2) { in HELPER()
1040 uintptr_t i; in HELPER() local
1048 for (i = 0; i < opr_sz / 4; i += 2) { in HELPER()
1049 float32 e2 = n[H4(i + flip)]; in HELPER()
1050 float32 e1 = m[H4(i + flip)] ^ negx_real; in HELPER()
1052 float32 e3 = m[H4(i + 1 - flip)] ^ negx_imag; in HELPER()
1054 d[H4(i)] = float32_muladd(e2, e1, a[H4(i)], negf_real, fpst); in HELPER()
1055 d[H4(i + 1)] = float32_muladd(e4, e3, a[H4(i + 1)], negf_imag, fpst); in HELPER()
1073 intptr_t i, j; in HELPER() local
1081 for (i = 0; i < elements; i += eltspersegment) { in HELPER()
1082 float32 mr = m[H4(i + 2 * index + 0)]; in HELPER()
1083 float32 mi = m[H4(i + 2 * index + 1)]; in HELPER()
1087 for (j = i; j < i + eltspersegment; j += 2) { in HELPER()
1108 uintptr_t i; in HELPER() local
1116 for (i = 0; i < opr_sz / 8; i += 2) { in HELPER()
1117 float64 e2 = n[i + flip]; in HELPER()
1118 float64 e1 = m[i + flip] ^ negx_real; in HELPER()
1120 float64 e3 = m[i + 1 - flip] ^ negx_imag; in HELPER()
1122 d[i] = float64_muladd(e2, e1, a[i], negf_real, fpst); in HELPER()
1123 d[i + 1] = float64_muladd(e4, e3, a[i + 1], negf_imag, fpst); in HELPER()
1229 intptr_t i, oprsz = simd_oprsz(desc); \
1231 for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
1232 d[i] = FUNC(n[i], stat); \
1411 intptr_t i, oprsz = simd_oprsz(desc); \
1413 for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
1414 d[i] = FUNC(n[i], m[i], stat); \
1605 intptr_t i, oprsz = simd_oprsz(desc); \
1607 for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
1608 d[i] = FUNC(d[i], n[i], m[i], stat); \
1638 intptr_t i, j, oprsz = simd_oprsz(desc); \ in DO_MULADD()
1642 for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \ in DO_MULADD()
1643 TYPE mm = m[H(i + idx)]; \ in DO_MULADD()
1645 d[i + j] = n[i + j] * mm; \ in DO_MULADD()
1660 intptr_t i, j, oprsz = simd_oprsz(desc); \
1664 for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \
1665 TYPE mm = m[H(i + idx)]; \
1667 d[i + j] = a[i + j] OP n[i + j] * mm; \
1687 intptr_t i, j, oprsz = simd_oprsz(desc); \
1691 for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \
1692 TYPE mm = m[H(i + idx)]; \
1694 d[i + j] = ADD(d[i + j], MUL(n[i + j], mm, stat), stat); \
1731 intptr_t i, j, oprsz = simd_oprsz(desc); \
1735 for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \
1736 TYPE mm = m[H(i + idx)]; \
1738 d[i + j] = TYPE##_muladd(n[i + j] ^ NEGX, mm, \
1739 a[i + j], NEGF, stat); \
1762 intptr_t i, oprsz = simd_oprsz(desc); \
1765 for (i = 0; i < oprsz / sizeof(TYPEN); i++) { \
1766 WTYPE dd = (WTYPE)n[i] OP m[i]; \
1774 d[i] = dd; \
1812 intptr_t i, oprsz = simd_oprsz(desc); local
1816 for (i = 0; i < oprsz / 8; i++) {
1817 uint64_t nn = n[i], mm = m[i], dd = nn + mm;
1822 d[i] = dd;
1834 intptr_t i, oprsz = simd_oprsz(desc); in HELPER() local
1838 for (i = 0; i < oprsz / 8; i++) { in HELPER()
1839 uint64_t nn = n[i], mm = m[i], dd = nn - mm; in HELPER()
1844 d[i] = dd; in HELPER()
1856 intptr_t i, oprsz = simd_oprsz(desc); in HELPER() local
1860 for (i = 0; i < oprsz / 8; i++) { in HELPER()
1861 int64_t nn = n[i], mm = m[i], dd = nn + mm; in HELPER()
1866 d[i] = dd; in HELPER()
1878 intptr_t i, oprsz = simd_oprsz(desc); in HELPER() local
1882 for (i = 0; i < oprsz / 8; i++) { in HELPER()
1883 int64_t nn = n[i], mm = m[i], dd = nn - mm; in HELPER()
1888 d[i] = dd; in HELPER()
1900 intptr_t i, oprsz = simd_oprsz(desc); in HELPER() local
1904 for (i = 0; i < oprsz / 8; i++) { in HELPER()
1905 uint64_t nn = n[i]; in HELPER()
1906 int64_t mm = m[i]; in HELPER()
1920 d[i] = dd; in HELPER()
1932 intptr_t i, oprsz = simd_oprsz(desc); in HELPER() local
1936 for (i = 0; i < oprsz / 8; i++) { in HELPER()
1937 int64_t nn = n[i]; in HELPER()
1938 uint64_t mm = m[i]; in HELPER()
1945 d[i] = dd; in HELPER()
1957 intptr_t i, oprsz = simd_oprsz(desc); \
1960 for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
1961 d[i] += n[i] >> shift; \
1981 intptr_t i, oprsz = simd_oprsz(desc); \ in DO_SRA()
1984 for (i = 0; i < oprsz / sizeof(TYPE); i++) { \ in DO_SRA()
1985 TYPE tmp = n[i] >> (shift - 1); \ in DO_SRA()
1986 d[i] = (tmp >> 1) + (tmp & 1); \ in DO_SRA()
2006 intptr_t i, oprsz = simd_oprsz(desc); \
2009 for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
2010 TYPE tmp = n[i] >> (shift - 1); \
2011 d[i] += (tmp >> 1) + (tmp & 1); \
2031 intptr_t i, oprsz = simd_oprsz(desc); \
2034 for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
2035 d[i] = deposit64(d[i], 0, sizeof(TYPE) * 8 - shift, n[i] >> shift); \
2050 intptr_t i, oprsz = simd_oprsz(desc); \
2053 for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
2054 d[i] = deposit64(d[i], shift, sizeof(TYPE) * 8 - shift, n[i]); \
2135 intptr_t i, oprsz = simd_oprsz(desc); in do_fmlal() local
2147 for (i = 0; i < oprsz / 4; i++) { in do_fmlal()
2148 float32 n_1 = float16_to_float32_by_bits(n_4 >> (i * 16), fz16); in do_fmlal()
2149 float32 m_1 = float16_to_float32_by_bits(m_4 >> (i * 16), fz16); in do_fmlal()
2150 d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], negf, fpst); in do_fmlal()
2184 intptr_t i, oprsz = simd_oprsz(desc); in HELPER() local
2199 for (i = 0; i < oprsz; i += sizeof(float32)) { in HELPER()
2200 float16 nn_16 = *(float16 *)(vn + H1_2(i + sel)) ^ negx; in HELPER()
2201 float16 mm_16 = *(float16 *)(vm + H1_2(i + sel)); in HELPER()
2204 float32 aa = *(float32 *)(va + H1_4(i)); in HELPER()
2206 *(float32 *)(vd + H1_4(i)) = float32_muladd(nn, mm, aa, negf, status); in HELPER()
2217 intptr_t i, oprsz = simd_oprsz(desc); in do_fmlal_idx() local
2231 for (i = 0; i < oprsz / 4; i++) { in do_fmlal_idx()
2232 float32 n_1 = float16_to_float32_by_bits(n_4 >> (i * 16), fz16); in do_fmlal_idx()
2233 d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], negf, fpst); in do_fmlal_idx()
2267 intptr_t i, j, oprsz = simd_oprsz(desc); in HELPER() local
2282 for (i = 0; i < oprsz; i += 16) { in HELPER()
2283 float16 mm_16 = *(float16 *)(vm + i + idx); in HELPER()
2287 float16 nn_16 = *(float16 *)(vn + H1_2(i + j + sel)) ^ negx; in HELPER()
2289 float32 aa = *(float32 *)(va + H1_4(i + j)); in HELPER()
2291 *(float32 *)(vd + H1_4(i + j)) = in HELPER()
2299 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
2302 for (i = 0; i < opr_sz; ++i) { in HELPER()
2303 int8_t mm = m[i]; in HELPER()
2304 int8_t nn = n[i]; in HELPER()
2313 d[i] = res; in HELPER()
2320 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
2323 for (i = 0; i < opr_sz / 2; ++i) { in HELPER()
2324 int8_t mm = m[i]; /* only 8 bits of shift are significant */ in HELPER()
2325 int16_t nn = n[i]; in HELPER()
2334 d[i] = res; in HELPER()
2341 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
2344 for (i = 0; i < opr_sz; ++i) { in HELPER()
2345 int8_t mm = m[i]; in HELPER()
2346 uint8_t nn = n[i]; in HELPER()
2357 d[i] = res; in HELPER()
2364 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
2367 for (i = 0; i < opr_sz / 2; ++i) { in HELPER()
2368 int8_t mm = m[i]; /* only 8 bits of shift are significant */ in HELPER()
2369 uint16_t nn = n[i]; in HELPER()
2380 d[i] = res; in HELPER()
2396 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
2399 for (i = 0; i < opr_sz / 8; ++i) { in HELPER()
2400 d[i] = clmul_8x8_low(n[i], m[i]); in HELPER()
2412 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
2416 for (i = 0; i < opr_sz / 8; i += 2) { in HELPER()
2417 Int128 r = clmul_64(n[i + hi], m[i + hi]); in HELPER()
2418 d[i] = int128_getlo(r); in HELPER()
2419 d[i + 1] = int128_gethi(r); in HELPER()
2442 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
2445 for (i = 0; i < opr_sz / 8; ++i) { in HELPER()
2446 d[i] = clmul_8x4_even(n[i] >> shift, m[i] >> shift); in HELPER()
2453 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
2457 for (i = 0; i < opr_sz / 8; ++i) { in HELPER()
2458 d[i] = clmul_32(n[2 * i + sel], m[2 * i + sel]); in HELPER()
2466 intptr_t i, opr_sz = simd_oprsz(desc); \
2467 for (i = 0; i < opr_sz; i += sizeof(TYPE)) { \
2468 TYPE nn = *(TYPE *)(vn + i); \
2469 *(TYPE *)(vd + i) = -(nn OP 0); \
2491 intptr_t i, opr_sz = simd_oprsz(desc); \
2494 for (i = 0; i < opr_sz / sizeof(TYPE); ++i) { \
2495 d[i] = n[i] < m[i] ? m[i] - n[i] : n[i] - m[i]; \
2515 intptr_t i, opr_sz = simd_oprsz(desc); \ in DO_ABD()
2518 for (i = 0; i < opr_sz / sizeof(TYPE); ++i) { \ in DO_ABD()
2519 d[i] += n[i] < m[i] ? m[i] - n[i] : n[i] - m[i]; \ in DO_ABD()
2547 for (intptr_t i = 0; i < half; ++i) { \
2548 d[H(i)] = FUNC(n[H(i * 2)], n[H(i * 2 + 1)], stat); \
2550 for (intptr_t i = 0; i < half; ++i) { \
2551 d[H(i + half)] = FUNC(m[H(i * 2)], m[H(i * 2 + 1)], stat); \
2598 for (intptr_t i = 0; i < half; ++i) { \
2599 d[H(i)] = FUNC(n[H(i * 2)], n[H(i * 2 + 1)]); \
2601 for (intptr_t i = 0; i < half; ++i) { \
2602 d[H(i + half)] = FUNC(m[H(i * 2)], m[H(i * 2 + 1)]); \
2635 intptr_t i, oprsz = simd_oprsz(desc); \
2639 for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
2640 d[i] = FUNC(n[i], shift, fpst); \
2664 intptr_t i, oprsz = simd_oprsz(desc); \
2669 for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
2670 d[i] = FUNC(n[i], 0, fpst); \
2688 intptr_t i, oprsz = simd_oprsz(desc); \
2693 for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
2694 d[i] = FUNC(n[i], fpst); \
2731 for (size_t i = 0; i < oprsz; ++i) { local
2732 uint32_t index = indices[H1(i)];
2743 result.b[H1(i)] = table[H1(index % 16)];
2760 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
2763 for (i = 0; i < opr_sz; ++i) { in HELPER()
2764 d[i] = ((int32_t)n[i] * m[i]) >> 8; in HELPER()
2771 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
2774 for (i = 0; i < opr_sz / 2; ++i) { in HELPER()
2775 d[i] = ((int32_t)n[i] * m[i]) >> 16; in HELPER()
2782 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
2785 for (i = 0; i < opr_sz / 4; ++i) { in HELPER()
2786 d[i] = ((int64_t)n[i] * m[i]) >> 32; in HELPER()
2793 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
2797 for (i = 0; i < opr_sz / 8; ++i) { in HELPER()
2798 muls64(&discard, &d[i], n[i], m[i]); in HELPER()
2805 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
2808 for (i = 0; i < opr_sz; ++i) { in HELPER()
2809 d[i] = ((uint32_t)n[i] * m[i]) >> 8; in HELPER()
2816 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
2819 for (i = 0; i < opr_sz / 2; ++i) { in HELPER()
2820 d[i] = ((uint32_t)n[i] * m[i]) >> 16; in HELPER()
2827 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
2830 for (i = 0; i < opr_sz / 4; ++i) { in HELPER()
2831 d[i] = ((uint64_t)n[i] * m[i]) >> 32; in HELPER()
2838 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
2842 for (i = 0; i < opr_sz / 8; ++i) { in HELPER()
2843 mulu64(&discard, &d[i], n[i], m[i]); in HELPER()
2850 intptr_t i, opr_sz = simd_oprsz(desc) / 8; in HELPER() local
2854 for (i = 0; i < opr_sz; ++i) { in HELPER()
2855 d[i] = ror64(n[i] ^ m[i], shr); in HELPER()
2910 * i j i j in do_mmla_b()
3025 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
3031 for (i = 0; i < opr_sz / 4; ++i) { in HELPER()
3032 d[i] = bfdotadd_ebf(a[i], n[i], m[i], &fpst, &fpst_odd); in HELPER()
3035 for (i = 0; i < opr_sz / 4; ++i) { in HELPER()
3036 d[i] = bfdotadd(a[i], n[i], m[i], &fpst); in HELPER()
3045 intptr_t i, j, opr_sz = simd_oprsz(desc); in HELPER() local
3054 for (i = 0; i < elements; i += eltspersegment) { in HELPER()
3055 uint32_t m_idx = m[i + H4(index)]; in HELPER()
3057 for (j = i; j < i + eltspersegment; j++) { in HELPER()
3062 for (i = 0; i < elements; i += eltspersegment) { in HELPER()
3063 uint32_t m_idx = m[i + H4(index)]; in HELPER()
3065 for (j = i; j < i + eltspersegment; j++) { in HELPER()
3090 * i j i k j k in HELPER()
3122 * i j i k j k in HELPER()
3152 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
3157 for (i = 0; i < opr_sz / 4; ++i) { in HELPER()
3158 float32 nn = n[H2(i * 2 + sel)] << 16; in HELPER()
3159 float32 mm = m[H2(i * 2 + sel)] << 16; in HELPER()
3160 d[H4(i)] = float32_muladd(nn, mm, a[H4(i)], 0, stat); in HELPER()
3168 intptr_t i, j, opr_sz = simd_oprsz(desc); in HELPER() local
3176 for (i = 0; i < elements; i += eltspersegment) { in HELPER()
3177 float32 m_idx = m[H2(2 * i + index)] << 16; in HELPER()
3179 for (j = i; j < i + eltspersegment; j++) { in HELPER()
3190 intptr_t i, opr_sz = simd_oprsz(desc); \
3191 for (i = 0; i < opr_sz; i += sizeof(TYPE)) { \
3192 TYPE aa = *(TYPE *)(a + i); \
3193 TYPE nn = *(TYPE *)(n + i); \
3194 TYPE mm = *(TYPE *)(m + i); \
3196 *(TYPE *)(d + i) = dd; \
3214 intptr_t i, opr_sz = simd_oprsz(desc); in DO_CLAMP() local
3217 for (i = 0; i < opr_sz; ++i) { in DO_CLAMP()
3218 d[i] = ctpop8(n[i]); in DO_CLAMP()
3226 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
3229 for (i = 0; i < opr_sz / 8; ++i) { in HELPER()
3230 d[i] = revbit64(bswap64(n[i])); in HELPER()
3237 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
3240 for (i = 0; i < opr_sz / 4; ++i) { in HELPER()
3241 d[i] = helper_recpe_u32(n[i]); in HELPER()
3248 intptr_t i, opr_sz = simd_oprsz(desc); in HELPER() local
3251 for (i = 0; i < opr_sz / 4; ++i) { in HELPER()
3252 d[i] = helper_rsqrte_u32(n[i]); in HELPER()