Lines Matching +full:0 +full:- +full:9 +full:a +full:- +full:f
38 name, n, r->q3, r->q2, r->q1, r->q0); in dump_ymm()
43 v[3], v[2], v[1], v[0]); in dump_ymm()
48 v[7], v[6], v[5], v[4], v[3], v[2], v[1], v[0]); in dump_ymm()
56 for (i = 0; i < 16; i++) { in dump_regs()
57 dump_ymm("ymm", i, &s->ymm[i], 0); in dump_regs()
59 for (i = 0; i < 4; i++) { in dump_regs()
60 dump_ymm("mem", i, &s->mem0[i], 0); in dump_regs()
64 static void compare_state(const reg_state *a, const reg_state *b) in compare_state() argument
67 for (i = 0; i < 8; i++) { in compare_state()
68 if (a->mm[i] != b->mm[i]) { in compare_state()
69 printf("MM%d = %016lx\n", i, b->mm[i]); in compare_state()
72 for (i = 0; i < 16; i++) { in compare_state()
73 if (a->r[i] != b->r[i]) { in compare_state()
74 printf("r%d = %016lx\n", i, b->r[i]); in compare_state()
77 for (i = 0; i < 16; i++) { in compare_state()
78 if (memcmp(&a->ymm[i], &b->ymm[i], 32)) { in compare_state()
79 dump_ymm("ymm", i, &b->ymm[i], a->ff); in compare_state()
82 for (i = 0; i < 4; i++) { in compare_state()
83 if (memcmp(&a->mem0[i], &a->mem[i], 32)) { in compare_state()
84 dump_ymm("mem", i, &a->mem[i], a->ff); in compare_state()
87 if (a->flags != b->flags) { in compare_state()
88 printf("FLAGS = %016lx\n", b->flags); in compare_state()
92 #define LOADMM(r, o) "movq " #r ", " #o "[%0]\n\t"
93 #define LOADYMM(r, o) "vmovdqa " #r ", " #o "[%0]\n\t"
96 #define MMREG(F) \ argument
97 F(mm0, 0x00) \
98 F(mm1, 0x08) \
99 F(mm2, 0x10) \
100 F(mm3, 0x18) \
101 F(mm4, 0x20) \
102 F(mm5, 0x28) \
103 F(mm6, 0x30) \
104 F(mm7, 0x38)
105 #define YMMREG(F) \ argument
106 F(ymm0, 0x040) \
107 F(ymm1, 0x060) \
108 F(ymm2, 0x080) \
109 F(ymm3, 0x0a0) \
110 F(ymm4, 0x0c0) \
111 F(ymm5, 0x0e0) \
112 F(ymm6, 0x100) \
113 F(ymm7, 0x120) \
114 F(ymm8, 0x140) \
115 F(ymm9, 0x160) \
116 F(ymm10, 0x180) \
117 F(ymm11, 0x1a0) \
118 F(ymm12, 0x1c0) \
119 F(ymm13, 0x1e0) \
120 F(ymm14, 0x200) \
121 F(ymm15, 0x220)
124 #define REG(F) \ argument
125 F(rbx, 0x248) \
126 F(rcx, 0x250) \
127 F(rdx, 0x258) \
128 F(rsi, 0x260) \
129 F(rdi, 0x268) \
130 F(r8, 0x280) \
131 F(r9, 0x288) \
132 F(r10, 0x290) \
133 F(r11, 0x298) \
134 F(r12, 0x2a0) \
135 F(r13, 0x2a8) \
136 F(r14, 0x2b0) \
137 F(r15, 0x2b8) \
142 reg_state *init = t->init; in run_test()
143 memcpy(init->mem, init->mem0, sizeof(init->mem)); in run_test()
144 printf("%5d %s\n", t->n, t->s); in run_test()
155 "mov rax, %0\n\t" in run_test()
160 "mov rcx, 0x2c0[rax]\n\t" in run_test()
161 "and rcx, 0xff\n\t" in run_test()
166 "mov rax, 0x240[rax]\n\t" in run_test()
172 "mov 0x240[rax], rbx\n\t" in run_test()
173 "mov rbx, 0\n\t" in run_test()
174 "mov 0x270[rax], rbx\n\t" in run_test()
175 "mov 0x278[rax], rbx\n\t" in run_test()
178 "and rbx, 0xff\n\t" in run_test()
179 "mov 0x2c0[rax], rbx\n\t" in run_test()
188 : : "r"(init), "r"(&result), "r"(t->fn) in run_test()
206 #include "test-avx.h"
211 #include "test-avx.h"
212 {-1, NULL, "", NULL}
218 for (t = test_table; t->fn; t++) { in run_all()
223 #define ARRAY_LEN(x) (sizeof(x) / sizeof(x[0]))
225 uint16_t val_f16[] = { 0x4000, 0xbc00, 0x44cd, 0x3a66, 0x4200, 0x7a1a, 0x4780, 0x4826 };
226 float val_f32[] = {2.0, -1.0, 4.8, 0.8, 3, -42.0, 5e6, 7.5, 8.3};
227 double val_f64[] = {2.0, -1.0, 4.8, 0.8, 3, -42.0, 5e6, 7.5};
229 {0x3d6b3b6a9e4118f2lu, 0x355ae76d2774d78clu,
230 0xac3ff76c4daa4b28lu, 0xe7fabd204cb54083lu},
231 {0xd851c54a56bf1f29lu, 0x4a84d1d50bf4c4fflu,
232 0x56621e553d52b56clu, 0xd0069553da8f584alu},
233 {0x5826475e2c5fd799lu, 0xfd32edc01243f5e9lu,
234 0x738ba2c66d3fe126lu, 0x5707219c6e6c26b4lu},
237 v4di deadbeef = {0xa5a5a5a5deadbeefull, 0xa5a5a5a5deadbeefull,
238 0xa5a5a5a5deadbeefull, 0xa5a5a5a5deadbeefull};
239 /* &gather_mem[0x10] is 512 bytes from the base; indices must be >=-64, <64
241 v4di indexq = {0x000000000000001full, 0x000000000000003dull,
242 0xffffffffffffffffull, 0xffffffffffffffdfull};
243 v4di indexd = {0x00000002ffffffcdull, 0xfffffff500000010ull,
244 0x0000003afffffff0ull, 0x000000000000000eull};
246 v4di gather_mem[0x20];
251 memset(r, 0, sizeof(*r)); in init_f16reg()
260 for (i = 0; i < 8; i++) { in init_f32reg()
263 n = 0; in init_f32reg()
274 for (i = 0; i < 4; i++) { in init_f64reg()
277 n = 0; in init_f64reg()
288 r->q0 = val_i64[n].q0 ^ mask; in init_intreg()
289 r->q1 = val_i64[n].q1 ^ mask; in init_intreg()
290 r->q2 = val_i64[n].q2 ^ mask; in init_intreg()
291 r->q3 = val_i64[n].q3 ^ mask; in init_intreg()
294 n = 0; in init_intreg()
295 mask *= 0x104C11DB7; in init_intreg()
303 s->r[3] = (uint64_t)&s->mem[0]; /* rdx */ in init_all()
304 s->r[4] = (uint64_t)&gather_mem[ARRAY_LEN(gather_mem) / 2]; /* rsi */ in init_all()
305 s->r[5] = (uint64_t)&s->mem[2]; /* rdi */ in init_all()
306 s->flags = 2; in init_all()
307 for (i = 0; i < 16; i++) { in init_all()
308 s->ymm[i] = deadbeef; in init_all()
310 s->ymm[13] = indexd; in init_all()
311 s->ymm[14] = indexq; in init_all()
312 for (i = 0; i < 4; i++) { in init_all()
313 s->mem0[i] = deadbeef; in init_all()
322 init_intreg(&initI.ymm[0]); in main()
323 init_intreg(&initI.ymm[9]); in main()
332 init_f16reg(&initF16.ymm[0]); in main()
333 init_f16reg(&initF16.ymm[9]); in main()
343 init_f32reg(&initF32.ymm[0]); in main()
344 init_f32reg(&initF32.ymm[9]); in main()
354 init_f64reg(&initF64.ymm[0]); in main()
355 init_f64reg(&initF64.ymm[9]); in main()
364 for (i = 0; i < ARRAY_LEN(gather_mem); i++) { in main()
374 return 0; in main()