xref: /qemu/tcg/optimize.c (revision c7c513389c6cb8c6dd60e55d1c99244de4e93663)
1 /*
2  * Optimizations for Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2010 Samsung Electronics.
5  * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 
26 #include "qemu/osdep.h"
27 #include "qemu/int128.h"
28 #include "qemu/interval-tree.h"
29 #include "tcg/tcg-op-common.h"
30 #include "tcg-internal.h"
31 #include "tcg-has.h"
32 
33 
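/*
 * A MemCopyInfo records that the byte range [itree.start, itree.last]
 * in memory currently holds a copy of temporary @ts with type @type.
 * Entries live both in an interval tree keyed by that range
 * (OptContext.mem_copy) and on the owning temp's list
 * (TempOptInfo.mem_copy), so a later load of the same range and type
 * can be replaced by a mov from @ts.
 */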
34 typedef struct MemCopyInfo {
35     IntervalTreeNode itree;
36     QSIMPLEQ_ENTRY (MemCopyInfo) next;
37     TCGTemp *ts;
38     TCGType type;
39 } MemCopyInfo;
40 
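/*
 * Per-temporary optimizer state.  prev_copy/next_copy link all temps
 * currently known to hold the same value into a circular list; a temp
 * whose next_copy points back to itself has no other copies.  For a
 * constant, val holds the value, kept sign-extended for TCG_TYPE_I32.
 * As an example of the masks below: for the constant 6, z_mask is 6
 * (only bits 1 and 2 can be nonzero) and s_mask is -8 (bits 63..3 are
 * known to match the sign bit).
 */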
41 typedef struct TempOptInfo {
42     bool is_const;
43     TCGTemp *prev_copy;
44     TCGTemp *next_copy;
45     QSIMPLEQ_HEAD(, MemCopyInfo) mem_copy;
46     uint64_t val;
47     uint64_t z_mask;  /* mask bit is 0 if value bit is known to be 0 */
48     uint64_t s_mask;  /* mask bit is 1 if value bit is known to match msb */
49 } TempOptInfo;
50 
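/*
 * Pass-wide optimizer state.  temps_used marks which temps already have
 * a valid TempOptInfo; mem_copy and mem_free hold the live and recycled
 * MemCopyInfo entries; prev_mb remembers the last memory barrier seen in
 * the current basic block; type is the TCGType of the operation being
 * folded.
 */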
51 typedef struct OptContext {
52     TCGContext *tcg;
53     TCGOp *prev_mb;
54     TCGTempSet temps_used;
55 
56     IntervalTreeRoot mem_copy;
57     QSIMPLEQ_HEAD(, MemCopyInfo) mem_free;
58 
59     /* In flight values from optimization. */
60     TCGType type;
61     int carry_state;  /* -1 = non-constant, {0,1} = constant carry-in */
62 } OptContext;
63 
64 static inline TempOptInfo *ts_info(TCGTemp *ts)
65 {
66     return ts->state_ptr;
67 }
68 
69 static inline TempOptInfo *arg_info(TCGArg arg)
70 {
71     return ts_info(arg_temp(arg));
72 }
73 
74 static inline bool ti_is_const(TempOptInfo *ti)
75 {
76     return ti->is_const;
77 }
78 
79 static inline uint64_t ti_const_val(TempOptInfo *ti)
80 {
81     return ti->val;
82 }
83 
84 static inline bool ti_is_const_val(TempOptInfo *ti, uint64_t val)
85 {
86     return ti_is_const(ti) && ti_const_val(ti) == val;
87 }
88 
89 static inline bool ts_is_const(TCGTemp *ts)
90 {
91     return ti_is_const(ts_info(ts));
92 }
93 
94 static inline bool ts_is_const_val(TCGTemp *ts, uint64_t val)
95 {
96     return ti_is_const_val(ts_info(ts), val);
97 }
98 
99 static inline bool arg_is_const(TCGArg arg)
100 {
101     return ts_is_const(arg_temp(arg));
102 }
103 
104 static inline bool arg_is_const_val(TCGArg arg, uint64_t val)
105 {
106     return ts_is_const_val(arg_temp(arg), val);
107 }
108 
109 static inline bool ts_is_copy(TCGTemp *ts)
110 {
111     return ts_info(ts)->next_copy != ts;
112 }
113 
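/*
 * Choose the preferred temp of two copies of the same value.
 * TCGTempKind is ordered so that longer-lived temps (globals, fixed
 * registers, constants) compare higher than ebb/tb temporaries.
 */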
114 static TCGTemp *cmp_better_copy(TCGTemp *a, TCGTemp *b)
115 {
116     return a->kind < b->kind ? b : a;
117 }
118 
119 /* Initialize and activate a temporary.  */
120 static void init_ts_info(OptContext *ctx, TCGTemp *ts)
121 {
122     size_t idx = temp_idx(ts);
123     TempOptInfo *ti;
124 
125     if (test_bit(idx, ctx->temps_used.l)) {
126         return;
127     }
128     set_bit(idx, ctx->temps_used.l);
129 
130     ti = ts->state_ptr;
131     if (ti == NULL) {
132         ti = tcg_malloc(sizeof(TempOptInfo));
133         ts->state_ptr = ti;
134     }
135 
136     ti->next_copy = ts;
137     ti->prev_copy = ts;
138     QSIMPLEQ_INIT(&ti->mem_copy);
139     if (ts->kind == TEMP_CONST) {
140         ti->is_const = true;
141         ti->val = ts->val;
142         ti->z_mask = ts->val;
143         ti->s_mask = INT64_MIN >> clrsb64(ts->val);
144     } else {
145         ti->is_const = false;
146         ti->z_mask = -1;
147         ti->s_mask = 0;
148     }
149 }
150 
151 static MemCopyInfo *mem_copy_first(OptContext *ctx, intptr_t s, intptr_t l)
152 {
153     IntervalTreeNode *r = interval_tree_iter_first(&ctx->mem_copy, s, l);
154     return r ? container_of(r, MemCopyInfo, itree) : NULL;
155 }
156 
157 static MemCopyInfo *mem_copy_next(MemCopyInfo *mem, intptr_t s, intptr_t l)
158 {
159     IntervalTreeNode *r = interval_tree_iter_next(&mem->itree, s, l);
160     return r ? container_of(r, MemCopyInfo, itree) : NULL;
161 }
162 
163 static void remove_mem_copy(OptContext *ctx, MemCopyInfo *mc)
164 {
165     TCGTemp *ts = mc->ts;
166     TempOptInfo *ti = ts_info(ts);
167 
168     interval_tree_remove(&mc->itree, &ctx->mem_copy);
169     QSIMPLEQ_REMOVE(&ti->mem_copy, mc, MemCopyInfo, next);
170     QSIMPLEQ_INSERT_TAIL(&ctx->mem_free, mc, next);
171 }
172 
173 static void remove_mem_copy_in(OptContext *ctx, intptr_t s, intptr_t l)
174 {
175     while (true) {
176         MemCopyInfo *mc = mem_copy_first(ctx, s, l);
177         if (!mc) {
178             break;
179         }
180         remove_mem_copy(ctx, mc);
181     }
182 }
183 
184 static void remove_mem_copy_all(OptContext *ctx)
185 {
186     remove_mem_copy_in(ctx, 0, -1);
187     tcg_debug_assert(interval_tree_is_empty(&ctx->mem_copy));
188 }
189 
190 static TCGTemp *find_better_copy(TCGTemp *ts)
191 {
192     TCGTemp *i, *ret;
193 
194     /* If this is already readonly, we can't do better. */
195     if (temp_readonly(ts)) {
196         return ts;
197     }
198 
199     ret = ts;
200     for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
201         ret = cmp_better_copy(ret, i);
202     }
203     return ret;
204 }
205 
206 static void move_mem_copies(TCGTemp *dst_ts, TCGTemp *src_ts)
207 {
208     TempOptInfo *si = ts_info(src_ts);
209     TempOptInfo *di = ts_info(dst_ts);
210     MemCopyInfo *mc;
211 
212     QSIMPLEQ_FOREACH(mc, &si->mem_copy, next) {
213         tcg_debug_assert(mc->ts == src_ts);
214         mc->ts = dst_ts;
215     }
216     QSIMPLEQ_CONCAT(&di->mem_copy, &si->mem_copy);
217 }
218 
219 /* Reset TEMP's state, possibly removing the temp from the list of copies.  */
220 static void reset_ts(OptContext *ctx, TCGTemp *ts)
221 {
222     TempOptInfo *ti = ts_info(ts);
223     TCGTemp *pts = ti->prev_copy;
224     TCGTemp *nts = ti->next_copy;
225     TempOptInfo *pi = ts_info(pts);
226     TempOptInfo *ni = ts_info(nts);
227 
228     ni->prev_copy = ti->prev_copy;
229     pi->next_copy = ti->next_copy;
230     ti->next_copy = ts;
231     ti->prev_copy = ts;
232     ti->is_const = false;
233     ti->z_mask = -1;
234     ti->s_mask = 0;
235 
236     if (!QSIMPLEQ_EMPTY(&ti->mem_copy)) {
237         if (ts == nts) {
238             /* Last temp copy being removed, the mem copies die. */
239             MemCopyInfo *mc;
240             QSIMPLEQ_FOREACH(mc, &ti->mem_copy, next) {
241                 interval_tree_remove(&mc->itree, &ctx->mem_copy);
242             }
243             QSIMPLEQ_CONCAT(&ctx->mem_free, &ti->mem_copy);
244         } else {
245             move_mem_copies(find_better_copy(nts), ts);
246         }
247     }
248 }
249 
250 static void reset_temp(OptContext *ctx, TCGArg arg)
251 {
252     reset_ts(ctx, arg_temp(arg));
253 }
254 
255 static void record_mem_copy(OptContext *ctx, TCGType type,
256                             TCGTemp *ts, intptr_t start, intptr_t last)
257 {
258     MemCopyInfo *mc;
259     TempOptInfo *ti;
260 
261     mc = QSIMPLEQ_FIRST(&ctx->mem_free);
262     if (mc) {
263         QSIMPLEQ_REMOVE_HEAD(&ctx->mem_free, next);
264     } else {
265         mc = tcg_malloc(sizeof(*mc));
266     }
267 
268     memset(mc, 0, sizeof(*mc));
269     mc->itree.start = start;
270     mc->itree.last = last;
271     mc->type = type;
272     interval_tree_insert(&mc->itree, &ctx->mem_copy);
273 
274     ts = find_better_copy(ts);
275     ti = ts_info(ts);
276     mc->ts = ts;
277     QSIMPLEQ_INSERT_TAIL(&ti->mem_copy, mc, next);
278 }
279 
280 static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
281 {
282     TCGTemp *i;
283 
284     if (ts1 == ts2) {
285         return true;
286     }
287 
288     if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) {
289         return false;
290     }
291 
292     for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) {
293         if (i == ts2) {
294             return true;
295         }
296     }
297 
298     return false;
299 }
300 
301 static bool args_are_copies(TCGArg arg1, TCGArg arg2)
302 {
303     return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
304 }
305 
306 static TCGTemp *find_mem_copy_for(OptContext *ctx, TCGType type, intptr_t s)
307 {
308     MemCopyInfo *mc;
309 
310     for (mc = mem_copy_first(ctx, s, s); mc; mc = mem_copy_next(mc, s, s)) {
311         if (mc->itree.start == s && mc->type == type) {
312             return find_better_copy(mc->ts);
313         }
314     }
315     return NULL;
316 }
317 
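/*
 * Constant temps are interned per (type, value) by tcg_constant_internal;
 * 32-bit constants are kept sign-extended, matching the canonical form
 * used by the constant-folding helpers.
 */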
318 static TCGArg arg_new_constant(OptContext *ctx, uint64_t val)
319 {
320     TCGType type = ctx->type;
321     TCGTemp *ts;
322 
323     if (type == TCG_TYPE_I32) {
324         val = (int32_t)val;
325     }
326 
327     ts = tcg_constant_internal(type, val);
328     init_ts_info(ctx, ts);
329 
330     return temp_arg(ts);
331 }
332 
333 static TCGArg arg_new_temp(OptContext *ctx)
334 {
335     TCGTemp *ts = tcg_temp_new_internal(ctx->type, TEMP_EBB);
336     init_ts_info(ctx, ts);
337     return temp_arg(ts);
338 }
339 
340 static TCGOp *opt_insert_after(OptContext *ctx, TCGOp *op,
341                                TCGOpcode opc, unsigned narg)
342 {
343     return tcg_op_insert_after(ctx->tcg, op, opc, ctx->type, narg);
344 }
345 
346 static TCGOp *opt_insert_before(OptContext *ctx, TCGOp *op,
347                                 TCGOpcode opc, unsigned narg)
348 {
349     return tcg_op_insert_before(ctx->tcg, op, opc, ctx->type, narg);
350 }
351 
352 static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
353 {
354     TCGTemp *dst_ts = arg_temp(dst);
355     TCGTemp *src_ts = arg_temp(src);
356     TempOptInfo *di;
357     TempOptInfo *si;
358     TCGOpcode new_op;
359 
360     if (ts_are_copies(dst_ts, src_ts)) {
361         tcg_op_remove(ctx->tcg, op);
362         return true;
363     }
364 
365     reset_ts(ctx, dst_ts);
366     di = ts_info(dst_ts);
367     si = ts_info(src_ts);
368 
369     switch (ctx->type) {
370     case TCG_TYPE_I32:
371     case TCG_TYPE_I64:
372         new_op = INDEX_op_mov;
373         break;
374     case TCG_TYPE_V64:
375     case TCG_TYPE_V128:
376     case TCG_TYPE_V256:
377         /* TCGOP_TYPE and TCGOP_VECE remain unchanged.  */
378         new_op = INDEX_op_mov_vec;
379         break;
380     default:
381         g_assert_not_reached();
382     }
383     op->opc = new_op;
384     op->args[0] = dst;
385     op->args[1] = src;
386 
387     di->z_mask = si->z_mask;
388     di->s_mask = si->s_mask;
389 
390     if (src_ts->type == dst_ts->type) {
391         TempOptInfo *ni = ts_info(si->next_copy);
392 
393         di->next_copy = si->next_copy;
394         di->prev_copy = src_ts;
395         ni->prev_copy = dst_ts;
396         si->next_copy = dst_ts;
397         di->is_const = si->is_const;
398         di->val = si->val;
399 
400         if (!QSIMPLEQ_EMPTY(&si->mem_copy)
401             && cmp_better_copy(src_ts, dst_ts) == dst_ts) {
402             move_mem_copies(dst_ts, src_ts);
403         }
404     }
405     return true;
406 }
407 
408 static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
409                              TCGArg dst, uint64_t val)
410 {
411     /* Convert movi to mov with constant temp. */
412     return tcg_opt_gen_mov(ctx, op, dst, arg_new_constant(ctx, val));
413 }
414 
415 static uint64_t do_constant_folding_2(TCGOpcode op, TCGType type,
416                                       uint64_t x, uint64_t y)
417 {
418     uint64_t l64, h64;
419 
420     switch (op) {
421     case INDEX_op_add:
422         return x + y;
423 
424     case INDEX_op_sub:
425         return x - y;
426 
427     case INDEX_op_mul:
428         return x * y;
429 
430     case INDEX_op_and:
431     case INDEX_op_and_vec:
432         return x & y;
433 
434     case INDEX_op_or:
435     case INDEX_op_or_vec:
436         return x | y;
437 
438     case INDEX_op_xor:
439     case INDEX_op_xor_vec:
440         return x ^ y;
441 
442     case INDEX_op_shl:
443         if (type == TCG_TYPE_I32) {
444             return (uint32_t)x << (y & 31);
445         }
446         return (uint64_t)x << (y & 63);
447 
448     case INDEX_op_shr:
449         if (type == TCG_TYPE_I32) {
450             return (uint32_t)x >> (y & 31);
451         }
452         return (uint64_t)x >> (y & 63);
453 
454     case INDEX_op_sar:
455         if (type == TCG_TYPE_I32) {
456             return (int32_t)x >> (y & 31);
457         }
458         return (int64_t)x >> (y & 63);
459 
460     case INDEX_op_rotr:
461         if (type == TCG_TYPE_I32) {
462             return ror32(x, y & 31);
463         }
464         return ror64(x, y & 63);
465 
466     case INDEX_op_rotl:
467         if (type == TCG_TYPE_I32) {
468             return rol32(x, y & 31);
469         }
470         return rol64(x, y & 63);
471 
472     case INDEX_op_not:
473     case INDEX_op_not_vec:
474         return ~x;
475 
476     case INDEX_op_neg:
477         return -x;
478 
479     case INDEX_op_andc:
480     case INDEX_op_andc_vec:
481         return x & ~y;
482 
483     case INDEX_op_orc:
484     case INDEX_op_orc_vec:
485         return x | ~y;
486 
487     case INDEX_op_eqv:
488     case INDEX_op_eqv_vec:
489         return ~(x ^ y);
490 
491     case INDEX_op_nand:
492     case INDEX_op_nand_vec:
493         return ~(x & y);
494 
495     case INDEX_op_nor:
496     case INDEX_op_nor_vec:
497         return ~(x | y);
498 
499     case INDEX_op_clz:
500         if (type == TCG_TYPE_I32) {
501             return (uint32_t)x ? clz32(x) : y;
502         }
503         return x ? clz64(x) : y;
504 
505     case INDEX_op_ctz:
506         if (type == TCG_TYPE_I32) {
507             return (uint32_t)x ? ctz32(x) : y;
508         }
509         return x ? ctz64(x) : y;
510 
511     case INDEX_op_ctpop:
512         return type == TCG_TYPE_I32 ? ctpop32(x) : ctpop64(x);
513 
514     case INDEX_op_bswap16:
515         x = bswap16(x);
516         return y & TCG_BSWAP_OS ? (int16_t)x : x;
517 
518     case INDEX_op_bswap32:
519         x = bswap32(x);
520         return y & TCG_BSWAP_OS ? (int32_t)x : x;
521 
522     case INDEX_op_bswap64:
523         return bswap64(x);
524 
525     case INDEX_op_ext_i32_i64:
526         return (int32_t)x;
527 
528     case INDEX_op_extu_i32_i64:
529     case INDEX_op_extrl_i64_i32:
530         return (uint32_t)x;
531 
532     case INDEX_op_extrh_i64_i32:
533         return (uint64_t)x >> 32;
534 
535     case INDEX_op_muluh:
536         if (type == TCG_TYPE_I32) {
537             return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
538         }
539         mulu64(&l64, &h64, x, y);
540         return h64;
541 
542     case INDEX_op_mulsh:
543         if (type == TCG_TYPE_I32) {
544             return ((int64_t)(int32_t)x * (int32_t)y) >> 32;
545         }
546         muls64(&l64, &h64, x, y);
547         return h64;
548 
549     case INDEX_op_divs:
550         /* Avoid crashing on divide by zero, otherwise undefined.  */
551         if (type == TCG_TYPE_I32) {
552             return (int32_t)x / ((int32_t)y ? : 1);
553         }
554         return (int64_t)x / ((int64_t)y ? : 1);
555 
556     case INDEX_op_divu:
557         if (type == TCG_TYPE_I32) {
558             return (uint32_t)x / ((uint32_t)y ? : 1);
559         }
560         return (uint64_t)x / ((uint64_t)y ? : 1);
561 
562     case INDEX_op_rems:
563         if (type == TCG_TYPE_I32) {
564             return (int32_t)x % ((int32_t)y ? : 1);
565         }
566         return (int64_t)x % ((int64_t)y ? : 1);
567 
568     case INDEX_op_remu:
569         if (type == TCG_TYPE_I32) {
570             return (uint32_t)x % ((uint32_t)y ? : 1);
571         }
572         return (uint64_t)x % ((uint64_t)y ? : 1);
573 
574     default:
575         g_assert_not_reached();
576     }
577 }
578 
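/*
 * As with constants elsewhere, 32-bit results are canonicalized by
 * sign-extension: e.g. folding a 32-bit add of 0x7fffffff and 1 yields
 * 0xffffffff80000000, not 0x80000000.
 */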
579 static uint64_t do_constant_folding(TCGOpcode op, TCGType type,
580                                     uint64_t x, uint64_t y)
581 {
582     uint64_t res = do_constant_folding_2(op, type, x, y);
583     if (type == TCG_TYPE_I32) {
584         res = (int32_t)res;
585     }
586     return res;
587 }
588 
589 static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
590 {
591     switch (c) {
592     case TCG_COND_EQ:
593         return x == y;
594     case TCG_COND_NE:
595         return x != y;
596     case TCG_COND_LT:
597         return (int32_t)x < (int32_t)y;
598     case TCG_COND_GE:
599         return (int32_t)x >= (int32_t)y;
600     case TCG_COND_LE:
601         return (int32_t)x <= (int32_t)y;
602     case TCG_COND_GT:
603         return (int32_t)x > (int32_t)y;
604     case TCG_COND_LTU:
605         return x < y;
606     case TCG_COND_GEU:
607         return x >= y;
608     case TCG_COND_LEU:
609         return x <= y;
610     case TCG_COND_GTU:
611         return x > y;
612     case TCG_COND_TSTEQ:
613         return (x & y) == 0;
614     case TCG_COND_TSTNE:
615         return (x & y) != 0;
616     case TCG_COND_ALWAYS:
617     case TCG_COND_NEVER:
618         break;
619     }
620     g_assert_not_reached();
621 }
622 
623 static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
624 {
625     switch (c) {
626     case TCG_COND_EQ:
627         return x == y;
628     case TCG_COND_NE:
629         return x != y;
630     case TCG_COND_LT:
631         return (int64_t)x < (int64_t)y;
632     case TCG_COND_GE:
633         return (int64_t)x >= (int64_t)y;
634     case TCG_COND_LE:
635         return (int64_t)x <= (int64_t)y;
636     case TCG_COND_GT:
637         return (int64_t)x > (int64_t)y;
638     case TCG_COND_LTU:
639         return x < y;
640     case TCG_COND_GEU:
641         return x >= y;
642     case TCG_COND_LEU:
643         return x <= y;
644     case TCG_COND_GTU:
645         return x > y;
646     case TCG_COND_TSTEQ:
647         return (x & y) == 0;
648     case TCG_COND_TSTNE:
649         return (x & y) != 0;
650     case TCG_COND_ALWAYS:
651     case TCG_COND_NEVER:
652         break;
653     }
654     g_assert_not_reached();
655 }
656 
657 static int do_constant_folding_cond_eq(TCGCond c)
658 {
659     switch (c) {
660     case TCG_COND_GT:
661     case TCG_COND_LTU:
662     case TCG_COND_LT:
663     case TCG_COND_GTU:
664     case TCG_COND_NE:
665         return 0;
666     case TCG_COND_GE:
667     case TCG_COND_GEU:
668     case TCG_COND_LE:
669     case TCG_COND_LEU:
670     case TCG_COND_EQ:
671         return 1;
672     case TCG_COND_TSTEQ:
673     case TCG_COND_TSTNE:
674         return -1;
675     case TCG_COND_ALWAYS:
676     case TCG_COND_NEVER:
677         break;
678     }
679     g_assert_not_reached();
680 }
681 
682 /*
683  * Return -1 if the condition can't be simplified,
684  * and the result of the condition (0 or 1) if it can.
685  */
686 static int do_constant_folding_cond(TCGType type, TCGArg x,
687                                     TCGArg y, TCGCond c)
688 {
689     if (arg_is_const(x) && arg_is_const(y)) {
690         uint64_t xv = arg_info(x)->val;
691         uint64_t yv = arg_info(y)->val;
692 
693         switch (type) {
694         case TCG_TYPE_I32:
695             return do_constant_folding_cond_32(xv, yv, c);
696         case TCG_TYPE_I64:
697             return do_constant_folding_cond_64(xv, yv, c);
698         default:
699             /* Only scalar comparisons are optimizable */
700             return -1;
701         }
702     } else if (args_are_copies(x, y)) {
703         return do_constant_folding_cond_eq(c);
704     } else if (arg_is_const_val(y, 0)) {
705         switch (c) {
706         case TCG_COND_LTU:
707         case TCG_COND_TSTNE:
708             return 0;
709         case TCG_COND_GEU:
710         case TCG_COND_TSTEQ:
711             return 1;
712         default:
713             return -1;
714         }
715     }
716     return -1;
717 }
718 
719 /**
720  * swap_commutative:
721  * @dest: TCGArg of the destination argument, or NO_DEST.
722  * @p1: first paired argument
723  * @p2: second paired argument
724  *
725  * If *@p1 is a constant and *@p2 is not, swap.
726  * If *@p2 matches @dest, swap.
727  * Return true if a swap was performed.
728  */
729 
730 #define NO_DEST  temp_arg(NULL)
731 
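/*
 * Score how strongly an operand prefers to be the second argument:
 * non-constants score 0, constant zero scores 2, other constants 3.
 * swap_commutative() swaps when the first operand scores higher, so
 * constants gravitate toward the second slot.
 */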
732 static int pref_commutative(TempOptInfo *ti)
733 {
734     /* Slight preference for non-zero constants second. */
735     return !ti_is_const(ti) ? 0 : ti_const_val(ti) ? 3 : 2;
736 }
737 
738 static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
739 {
740     TCGArg a1 = *p1, a2 = *p2;
741     int sum = 0;
742     sum += pref_commutative(arg_info(a1));
743     sum -= pref_commutative(arg_info(a2));
744 
745     /* Prefer the constant in second argument, and then the form
746        op a, a, b, which is better handled on non-RISC hosts. */
747     if (sum > 0 || (sum == 0 && dest == a2)) {
748         *p1 = a2;
749         *p2 = a1;
750         return true;
751     }
752     return false;
753 }
754 
755 static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
756 {
757     int sum = 0;
758     sum += pref_commutative(arg_info(p1[0]));
759     sum += pref_commutative(arg_info(p1[1]));
760     sum -= pref_commutative(arg_info(p2[0]));
761     sum -= pref_commutative(arg_info(p2[1]));
762     if (sum > 0) {
763         TCGArg t;
764         t = p1[0], p1[0] = p2[0], p2[0] = t;
765         t = p1[1], p1[1] = p2[1], p2[1] = t;
766         return true;
767     }
768     return false;
769 }
770 
771 /*
772  * Return -1 if the condition can't be simplified,
773  * and the result of the condition (0 or 1) if it can.
774  */
775 static int do_constant_folding_cond1(OptContext *ctx, TCGOp *op, TCGArg dest,
776                                      TCGArg *p1, TCGArg *p2, TCGArg *pcond)
777 {
778     TCGCond cond;
779     TempOptInfo *i1;
780     bool swap;
781     int r;
782 
783     swap = swap_commutative(dest, p1, p2);
784     cond = *pcond;
785     if (swap) {
786         *pcond = cond = tcg_swap_cond(cond);
787     }
788 
789     r = do_constant_folding_cond(ctx->type, *p1, *p2, cond);
790     if (r >= 0) {
791         return r;
792     }
793     if (!is_tst_cond(cond)) {
794         return -1;
795     }
796 
797     i1 = arg_info(*p1);
798 
799     /*
800      * TSTNE x,x -> NE x,0
801      * TSTNE x,i -> NE x,0 if i includes all nonzero bits of x
802      */
803     if (args_are_copies(*p1, *p2) ||
804         (arg_is_const(*p2) && (i1->z_mask & ~arg_info(*p2)->val) == 0)) {
805         *p2 = arg_new_constant(ctx, 0);
806         *pcond = tcg_tst_eqne_cond(cond);
807         return -1;
808     }
809 
810     /* TSTNE x,i -> LT x,0 if i only includes sign bit copies */
811     if (arg_is_const(*p2) && (arg_info(*p2)->val & ~i1->s_mask) == 0) {
812         *p2 = arg_new_constant(ctx, 0);
813         *pcond = tcg_tst_ltge_cond(cond);
814         return -1;
815     }
816 
817     /* Expand to AND with a temporary if no backend support. */
818     if (!TCG_TARGET_HAS_tst) {
819         TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_and, 3);
820         TCGArg tmp = arg_new_temp(ctx);
821 
822         op2->args[0] = tmp;
823         op2->args[1] = *p1;
824         op2->args[2] = *p2;
825 
826         *p1 = tmp;
827         *p2 = arg_new_constant(ctx, 0);
828         *pcond = tcg_tst_eqne_cond(cond);
829     }
830     return -1;
831 }
832 
833 static int do_constant_folding_cond2(OptContext *ctx, TCGOp *op, TCGArg *args)
834 {
835     TCGArg al, ah, bl, bh;
836     TCGCond c;
837     bool swap;
838     int r;
839 
840     swap = swap_commutative2(args, args + 2);
841     c = args[4];
842     if (swap) {
843         args[4] = c = tcg_swap_cond(c);
844     }
845 
846     al = args[0];
847     ah = args[1];
848     bl = args[2];
849     bh = args[3];
850 
851     if (arg_is_const(bl) && arg_is_const(bh)) {
852         tcg_target_ulong blv = arg_info(bl)->val;
853         tcg_target_ulong bhv = arg_info(bh)->val;
854         uint64_t b = deposit64(blv, 32, 32, bhv);
855 
856         if (arg_is_const(al) && arg_is_const(ah)) {
857             tcg_target_ulong alv = arg_info(al)->val;
858             tcg_target_ulong ahv = arg_info(ah)->val;
859             uint64_t a = deposit64(alv, 32, 32, ahv);
860 
861             r = do_constant_folding_cond_64(a, b, c);
862             if (r >= 0) {
863                 return r;
864             }
865         }
866 
867         if (b == 0) {
868             switch (c) {
869             case TCG_COND_LTU:
870             case TCG_COND_TSTNE:
871                 return 0;
872             case TCG_COND_GEU:
873             case TCG_COND_TSTEQ:
874                 return 1;
875             default:
876                 break;
877             }
878         }
879 
880         /* TSTNE x,-1 -> NE x,0 */
881         if (b == -1 && is_tst_cond(c)) {
882             args[3] = args[2] = arg_new_constant(ctx, 0);
883             args[4] = tcg_tst_eqne_cond(c);
884             return -1;
885         }
886 
887         /* TSTNE x,sign -> LT x,0 */
888         if (b == INT64_MIN && is_tst_cond(c)) {
889             /* bl must be 0, so copy that to bh */
890             args[3] = bl;
891             args[4] = tcg_tst_ltge_cond(c);
892             return -1;
893         }
894     }
895 
896     if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
897         r = do_constant_folding_cond_eq(c);
898         if (r >= 0) {
899             return r;
900         }
901 
902         /* TSTNE x,x -> NE x,0 */
903         if (is_tst_cond(c)) {
904             args[3] = args[2] = arg_new_constant(ctx, 0);
905             args[4] = tcg_tst_eqne_cond(c);
906             return -1;
907         }
908     }
909 
910     /* Expand to AND with a temporary if no backend support. */
911     if (!TCG_TARGET_HAS_tst && is_tst_cond(c)) {
912         TCGOp *op1 = opt_insert_before(ctx, op, INDEX_op_and, 3);
913         TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_and, 3);
914         TCGArg t1 = arg_new_temp(ctx);
915         TCGArg t2 = arg_new_temp(ctx);
916 
917         op1->args[0] = t1;
918         op1->args[1] = al;
919         op1->args[2] = bl;
920         op2->args[0] = t2;
921         op2->args[1] = ah;
922         op2->args[2] = bh;
923 
924         args[0] = t1;
925         args[1] = t2;
926         args[3] = args[2] = arg_new_constant(ctx, 0);
927         args[4] = tcg_tst_eqne_cond(c);
928     }
929     return -1;
930 }
931 
932 static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args)
933 {
934     for (int i = 0; i < nb_args; i++) {
935         TCGTemp *ts = arg_temp(op->args[i]);
936         init_ts_info(ctx, ts);
937     }
938 }
939 
940 static void copy_propagate(OptContext *ctx, TCGOp *op,
941                            int nb_oargs, int nb_iargs)
942 {
943     for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
944         TCGTemp *ts = arg_temp(op->args[i]);
945         if (ts_is_copy(ts)) {
946             op->args[i] = temp_arg(find_better_copy(ts));
947         }
948     }
949 }
950 
951 static void finish_bb(OptContext *ctx)
952 {
953     /* We only optimize memory barriers within a basic block. */
954     ctx->prev_mb = NULL;
955 }
956 
957 static void finish_ebb(OptContext *ctx)
958 {
959     finish_bb(ctx);
960     /* We only optimize within a single extended basic block. */
961     memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
962     remove_mem_copy_all(ctx);
963 }
964 
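/*
 * Default end-of-fold action for an op that was not eliminated: its
 * outputs now hold new, unrelated values, so drop any constness, copy
 * and mask information previously recorded for them.
 */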
965 static bool finish_folding(OptContext *ctx, TCGOp *op)
966 {
967     const TCGOpDef *def = &tcg_op_defs[op->opc];
968     int i, nb_oargs;
969 
970     nb_oargs = def->nb_oargs;
971     for (i = 0; i < nb_oargs; i++) {
972         TCGTemp *ts = arg_temp(op->args[i]);
973         reset_ts(ctx, ts);
974     }
975     return true;
976 }
977 
978 /*
979  * The fold_* functions return true when processing is complete,
980  * usually by folding the operation to a constant or to a copy,
981  * and calling tcg_opt_gen_{mov,movi}.  They may do other things,
982  * like collect information about the value produced, for use in
983  * optimizing a subsequent operation.
984  *
985  * These first fold_* functions are all helpers, used by other
986  * folders for more specific operations.
987  */
988 
989 static bool fold_const1(OptContext *ctx, TCGOp *op)
990 {
991     if (arg_is_const(op->args[1])) {
992         uint64_t t;
993 
994         t = arg_info(op->args[1])->val;
995         t = do_constant_folding(op->opc, ctx->type, t, 0);
996         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
997     }
998     return false;
999 }
1000 
1001 static bool fold_const2(OptContext *ctx, TCGOp *op)
1002 {
1003     if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1004         uint64_t t1 = arg_info(op->args[1])->val;
1005         uint64_t t2 = arg_info(op->args[2])->val;
1006 
1007         t1 = do_constant_folding(op->opc, ctx->type, t1, t2);
1008         return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
1009     }
1010     return false;
1011 }
1012 
1013 static bool fold_commutative(OptContext *ctx, TCGOp *op)
1014 {
1015     swap_commutative(op->args[0], &op->args[1], &op->args[2]);
1016     return false;
1017 }
1018 
1019 static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
1020 {
1021     swap_commutative(op->args[0], &op->args[1], &op->args[2]);
1022     return fold_const2(ctx, op);
1023 }
1024 
1025 /*
1026  * Record "zero" and "sign" masks for the single output of @op.
1027  * See TempOptInfo definition of z_mask and s_mask.
1028  * If z_mask allows, fold the output to constant zero.
1029  * The passed s_mask may be augmented by z_mask.
1030  */
1031 static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
1032                           uint64_t z_mask, int64_t s_mask)
1033 {
1034     const TCGOpDef *def = &tcg_op_defs[op->opc];
1035     TCGTemp *ts;
1036     TempOptInfo *ti;
1037     int rep;
1038 
1039     /* Only single-output opcodes are supported here. */
1040     tcg_debug_assert(def->nb_oargs == 1);
1041 
1042     /*
1043      * 32-bit ops generate 32-bit results, which for the purpose of
1044      * simplifying tcg are sign-extended.  Certainly that's how we
1045      * represent our constants elsewhere.  Note that the bits will
1046      * be reset properly for a 64-bit value when encountering the
1047      * type changing opcodes.
1048      */
1049     if (ctx->type == TCG_TYPE_I32) {
1050         z_mask = (int32_t)z_mask;
1051         s_mask |= INT32_MIN;
1052     }
1053 
1054     if (z_mask == 0) {
1055         return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
1056     }
1057 
1058     ts = arg_temp(op->args[0]);
1059     reset_ts(ctx, ts);
1060 
1061     ti = ts_info(ts);
1062     ti->z_mask = z_mask;
1063 
1064     /* Canonicalize s_mask and incorporate data from z_mask. */
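    /*
     * clz64(~s_mask) counts the high bits already known to repeat the
     * sign bit; clz64(z_mask) counts high bits known to be zero, which
     * likewise match a (zero) sign bit.  Rebuild the larger of the two
     * as a canonical contiguous run of high bits, at minimum the msb
     * alone.
     */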
1065     rep = clz64(~s_mask);
1066     rep = MAX(rep, clz64(z_mask));
1067     rep = MAX(rep - 1, 0);
1068     ti->s_mask = INT64_MIN >> rep;
1069 
1070     return true;
1071 }
1072 
1073 static bool fold_masks_z(OptContext *ctx, TCGOp *op, uint64_t z_mask)
1074 {
1075     return fold_masks_zs(ctx, op, z_mask, 0);
1076 }
1077 
1078 static bool fold_masks_s(OptContext *ctx, TCGOp *op, uint64_t s_mask)
1079 {
1080     return fold_masks_zs(ctx, op, -1, s_mask);
1081 }
1082 
1083 /*
1084  * An "affected" mask bit is 0 if and only if the result is identical
1085  * to the first input.  Thus if the entire mask is 0, the operation
1086  * is equivalent to a copy.
1087  */
1088 static bool fold_affected_mask(OptContext *ctx, TCGOp *op, uint64_t a_mask)
1089 {
1090     if (ctx->type == TCG_TYPE_I32) {
1091         a_mask = (uint32_t)a_mask;
1092     }
1093     if (a_mask == 0) {
1094         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1095     }
1096     return false;
1097 }
1098 
1099 /*
1100  * Convert @op to NOT, if NOT is supported by the host.
1101  * Return true if the conversion is successful, which will still
1102  * indicate that the processing is complete.
1103  */
1104 static bool fold_not(OptContext *ctx, TCGOp *op);
1105 static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx)
1106 {
1107     TCGOpcode not_op;
1108     bool have_not;
1109 
1110     switch (ctx->type) {
1111     case TCG_TYPE_I32:
1112     case TCG_TYPE_I64:
1113         not_op = INDEX_op_not;
1114         have_not = tcg_op_supported(INDEX_op_not, ctx->type, 0);
1115         break;
1116     case TCG_TYPE_V64:
1117     case TCG_TYPE_V128:
1118     case TCG_TYPE_V256:
1119         not_op = INDEX_op_not_vec;
1120         have_not = TCG_TARGET_HAS_not_vec;
1121         break;
1122     default:
1123         g_assert_not_reached();
1124     }
1125     if (have_not) {
1126         op->opc = not_op;
1127         op->args[1] = op->args[idx];
1128         return fold_not(ctx, op);
1129     }
1130     return false;
1131 }
1132 
1133 /* If the binary operation has first argument @i, fold to @i. */
1134 static bool fold_ix_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
1135 {
1136     if (arg_is_const_val(op->args[1], i)) {
1137         return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1138     }
1139     return false;
1140 }
1141 
1142 /* If the binary operation has first argument @i, fold to NOT. */
1143 static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
1144 {
1145     if (arg_is_const_val(op->args[1], i)) {
1146         return fold_to_not(ctx, op, 2);
1147     }
1148     return false;
1149 }
1150 
1151 /* If the binary operation has second argument @i, fold to @i. */
1152 static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
1153 {
1154     if (arg_is_const_val(op->args[2], i)) {
1155         return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1156     }
1157     return false;
1158 }
1159 
1160 /* If the binary operation has second argument @i, fold to identity. */
1161 static bool fold_xi_to_x(OptContext *ctx, TCGOp *op, uint64_t i)
1162 {
1163     if (arg_is_const_val(op->args[2], i)) {
1164         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1165     }
1166     return false;
1167 }
1168 
1169 /* If the binary operation has second argument @i, fold to NOT. */
1170 static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
1171 {
1172     if (arg_is_const_val(op->args[2], i)) {
1173         return fold_to_not(ctx, op, 1);
1174     }
1175     return false;
1176 }
1177 
1178 /* If the binary operation has both arguments equal, fold to @i. */
1179 static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
1180 {
1181     if (args_are_copies(op->args[1], op->args[2])) {
1182         return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1183     }
1184     return false;
1185 }
1186 
1187 /* If the binary operation has both arguments equal, fold to identity. */
1188 static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
1189 {
1190     if (args_are_copies(op->args[1], op->args[2])) {
1191         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1192     }
1193     return false;
1194 }
1195 
1196 /*
1197  * These outermost fold_<op> functions are sorted alphabetically.
1198  *
1199  * The ordering of the transformations should be:
1200  *   1) those that produce a constant
1201  *   2) those that produce a copy
1202  *   3) those that produce information about the result value.
1203  */
1204 
1205 static bool fold_addco(OptContext *ctx, TCGOp *op);
1206 static bool fold_or(OptContext *ctx, TCGOp *op);
1207 static bool fold_orc(OptContext *ctx, TCGOp *op);
1208 static bool fold_subbo(OptContext *ctx, TCGOp *op);
1209 static bool fold_xor(OptContext *ctx, TCGOp *op);
1210 
1211 static bool fold_add(OptContext *ctx, TCGOp *op)
1212 {
1213     if (fold_const2_commutative(ctx, op) ||
1214         fold_xi_to_x(ctx, op, 0)) {
1215         return true;
1216     }
1217     return finish_folding(ctx, op);
1218 }
1219 
1220 /* We cannot as yet do_constant_folding with vectors. */
1221 static bool fold_add_vec(OptContext *ctx, TCGOp *op)
1222 {
1223     if (fold_commutative(ctx, op) ||
1224         fold_xi_to_x(ctx, op, 0)) {
1225         return true;
1226     }
1227     return finish_folding(ctx, op);
1228 }
1229 
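/*
 * The carry-in of the op currently being folded turned out to be a
 * known constant, so the preceding op no longer needs to produce a
 * carry bit: degrade addco to add, addcio to addci, and addc1o to a
 * plain add that folds in its implicit +1 carry-in.
 */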
1230 static void squash_prev_carryout(OptContext *ctx, TCGOp *op)
1231 {
1232     TempOptInfo *t2;
1233 
1234     op = QTAILQ_PREV(op, link);
1235     switch (op->opc) {
1236     case INDEX_op_addco:
1237         op->opc = INDEX_op_add;
1238         fold_add(ctx, op);
1239         break;
1240     case INDEX_op_addcio:
1241         op->opc = INDEX_op_addci;
1242         break;
1243     case INDEX_op_addc1o:
1244         op->opc = INDEX_op_add;
1245         t2 = arg_info(op->args[2]);
1246         if (ti_is_const(t2)) {
1247             op->args[2] = arg_new_constant(ctx, ti_const_val(t2) + 1);
1248             /* Perform other constant folding, if needed. */
1249             fold_add(ctx, op);
1250         } else {
1251             TCGArg ret = op->args[0];
1252             op = opt_insert_after(ctx, op, INDEX_op_add, 3);
1253             op->args[0] = ret;
1254             op->args[1] = ret;
1255             op->args[2] = arg_new_constant(ctx, 1);
1256         }
1257         break;
1258     default:
1259         g_assert_not_reached();
1260     }
1261 }
1262 
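/*
 * With an unknown carry-in the addci is left alone.  With a known
 * carry-in, the carry producer is rewritten by squash_prev_carryout()
 * and this op becomes a plain add, plus either a constant adjustment
 * or a second add of 1 when the carry was set.
 */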
1263 static bool fold_addci(OptContext *ctx, TCGOp *op)
1264 {
1265     fold_commutative(ctx, op);
1266 
1267     if (ctx->carry_state < 0) {
1268         return finish_folding(ctx, op);
1269     }
1270 
1271     squash_prev_carryout(ctx, op);
1272     op->opc = INDEX_op_add;
1273 
1274     if (ctx->carry_state > 0) {
1275         TempOptInfo *t2 = arg_info(op->args[2]);
1276 
1277         /*
1278          * Propagate the known carry-in into a constant, if possible.
1279          * Otherwise emit a second add +1.
1280          */
1281         if (ti_is_const(t2)) {
1282             op->args[2] = arg_new_constant(ctx, ti_const_val(t2) + 1);
1283         } else {
1284             TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_add, 3);
1285 
1286             op2->args[0] = op->args[0];
1287             op2->args[1] = op->args[1];
1288             op2->args[2] = op->args[2];
1289             fold_add(ctx, op2);
1290 
1291             op->args[1] = op->args[0];
1292             op->args[2] = arg_new_constant(ctx, 1);
1293         }
1294     }
1295 
1296     ctx->carry_state = -1;
1297     return fold_add(ctx, op);
1298 }
1299 
1300 static bool fold_addcio(OptContext *ctx, TCGOp *op)
1301 {
1302     TempOptInfo *t1, *t2;
1303     int carry_out = -1;
1304     uint64_t sum, max;
1305 
1306     fold_commutative(ctx, op);
1307     t1 = arg_info(op->args[1]);
1308     t2 = arg_info(op->args[2]);
1309 
1310     /*
1311      * The z_mask value is >= the maximum value that can be represented
1312      * with the known zero bits.  So adding the z_mask values will not
1313      * overflow if and only if the true values cannot overflow.
1314      */
1315     if (!uadd64_overflow(t1->z_mask, t2->z_mask, &sum) &&
1316         !uadd64_overflow(sum, ctx->carry_state != 0, &sum)) {
1317         carry_out = 0;
1318     }
1319 
1320     if (ctx->carry_state < 0) {
1321         ctx->carry_state = carry_out;
1322         return finish_folding(ctx, op);
1323     }
1324 
1325     squash_prev_carryout(ctx, op);
1326     if (ctx->carry_state == 0) {
1327         goto do_addco;
1328     }
1329 
1330     /* Propagate the known carry-in into a constant, if possible. */
1331     max = ctx->type == TCG_TYPE_I32 ? UINT32_MAX : UINT64_MAX;
1332     if (ti_is_const(t2)) {
1333         uint64_t v = ti_const_val(t2) & max;
1334         if (v < max) {
1335             op->args[2] = arg_new_constant(ctx, v + 1);
1336             goto do_addco;
1337         }
1338         /* max + known carry in produces known carry out. */
1339         carry_out = 1;
1340     }
1341     if (ti_is_const(t1)) {
1342         uint64_t v = ti_const_val(t1) & max;
1343         if (v < max) {
1344             op->args[1] = arg_new_constant(ctx, v + 1);
1345             goto do_addco;
1346         }
1347         carry_out = 1;
1348     }
1349 
1350     /* Adjust the opcode to remember the known carry-in. */
1351     op->opc = INDEX_op_addc1o;
1352     ctx->carry_state = carry_out;
1353     return finish_folding(ctx, op);
1354 
1355  do_addco:
1356     op->opc = INDEX_op_addco;
1357     return fold_addco(ctx, op);
1358 }
1359 
1360 static bool fold_addco(OptContext *ctx, TCGOp *op)
1361 {
1362     TempOptInfo *t1, *t2;
1363     int carry_out = -1;
1364     uint64_t ign;
1365 
1366     fold_commutative(ctx, op);
1367     t1 = arg_info(op->args[1]);
1368     t2 = arg_info(op->args[2]);
1369 
1370     if (ti_is_const(t2)) {
1371         uint64_t v2 = ti_const_val(t2);
1372 
1373         if (ti_is_const(t1)) {
1374             uint64_t v1 = ti_const_val(t1);
1375             /* Given sign-extension of z_mask for I32, we need not truncate. */
1376             carry_out = uadd64_overflow(v1, v2, &ign);
1377         } else if (v2 == 0) {
1378             carry_out = 0;
1379         }
1380     } else {
1381         /*
1382          * The z_mask value is >= the maximum value that can be represented
1383          * with the known zero bits.  So adding the z_mask values will not
1384          * overflow if and only if the true values cannot overflow.
1385          */
1386         if (!uadd64_overflow(t1->z_mask, t2->z_mask, &ign)) {
1387             carry_out = 0;
1388         }
1389     }
1390     ctx->carry_state = carry_out;
1391     return finish_folding(ctx, op);
1392 }
1393 
1394 static bool fold_and(OptContext *ctx, TCGOp *op)
1395 {
1396     uint64_t z1, z2, z_mask, s_mask;
1397     TempOptInfo *t1, *t2;
1398 
1399     if (fold_const2_commutative(ctx, op) ||
1400         fold_xi_to_i(ctx, op, 0) ||
1401         fold_xi_to_x(ctx, op, -1) ||
1402         fold_xx_to_x(ctx, op)) {
1403         return true;
1404     }
1405 
1406     t1 = arg_info(op->args[1]);
1407     t2 = arg_info(op->args[2]);
1408     z1 = t1->z_mask;
1409     z2 = t2->z_mask;
1410 
1411     /*
1412      * Known-zeros does not imply known-ones.  Therefore unless
1413      * arg2 is constant, we can't infer affected bits from it.
1414      */
1415     if (ti_is_const(t2) && fold_affected_mask(ctx, op, z1 & ~z2)) {
1416         return true;
1417     }
1418 
1419     z_mask = z1 & z2;
1420 
1421     /*
1422      * Sign repetitions are perforce all identical, whether they are 1 or 0.
1423      * Bitwise operations preserve the relative quantity of the repetitions.
1424      */
1425     s_mask = t1->s_mask & t2->s_mask;
1426 
1427     return fold_masks_zs(ctx, op, z_mask, s_mask);
1428 }
1429 
1430 static bool fold_andc(OptContext *ctx, TCGOp *op)
1431 {
1432     uint64_t z_mask, s_mask;
1433     TempOptInfo *t1, *t2;
1434 
1435     if (fold_const2(ctx, op) ||
1436         fold_xx_to_i(ctx, op, 0) ||
1437         fold_xi_to_x(ctx, op, 0) ||
1438         fold_ix_to_not(ctx, op, -1)) {
1439         return true;
1440     }
1441 
1442     t1 = arg_info(op->args[1]);
1443     t2 = arg_info(op->args[2]);
1444     z_mask = t1->z_mask;
1445 
1446     if (ti_is_const(t2)) {
1447         /* Fold andc r,x,i to and r,x,~i. */
1448         switch (ctx->type) {
1449         case TCG_TYPE_I32:
1450         case TCG_TYPE_I64:
1451             op->opc = INDEX_op_and;
1452             break;
1453         case TCG_TYPE_V64:
1454         case TCG_TYPE_V128:
1455         case TCG_TYPE_V256:
1456             op->opc = INDEX_op_and_vec;
1457             break;
1458         default:
1459             g_assert_not_reached();
1460         }
1461         op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2));
1462         return fold_and(ctx, op);
1463     }
1464 
1465     /*
1466      * Known-zeros does not imply known-ones.  Therefore unless
1467      * arg2 is constant, we can't infer anything from it.
1468      */
1469     if (ti_is_const(t2)) {
1470         uint64_t v2 = ti_const_val(t2);
1471         if (fold_affected_mask(ctx, op, z_mask & v2)) {
1472             return true;
1473         }
1474         z_mask &= ~v2;
1475     }
1476 
1477     s_mask = t1->s_mask & t2->s_mask;
1478     return fold_masks_zs(ctx, op, z_mask, s_mask);
1479 }
1480 
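/*
 * bitsel_vec computes dest = (true & sel) | (false & ~sel), with the
 * selector in args[1], the "true" value in args[2] and the "false"
 * value in args[3].  Constant all-ones or all-zeros inputs reduce it
 * to mov, not, xor, or, andc, and or orc.
 */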
1481 static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op)
1482 {
1483     /* If true and false values are the same, eliminate the cmp. */
1484     if (args_are_copies(op->args[2], op->args[3])) {
1485         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
1486     }
1487 
1488     if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
1489         uint64_t tv = arg_info(op->args[2])->val;
1490         uint64_t fv = arg_info(op->args[3])->val;
1491 
1492         if (tv == -1 && fv == 0) {
1493             return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1494         }
1495         if (tv == 0 && fv == -1) {
1496             if (TCG_TARGET_HAS_not_vec) {
1497                 op->opc = INDEX_op_not_vec;
1498                 return fold_not(ctx, op);
1499             } else {
1500                 op->opc = INDEX_op_xor_vec;
1501                 op->args[2] = arg_new_constant(ctx, -1);
1502                 return fold_xor(ctx, op);
1503             }
1504         }
1505     }
1506     if (arg_is_const(op->args[2])) {
1507         uint64_t tv = arg_info(op->args[2])->val;
1508         if (tv == -1) {
1509             op->opc = INDEX_op_or_vec;
1510             op->args[2] = op->args[3];
1511             return fold_or(ctx, op);
1512         }
1513         if (tv == 0 && TCG_TARGET_HAS_andc_vec) {
1514             op->opc = INDEX_op_andc_vec;
1515             op->args[2] = op->args[1];
1516             op->args[1] = op->args[3];
1517             return fold_andc(ctx, op);
1518         }
1519     }
1520     if (arg_is_const(op->args[3])) {
1521         uint64_t fv = arg_info(op->args[3])->val;
1522         if (fv == 0) {
1523             op->opc = INDEX_op_and_vec;
1524             return fold_and(ctx, op);
1525         }
1526         if (fv == -1 && TCG_TARGET_HAS_orc_vec) {
1527             op->opc = INDEX_op_orc_vec;
1528             op->args[3] = op->args[1], op->args[1] = op->args[2];
1529             op->args[2] = op->args[3];  /* dest = true | ~sel */
1530             return fold_orc(ctx, op);
1531         }
1532     }
1533     return finish_folding(ctx, op);
1534 }
1535 
1536 static bool fold_brcond(OptContext *ctx, TCGOp *op)
1537 {
1538     int i = do_constant_folding_cond1(ctx, op, NO_DEST, &op->args[0],
1539                                       &op->args[1], &op->args[2]);
1540     if (i == 0) {
1541         tcg_op_remove(ctx->tcg, op);
1542         return true;
1543     }
1544     if (i > 0) {
1545         op->opc = INDEX_op_br;
1546         op->args[0] = op->args[3];
1547         finish_ebb(ctx);
1548     } else {
1549         finish_bb(ctx);
1550     }
1551     return true;
1552 }
1553 
1554 static bool fold_brcond2(OptContext *ctx, TCGOp *op)
1555 {
1556     TCGCond cond;
1557     TCGArg label;
1558     int i, inv = 0;
1559 
1560     i = do_constant_folding_cond2(ctx, op, &op->args[0]);
1561     cond = op->args[4];
1562     label = op->args[5];
1563     if (i >= 0) {
1564         goto do_brcond_const;
1565     }
1566 
1567     switch (cond) {
1568     case TCG_COND_LT:
1569     case TCG_COND_GE:
1570         /*
1571          * Simplify LT/GE comparisons vs zero to a single compare
1572          * vs the high word of the input.
1573          */
1574         if (arg_is_const_val(op->args[2], 0) &&
1575             arg_is_const_val(op->args[3], 0)) {
1576             goto do_brcond_high;
1577         }
1578         break;
1579 
1580     case TCG_COND_NE:
1581         inv = 1;
1582         QEMU_FALLTHROUGH;
1583     case TCG_COND_EQ:
1584         /*
1585          * Simplify EQ/NE comparisons where one of the pairs
1586          * can be simplified.
1587          */
1588         i = do_constant_folding_cond(TCG_TYPE_I32, op->args[0],
1589                                      op->args[2], cond);
1590         switch (i ^ inv) {
1591         case 0:
1592             goto do_brcond_const;
1593         case 1:
1594             goto do_brcond_high;
1595         }
1596 
1597         i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
1598                                      op->args[3], cond);
1599         switch (i ^ inv) {
1600         case 0:
1601             goto do_brcond_const;
1602         case 1:
1603             goto do_brcond_low;
1604         }
1605         break;
1606 
1607     case TCG_COND_TSTEQ:
1608     case TCG_COND_TSTNE:
1609         if (arg_is_const_val(op->args[2], 0)) {
1610             goto do_brcond_high;
1611         }
1612         if (arg_is_const_val(op->args[3], 0)) {
1613             goto do_brcond_low;
1614         }
1615         break;
1616 
1617     default:
1618         break;
1619 
1620     do_brcond_low:
1621         op->opc = INDEX_op_brcond;
1622         op->args[1] = op->args[2];
1623         op->args[2] = cond;
1624         op->args[3] = label;
1625         return fold_brcond(ctx, op);
1626 
1627     do_brcond_high:
1628         op->opc = INDEX_op_brcond;
1629         op->args[0] = op->args[1];
1630         op->args[1] = op->args[3];
1631         op->args[2] = cond;
1632         op->args[3] = label;
1633         return fold_brcond(ctx, op);
1634 
1635     do_brcond_const:
1636         if (i == 0) {
1637             tcg_op_remove(ctx->tcg, op);
1638             return true;
1639         }
1640         op->opc = INDEX_op_br;
1641         op->args[0] = label;
1642         finish_ebb(ctx);
1643         return true;
1644     }
1645 
1646     finish_bb(ctx);
1647     return true;
1648 }
1649 
1650 static bool fold_bswap(OptContext *ctx, TCGOp *op)
1651 {
1652     uint64_t z_mask, s_mask, sign;
1653     TempOptInfo *t1 = arg_info(op->args[1]);
1654 
1655     if (ti_is_const(t1)) {
1656         return tcg_opt_gen_movi(ctx, op, op->args[0],
1657                                 do_constant_folding(op->opc, ctx->type,
1658                                                     ti_const_val(t1),
1659                                                     op->args[2]));
1660     }
1661 
1662     z_mask = t1->z_mask;
1663     switch (op->opc) {
1664     case INDEX_op_bswap16:
1665         z_mask = bswap16(z_mask);
1666         sign = INT16_MIN;
1667         break;
1668     case INDEX_op_bswap32:
1669         z_mask = bswap32(z_mask);
1670         sign = INT32_MIN;
1671         break;
1672     case INDEX_op_bswap64:
1673         z_mask = bswap64(z_mask);
1674         sign = INT64_MIN;
1675         break;
1676     default:
1677         g_assert_not_reached();
1678     }
1679 
1680     s_mask = 0;
1681     switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
1682     case TCG_BSWAP_OZ:
1683         break;
1684     case TCG_BSWAP_OS:
1685         /* If the sign bit may be 1, force all the bits above to 1. */
1686         if (z_mask & sign) {
1687             z_mask |= sign;
1688         }
1689         /* The value and therefore s_mask is explicitly sign-extended. */
1690         s_mask = sign;
1691         break;
1692     default:
1693         /* The high bits are undefined: force all bits above the sign to 1. */
1694         z_mask |= sign << 1;
1695         break;
1696     }
1697 
1698     return fold_masks_zs(ctx, op, z_mask, s_mask);
1699 }
1700 
1701 static bool fold_call(OptContext *ctx, TCGOp *op)
1702 {
1703     TCGContext *s = ctx->tcg;
1704     int nb_oargs = TCGOP_CALLO(op);
1705     int nb_iargs = TCGOP_CALLI(op);
1706     int flags, i;
1707 
1708     init_arguments(ctx, op, nb_oargs + nb_iargs);
1709     copy_propagate(ctx, op, nb_oargs, nb_iargs);
1710 
1711     /* If the function reads or writes globals, reset temp data. */
1712     flags = tcg_call_flags(op);
1713     if (!(flags & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
1714         int nb_globals = s->nb_globals;
1715 
1716         for (i = 0; i < nb_globals; i++) {
1717             if (test_bit(i, ctx->temps_used.l)) {
1718                 reset_ts(ctx, &ctx->tcg->temps[i]);
1719             }
1720         }
1721     }
1722 
1723     /* If the function has side effects, reset mem data. */
1724     if (!(flags & TCG_CALL_NO_SIDE_EFFECTS)) {
1725         remove_mem_copy_all(ctx);
1726     }
1727 
1728     /* Reset temp data for outputs. */
1729     for (i = 0; i < nb_oargs; i++) {
1730         reset_temp(ctx, op->args[i]);
1731     }
1732 
1733     /* Stop optimizing MB across calls. */
1734     ctx->prev_mb = NULL;
1735     return true;
1736 }
1737 
1738 static bool fold_cmp_vec(OptContext *ctx, TCGOp *op)
1739 {
1740     /* Canonicalize the comparison to put immediate second. */
1741     if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
1742         op->args[3] = tcg_swap_cond(op->args[3]);
1743     }
1744     return finish_folding(ctx, op);
1745 }
1746 
1747 static bool fold_cmpsel_vec(OptContext *ctx, TCGOp *op)
1748 {
1749     /* If true and false values are the same, eliminate the cmp. */
1750     if (args_are_copies(op->args[3], op->args[4])) {
1751         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]);
1752     }
1753 
1754     /* Canonicalize the comparison to put immediate second. */
1755     if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
1756         op->args[5] = tcg_swap_cond(op->args[5]);
1757     }
1758     /*
1759      * Canonicalize the "false" input reg to match the destination,
1760      * so that the tcg backend can implement "move if true".
1761      */
1762     if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
1763         op->args[5] = tcg_invert_cond(op->args[5]);
1764     }
1765     return finish_folding(ctx, op);
1766 }
1767 
1768 static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
1769 {
1770     uint64_t z_mask, s_mask;
1771     TempOptInfo *t1 = arg_info(op->args[1]);
1772     TempOptInfo *t2 = arg_info(op->args[2]);
1773 
1774     if (ti_is_const(t1)) {
1775         uint64_t t = ti_const_val(t1);
1776 
1777         if (t != 0) {
1778             t = do_constant_folding(op->opc, ctx->type, t, 0);
1779             return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1780         }
1781         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
1782     }
1783 
1784     switch (ctx->type) {
1785     case TCG_TYPE_I32:
1786         z_mask = 31;
1787         break;
1788     case TCG_TYPE_I64:
1789         z_mask = 63;
1790         break;
1791     default:
1792         g_assert_not_reached();
1793     }
1794     s_mask = ~z_mask;
1795     z_mask |= t2->z_mask;
1796     s_mask &= t2->s_mask;
1797 
1798     return fold_masks_zs(ctx, op, z_mask, s_mask);
1799 }
1800 
1801 static bool fold_ctpop(OptContext *ctx, TCGOp *op)
1802 {
1803     uint64_t z_mask;
1804 
1805     if (fold_const1(ctx, op)) {
1806         return true;
1807     }
1808 
1809     switch (ctx->type) {
1810     case TCG_TYPE_I32:
1811         z_mask = 32 | 31;
1812         break;
1813     case TCG_TYPE_I64:
1814         z_mask = 64 | 63;
1815         break;
1816     default:
1817         g_assert_not_reached();
1818     }
1819     return fold_masks_z(ctx, op, z_mask);
1820 }
1821 
1822 static bool fold_deposit(OptContext *ctx, TCGOp *op)
1823 {
1824     TempOptInfo *t1 = arg_info(op->args[1]);
1825     TempOptInfo *t2 = arg_info(op->args[2]);
1826     int ofs = op->args[3];
1827     int len = op->args[4];
1828     int width = 8 * tcg_type_size(ctx->type);
1829     uint64_t z_mask, s_mask;
1830 
1831     if (ti_is_const(t1) && ti_is_const(t2)) {
1832         return tcg_opt_gen_movi(ctx, op, op->args[0],
1833                                 deposit64(ti_const_val(t1), ofs, len,
1834                                           ti_const_val(t2)));
1835     }
1836 
1837     /* Inserting a value into zero at offset 0. */
1838     if (ti_is_const_val(t1, 0) && ofs == 0) {
1839         uint64_t mask = MAKE_64BIT_MASK(0, len);
1840 
1841         op->opc = INDEX_op_and;
1842         op->args[1] = op->args[2];
1843         op->args[2] = arg_new_constant(ctx, mask);
1844         return fold_and(ctx, op);
1845     }
1846 
1847     /* Inserting zero into a value. */
1848     if (ti_is_const_val(t2, 0)) {
1849         uint64_t mask = deposit64(-1, ofs, len, 0);
1850 
1851         op->opc = INDEX_op_and;
1852         op->args[2] = arg_new_constant(ctx, mask);
1853         return fold_and(ctx, op);
1854     }
1855 
1856     /* The s_mask from the top portion of the deposit is still valid. */
1857     if (ofs + len == width) {
1858         s_mask = t2->s_mask << ofs;
1859     } else {
1860         s_mask = t1->s_mask & ~MAKE_64BIT_MASK(0, ofs + len);
1861     }
1862 
1863     z_mask = deposit64(t1->z_mask, ofs, len, t2->z_mask);
1864     return fold_masks_zs(ctx, op, z_mask, s_mask);
1865 }
1866 
1867 static bool fold_divide(OptContext *ctx, TCGOp *op)
1868 {
1869     if (fold_const2(ctx, op) ||
1870         fold_xi_to_x(ctx, op, 1)) {
1871         return true;
1872     }
1873     return finish_folding(ctx, op);
1874 }
1875 
1876 static bool fold_dup(OptContext *ctx, TCGOp *op)
1877 {
1878     if (arg_is_const(op->args[1])) {
1879         uint64_t t = arg_info(op->args[1])->val;
1880         t = dup_const(TCGOP_VECE(op), t);
1881         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1882     }
1883     return finish_folding(ctx, op);
1884 }
1885 
1886 static bool fold_dup2(OptContext *ctx, TCGOp *op)
1887 {
1888     if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1889         uint64_t t = deposit64(arg_info(op->args[1])->val, 32, 32,
1890                                arg_info(op->args[2])->val);
1891         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1892     }
1893 
1894     if (args_are_copies(op->args[1], op->args[2])) {
1895         op->opc = INDEX_op_dup_vec;
1896         TCGOP_VECE(op) = MO_32;
1897     }
1898     return finish_folding(ctx, op);
1899 }
1900 
1901 static bool fold_eqv(OptContext *ctx, TCGOp *op)
1902 {
1903     uint64_t s_mask;
1904     TempOptInfo *t1, *t2;
1905 
1906     if (fold_const2_commutative(ctx, op) ||
1907         fold_xi_to_x(ctx, op, -1) ||
1908         fold_xi_to_not(ctx, op, 0)) {
1909         return true;
1910     }
1911 
1912     t2 = arg_info(op->args[2]);
1913     if (ti_is_const(t2)) {
1914         /* Fold eqv r,x,i to xor r,x,~i. */
1915         switch (ctx->type) {
1916         case TCG_TYPE_I32:
1917         case TCG_TYPE_I64:
1918             op->opc = INDEX_op_xor;
1919             break;
1920         case TCG_TYPE_V64:
1921         case TCG_TYPE_V128:
1922         case TCG_TYPE_V256:
1923             op->opc = INDEX_op_xor_vec;
1924             break;
1925         default:
1926             g_assert_not_reached();
1927         }
1928         op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2));
1929         return fold_xor(ctx, op);
1930     }
1931 
1932     t1 = arg_info(op->args[1]);
1933     s_mask = t1->s_mask & t2->s_mask;
1934     return fold_masks_s(ctx, op, s_mask);
1935 }
1936 
1937 static bool fold_extract(OptContext *ctx, TCGOp *op)
1938 {
1939     uint64_t z_mask_old, z_mask;
1940     TempOptInfo *t1 = arg_info(op->args[1]);
1941     int pos = op->args[2];
1942     int len = op->args[3];
1943 
1944     if (ti_is_const(t1)) {
1945         return tcg_opt_gen_movi(ctx, op, op->args[0],
1946                                 extract64(ti_const_val(t1), pos, len));
1947     }
1948 
1949     z_mask_old = t1->z_mask;
1950     z_mask = extract64(z_mask_old, pos, len);
1951     if (pos == 0 && fold_affected_mask(ctx, op, z_mask_old ^ z_mask)) {
1952         return true;
1953     }
1954 
1955     return fold_masks_z(ctx, op, z_mask);
1956 }
1957 
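/*
 * extract2 forms the double-width value arg2:arg1 and extracts one
 * register width starting at bit shr.  With both inputs constant this
 * folds to (arg1 >> shr) | (arg2 << (width - shr)).
 */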
1958 static bool fold_extract2(OptContext *ctx, TCGOp *op)
1959 {
1960     if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1961         uint64_t v1 = arg_info(op->args[1])->val;
1962         uint64_t v2 = arg_info(op->args[2])->val;
1963         int shr = op->args[3];
1964 
1965         if (ctx->type == TCG_TYPE_I32) {
1966             v1 = (uint32_t)v1 >> shr;
1967             v2 = (uint64_t)((int32_t)v2 << (32 - shr));
1968         } else {
1969             v1 >>= shr;
1970             v2 <<= 64 - shr;
1971         }
1972         return tcg_opt_gen_movi(ctx, op, op->args[0], v1 | v2);
1973     }
1974     return finish_folding(ctx, op);
1975 }
1976 
1977 static bool fold_exts(OptContext *ctx, TCGOp *op)
1978 {
1979     uint64_t s_mask, z_mask;
1980     TempOptInfo *t1;
1981 
1982     if (fold_const1(ctx, op)) {
1983         return true;
1984     }
1985 
1986     t1 = arg_info(op->args[1]);
1987     z_mask = t1->z_mask;
1988     s_mask = t1->s_mask;
1989 
1990     switch (op->opc) {
1991     case INDEX_op_ext_i32_i64:
1992         s_mask |= INT32_MIN;
1993         z_mask = (int32_t)z_mask;
1994         break;
1995     default:
1996         g_assert_not_reached();
1997     }
1998     return fold_masks_zs(ctx, op, z_mask, s_mask);
1999 }
2000 
2001 static bool fold_extu(OptContext *ctx, TCGOp *op)
2002 {
2003     uint64_t z_mask;
2004 
2005     if (fold_const1(ctx, op)) {
2006         return true;
2007     }
2008 
2009     z_mask = arg_info(op->args[1])->z_mask;
2010     switch (op->opc) {
2011     case INDEX_op_extrl_i64_i32:
2012     case INDEX_op_extu_i32_i64:
2013         z_mask = (uint32_t)z_mask;
2014         break;
2015     case INDEX_op_extrh_i64_i32:
2016         z_mask >>= 32;
2017         break;
2018     default:
2019         g_assert_not_reached();
2020     }
2021     return fold_masks_z(ctx, op, z_mask);
2022 }
2023 
2024 static bool fold_mb(OptContext *ctx, TCGOp *op)
2025 {
2026     /* Eliminate duplicate and redundant fence instructions.  */
2027     if (ctx->prev_mb) {
2028         /*
2029          * Merge two barriers of the same type into one,
2030          * or a weaker barrier into a stronger one,
2031          * or two weaker barriers into a stronger one.
2032          *   mb X; mb Y => mb X|Y
2033          *   mb; strl => mb; st
2034          *   ldaq; mb => ld; mb
2035          *   ldaq; strl => ld; mb; st
2036          * Other combinations are also merged into a strong
2037          * barrier.  This is stricter than specified but for
2038          * the purposes of TCG is better than not optimizing.
2039          */
2040         ctx->prev_mb->args[0] |= op->args[0];
2041         tcg_op_remove(ctx->tcg, op);
2042     } else {
2043         ctx->prev_mb = op;
2044     }
2045     return true;
2046 }
2047 
2048 static bool fold_mov(OptContext *ctx, TCGOp *op)
2049 {
2050     return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
2051 }
2052 
2053 static bool fold_movcond(OptContext *ctx, TCGOp *op)
2054 {
2055     uint64_t z_mask, s_mask;
2056     TempOptInfo *tt, *ft;
2057     int i;
2058 
2059     /* If true and false values are the same, eliminate the cmp. */
2060     if (args_are_copies(op->args[3], op->args[4])) {
2061         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]);
2062     }
2063 
2064     /*
2065      * Canonicalize the "false" input reg to match the destination reg so
2066      * that the tcg backend can implement a "move if true" operation.
2067      */
2068     if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
2069         op->args[5] = tcg_invert_cond(op->args[5]);
2070     }
2071 
2072     i = do_constant_folding_cond1(ctx, op, NO_DEST, &op->args[1],
2073                                   &op->args[2], &op->args[5]);
2074     if (i >= 0) {
2075         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
2076     }
2077 
2078     tt = arg_info(op->args[3]);
2079     ft = arg_info(op->args[4]);
2080     z_mask = tt->z_mask | ft->z_mask;
2081     s_mask = tt->s_mask & ft->s_mask;
2082 
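    /*
     * When both selected values are constants from {0, 1, -1}, the
     * movcond collapses to a (possibly inverted) setcond or negsetcond.
     */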
2083     if (ti_is_const(tt) && ti_is_const(ft)) {
2084         uint64_t tv = ti_const_val(tt);
2085         uint64_t fv = ti_const_val(ft);
2086         TCGCond cond = op->args[5];
2087 
2088         if (tv == 1 && fv == 0) {
2089             op->opc = INDEX_op_setcond;
2090             op->args[3] = cond;
2091         } else if (fv == 1 && tv == 0) {
2092             op->opc = INDEX_op_setcond;
2093             op->args[3] = tcg_invert_cond(cond);
2094         } else if (tv == -1 && fv == 0) {
2095             op->opc = INDEX_op_negsetcond;
2096             op->args[3] = cond;
2097         } else if (fv == -1 && tv == 0) {
2098             op->opc = INDEX_op_negsetcond;
2099             op->args[3] = tcg_invert_cond(cond);
2100         }
2101     }
2102 
2103     return fold_masks_zs(ctx, op, z_mask, s_mask);
2104 }
2105 
2106 static bool fold_mul(OptContext *ctx, TCGOp *op)
2107 {
2108     if (fold_const2(ctx, op) ||
2109         fold_xi_to_i(ctx, op, 0) ||
2110         fold_xi_to_x(ctx, op, 1)) {
2111         return true;
2112     }
2113     return finish_folding(ctx, op);
2114 }
2115 
2116 static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
2117 {
2118     if (fold_const2_commutative(ctx, op) ||
2119         fold_xi_to_i(ctx, op, 0)) {
2120         return true;
2121     }
2122     return finish_folding(ctx, op);
2123 }
2124 
2125 static bool fold_multiply2(OptContext *ctx, TCGOp *op)
2126 {
2127     swap_commutative(op->args[0], &op->args[2], &op->args[3]);
2128 
2129     if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
2130         uint64_t a = arg_info(op->args[2])->val;
2131         uint64_t b = arg_info(op->args[3])->val;
2132         uint64_t h, l;
2133         TCGArg rl, rh;
2134         TCGOp *op2;
2135 
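        /*
         * Compute the double-word product at compile time.  For I32 the
         * product fits in 64 bits and is split by hand; the (int32_t)
         * casts store each half sign-extended.
         */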
2136         switch (op->opc) {
2137         case INDEX_op_mulu2:
2138             if (ctx->type == TCG_TYPE_I32) {
2139                 l = (uint64_t)(uint32_t)a * (uint32_t)b;
2140                 h = (int32_t)(l >> 32);
2141                 l = (int32_t)l;
2142             } else {
2143                 mulu64(&l, &h, a, b);
2144             }
2145             break;
2146         case INDEX_op_muls2:
2147             if (ctx->type == TCG_TYPE_I32) {
2148                 l = (int64_t)(int32_t)a * (int32_t)b;
2149                 h = l >> 32;
2150                 l = (int32_t)l;
2151             } else {
2152                 muls64(&l, &h, a, b);
2153             }
2154             break;
2155         default:
2156             g_assert_not_reached();
2157         }
2158 
2159         rl = op->args[0];
2160         rh = op->args[1];
2161 
2162         /* The proper opcode is supplied by tcg_opt_gen_mov. */
2163         op2 = opt_insert_before(ctx, op, 0, 2);
2164 
2165         tcg_opt_gen_movi(ctx, op, rl, l);
2166         tcg_opt_gen_movi(ctx, op2, rh, h);
2167         return true;
2168     }
2169     return finish_folding(ctx, op);
2170 }
2171 
2172 static bool fold_nand(OptContext *ctx, TCGOp *op)
2173 {
2174     uint64_t s_mask;
2175 
2176     if (fold_const2_commutative(ctx, op) ||
2177         fold_xi_to_not(ctx, op, -1)) {
2178         return true;
2179     }
2180 
2181     s_mask = arg_info(op->args[1])->s_mask
2182            & arg_info(op->args[2])->s_mask;
2183     return fold_masks_s(ctx, op, s_mask);
2184 }
2185 
2186 static bool fold_neg_no_const(OptContext *ctx, TCGOp *op)
2187 {
2188     /* Set to 1 the rightmost possibly-set bit and all bits to its left.  */
2189     uint64_t z_mask = arg_info(op->args[1])->z_mask;
2190     z_mask = -(z_mask & -z_mask);
2191 
2192     return fold_masks_z(ctx, op, z_mask);
2193 }
2194 
2195 static bool fold_neg(OptContext *ctx, TCGOp *op)
2196 {
2197     return fold_const1(ctx, op) || fold_neg_no_const(ctx, op);
2198 }
2199 
2200 static bool fold_nor(OptContext *ctx, TCGOp *op)
2201 {
2202     uint64_t s_mask;
2203 
2204     if (fold_const2_commutative(ctx, op) ||
2205         fold_xi_to_not(ctx, op, 0)) {
2206         return true;
2207     }
2208 
2209     s_mask = arg_info(op->args[1])->s_mask
2210            & arg_info(op->args[2])->s_mask;
2211     return fold_masks_s(ctx, op, s_mask);
2212 }
2213 
2214 static bool fold_not(OptContext *ctx, TCGOp *op)
2215 {
2216     if (fold_const1(ctx, op)) {
2217         return true;
2218     }
2219     return fold_masks_s(ctx, op, arg_info(op->args[1])->s_mask);
2220 }
2221 
2222 static bool fold_or(OptContext *ctx, TCGOp *op)
2223 {
2224     uint64_t z_mask, s_mask;
2225     TempOptInfo *t1, *t2;
2226 
2227     if (fold_const2_commutative(ctx, op) ||
2228         fold_xi_to_x(ctx, op, 0) ||
2229         fold_xx_to_x(ctx, op)) {
2230         return true;
2231     }
2232 
2233     t1 = arg_info(op->args[1]);
2234     t2 = arg_info(op->args[2]);
2235     z_mask = t1->z_mask | t2->z_mask;
2236     s_mask = t1->s_mask & t2->s_mask;
2237     return fold_masks_zs(ctx, op, z_mask, s_mask);
2238 }
2239 
2240 static bool fold_orc(OptContext *ctx, TCGOp *op)
2241 {
2242     uint64_t s_mask;
2243     TempOptInfo *t1, *t2;
2244 
2245     if (fold_const2(ctx, op) ||
2246         fold_xx_to_i(ctx, op, -1) ||
2247         fold_xi_to_x(ctx, op, -1) ||
2248         fold_ix_to_not(ctx, op, 0)) {
2249         return true;
2250     }
2251 
2252     t2 = arg_info(op->args[2]);
2253     if (ti_is_const(t2)) {
2254         /* Fold orc r,x,i to or r,x,~i. */
2255         switch (ctx->type) {
2256         case TCG_TYPE_I32:
2257         case TCG_TYPE_I64:
2258             op->opc = INDEX_op_or;
2259             break;
2260         case TCG_TYPE_V64:
2261         case TCG_TYPE_V128:
2262         case TCG_TYPE_V256:
2263             op->opc = INDEX_op_or_vec;
2264             break;
2265         default:
2266             g_assert_not_reached();
2267         }
2268         op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2));
2269         return fold_or(ctx, op);
2270     }
2271 
2272     t1 = arg_info(op->args[1]);
2273     s_mask = t1->s_mask & t2->s_mask;
2274     return fold_masks_s(ctx, op, s_mask);
2275 }
2276 
2277 static bool fold_qemu_ld_1reg(OptContext *ctx, TCGOp *op)
2278 {
2279     const TCGOpDef *def = &tcg_op_defs[op->opc];
2280     MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
2281     MemOp mop = get_memop(oi);
2282     int width = 8 * memop_size(mop);
2283     uint64_t z_mask = -1, s_mask = 0;
2284 
2285     if (width < 64) {
2286         if (mop & MO_SIGN) {
2287             s_mask = MAKE_64BIT_MASK(width - 1, 64 - (width - 1));
2288         } else {
2289             z_mask = MAKE_64BIT_MASK(0, width);
2290         }
2291     }
2292 
2293     /* Opcodes that touch guest memory stop the mb optimization.  */
2294     ctx->prev_mb = NULL;
2295 
2296     return fold_masks_zs(ctx, op, z_mask, s_mask);
2297 }
2298 
2299 static bool fold_qemu_ld_2reg(OptContext *ctx, TCGOp *op)
2300 {
2301     /* Opcodes that touch guest memory stop the mb optimization.  */
2302     ctx->prev_mb = NULL;
2303     return finish_folding(ctx, op);
2304 }
2305 
2306 static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
2307 {
2308     /* Opcodes that touch guest memory stop the mb optimization.  */
2309     ctx->prev_mb = NULL;
2310     return true;
2311 }
2312 
2313 static bool fold_remainder(OptContext *ctx, TCGOp *op)
2314 {
2315     if (fold_const2(ctx, op) ||
2316         fold_xx_to_i(ctx, op, 0)) {
2317         return true;
2318     }
2319     return finish_folding(ctx, op);
2320 }
2321 
2322 /* Return 1 if finished, -1 if simplified, 0 if unchanged. */
2323 static int fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg)
2324 {
2325     uint64_t a_zmask, b_val;
2326     TCGCond cond;
2327 
2328     if (!arg_is_const(op->args[2])) {
2329         return 0;
2330     }
2331 
2332     a_zmask = arg_info(op->args[1])->z_mask;
2333     b_val = arg_info(op->args[2])->val;
2334     cond = op->args[3];
2335 
2336     if (ctx->type == TCG_TYPE_I32) {
2337         a_zmask = (uint32_t)a_zmask;
2338         b_val = (uint32_t)b_val;
2339     }
2340 
2341     /*
2342      * A with only low bits set vs B with high bits set means that A < B.
2343      */
2344     if (a_zmask < b_val) {
2345         bool inv = false;
2346 
2347         switch (cond) {
2348         case TCG_COND_NE:
2349         case TCG_COND_LEU:
2350         case TCG_COND_LTU:
2351             inv = true;
2352             /* fall through */
2353         case TCG_COND_GTU:
2354         case TCG_COND_GEU:
2355         case TCG_COND_EQ:
2356             return tcg_opt_gen_movi(ctx, op, op->args[0], neg ? -inv : inv);
2357         default:
2358             break;
2359         }
2360     }
2361 
2362     /*
2363      * A with only lsb set is already boolean.
2364      */
2365     if (a_zmask <= 1) {
2366         bool convert = false;
2367         bool inv = false;
2368 
2369         switch (cond) {
2370         case TCG_COND_EQ:
2371             inv = true;
2372             /* fall through */
2373         case TCG_COND_NE:
2374             convert = (b_val == 0);
2375             break;
2376         case TCG_COND_LTU:
2377         case TCG_COND_TSTEQ:
2378             inv = true;
2379             /* fall through */
2380         case TCG_COND_GEU:
2381         case TCG_COND_TSTNE:
2382             convert = (b_val == 1);
2383             break;
2384         default:
2385             break;
2386         }
2387         if (convert) {
2388             if (!inv && !neg) {
2389                 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
2390             }
2391 
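            /*
             * Otherwise adjust the boolean in place: negate it for
             * negsetcond, invert it with xor 1, or do both at once
             * with add -1 (mapping {0,1} to {-1,0}).
             */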
2392             if (!inv) {
2393                 op->opc = INDEX_op_neg;
2394             } else if (neg) {
2395                 op->opc = INDEX_op_add;
2396                 op->args[2] = arg_new_constant(ctx, -1);
2397             } else {
2398                 op->opc = INDEX_op_xor;
2399                 op->args[2] = arg_new_constant(ctx, 1);
2400             }
2401             return -1;
2402         }
2403     }
2404     return 0;
2405 }
2406 
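/*
 * For setcond/negsetcond testing a single bit (TSTNE/TSTEQ against a
 * power of 2), isolate that bit with extract/sextract, or with a
 * shift-and-and, then fix up inversion and/or negation with a
 * trailing xor, add or neg.
 */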
2407 static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg)
2408 {
2409     TCGCond cond = op->args[3];
2410     TCGArg ret, src1, src2;
2411     TCGOp *op2;
2412     uint64_t val;
2413     int sh;
2414     bool inv;
2415 
2416     if (!is_tst_cond(cond) || !arg_is_const(op->args[2])) {
2417         return;
2418     }
2419 
2420     src2 = op->args[2];
2421     val = arg_info(src2)->val;
2422     if (!is_power_of_2(val)) {
2423         return;
2424     }
2425     sh = ctz64(val);
2426 
2427     ret = op->args[0];
2428     src1 = op->args[1];
2429     inv = cond == TCG_COND_TSTEQ;
2430 
2431     if (sh && neg && !inv && TCG_TARGET_sextract_valid(ctx->type, sh, 1)) {
2432         op->opc = INDEX_op_sextract;
2433         op->args[1] = src1;
2434         op->args[2] = sh;
2435         op->args[3] = 1;
2436         return;
2437     } else if (sh && TCG_TARGET_extract_valid(ctx->type, sh, 1)) {
2438         op->opc = INDEX_op_extract;
2439         op->args[1] = src1;
2440         op->args[2] = sh;
2441         op->args[3] = 1;
2442     } else {
2443         if (sh) {
2444             op2 = opt_insert_before(ctx, op, INDEX_op_shr, 3);
2445             op2->args[0] = ret;
2446             op2->args[1] = src1;
2447             op2->args[2] = arg_new_constant(ctx, sh);
2448             src1 = ret;
2449         }
2450         op->opc = INDEX_op_and;
2451         op->args[1] = src1;
2452         op->args[2] = arg_new_constant(ctx, 1);
2453     }
2454 
2455     if (neg && inv) {
2456         op2 = opt_insert_after(ctx, op, INDEX_op_add, 3);
2457         op2->args[0] = ret;
2458         op2->args[1] = ret;
2459         op2->args[2] = arg_new_constant(ctx, -1);
2460     } else if (inv) {
2461         op2 = opt_insert_after(ctx, op, INDEX_op_xor, 3);
2462         op2->args[0] = ret;
2463         op2->args[1] = ret;
2464         op2->args[2] = arg_new_constant(ctx, 1);
2465     } else if (neg) {
2466         op2 = opt_insert_after(ctx, op, INDEX_op_neg, 2);
2467         op2->args[0] = ret;
2468         op2->args[1] = ret;
2469     }
2470 }
2471 
2472 static bool fold_setcond(OptContext *ctx, TCGOp *op)
2473 {
2474     int i = do_constant_folding_cond1(ctx, op, op->args[0], &op->args[1],
2475                                       &op->args[2], &op->args[3]);
2476     if (i >= 0) {
2477         return tcg_opt_gen_movi(ctx, op, op->args[0], i);
2478     }
2479 
2480     i = fold_setcond_zmask(ctx, op, false);
2481     if (i > 0) {
2482         return true;
2483     }
2484     if (i == 0) {
2485         fold_setcond_tst_pow2(ctx, op, false);
2486     }
2487 
2488     return fold_masks_z(ctx, op, 1);
2489 }
2490 
2491 static bool fold_negsetcond(OptContext *ctx, TCGOp *op)
2492 {
2493     int i = do_constant_folding_cond1(ctx, op, op->args[0], &op->args[1],
2494                                       &op->args[2], &op->args[3]);
2495     if (i >= 0) {
2496         return tcg_opt_gen_movi(ctx, op, op->args[0], -i);
2497     }
2498 
2499     i = fold_setcond_zmask(ctx, op, true);
2500     if (i > 0) {
2501         return true;
2502     }
2503     if (i == 0) {
2504         fold_setcond_tst_pow2(ctx, op, true);
2505     }
2506 
2507     /* Value is {0,-1} so all bits are repetitions of the sign. */
2508     return fold_masks_s(ctx, op, -1);
2509 }
2510 
2511 static bool fold_setcond2(OptContext *ctx, TCGOp *op)
2512 {
2513     TCGCond cond;
2514     int i, inv = 0;
2515 
2516     i = do_constant_folding_cond2(ctx, op, &op->args[1]);
2517     cond = op->args[5];
2518     if (i >= 0) {
2519         goto do_setcond_const;
2520     }
2521 
2522     switch (cond) {
2523     case TCG_COND_LT:
2524     case TCG_COND_GE:
2525         /*
2526          * Simplify LT/GE comparisons vs zero to a single compare
2527          * vs the high word of the input.
2528          */
2529         if (arg_is_const_val(op->args[3], 0) &&
2530             arg_is_const_val(op->args[4], 0)) {
2531             goto do_setcond_high;
2532         }
2533         break;
2534 
2535     case TCG_COND_NE:
2536         inv = 1;
2537         QEMU_FALLTHROUGH;
2538     case TCG_COND_EQ:
2539         /*
2540          * Simplify EQ/NE comparisons where one of the pairs
2541          * can be simplified.
2542          */
2543         i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
2544                                      op->args[3], cond);
2545         switch (i ^ inv) {
2546         case 0:
2547             goto do_setcond_const;
2548         case 1:
2549             goto do_setcond_high;
2550         }
2551 
2552         i = do_constant_folding_cond(TCG_TYPE_I32, op->args[2],
2553                                      op->args[4], cond);
2554         switch (i ^ inv) {
2555         case 0:
2556             goto do_setcond_const;
2557         case 1:
2558             goto do_setcond_low;
2559         }
2560         break;
2561 
2562     case TCG_COND_TSTEQ:
2563     case TCG_COND_TSTNE:
2564         if (arg_is_const_val(op->args[3], 0)) {
2565             goto do_setcond_high;
2566         }
2567         if (arg_is_const_val(op->args[4], 0)) {
2568             goto do_setcond_low;
2569         }
2570         break;
2571 
2572     default:
2573         break;
2574 
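        /*
         * Only one half of the double-word comparison matters:
         * rewrite as a single-word setcond on that half and refold.
         */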
2575     do_setcond_low:
2576         op->args[2] = op->args[3];
2577         op->args[3] = cond;
2578         op->opc = INDEX_op_setcond;
2579         return fold_setcond(ctx, op);
2580 
2581     do_setcond_high:
2582         op->args[1] = op->args[2];
2583         op->args[2] = op->args[4];
2584         op->args[3] = cond;
2585         op->opc = INDEX_op_setcond;
2586         return fold_setcond(ctx, op);
2587     }
2588 
2589     return fold_masks_z(ctx, op, 1);
2590 
2591  do_setcond_const:
2592     return tcg_opt_gen_movi(ctx, op, op->args[0], i);
2593 }
2594 
2595 static bool fold_sextract(OptContext *ctx, TCGOp *op)
2596 {
2597     uint64_t z_mask, s_mask, s_mask_old;
2598     TempOptInfo *t1 = arg_info(op->args[1]);
2599     int pos = op->args[2];
2600     int len = op->args[3];
2601 
2602     if (ti_is_const(t1)) {
2603         return tcg_opt_gen_movi(ctx, op, op->args[0],
2604                                 sextract64(ti_const_val(t1), pos, len));
2605     }
2606 
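    /*
     * Result bits from len - 1 upward all repeat the new sign bit;
     * below that, the known sign-repetition bits of the input apply,
     * shifted down to the extracted position.
     */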
2607     s_mask_old = t1->s_mask;
2608     s_mask = s_mask_old >> pos;
2609     s_mask |= -1ull << (len - 1);
2610 
2611     if (pos == 0 && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
2612         return true;
2613     }
2614 
2615     z_mask = sextract64(t1->z_mask, pos, len);
2616     return fold_masks_zs(ctx, op, z_mask, s_mask);
2617 }
2618 
2619 static bool fold_shift(OptContext *ctx, TCGOp *op)
2620 {
2621     uint64_t s_mask, z_mask;
2622     TempOptInfo *t1, *t2;
2623 
2624     if (fold_const2(ctx, op) ||
2625         fold_ix_to_i(ctx, op, 0) ||
2626         fold_xi_to_x(ctx, op, 0)) {
2627         return true;
2628     }
2629 
2630     t1 = arg_info(op->args[1]);
2631     t2 = arg_info(op->args[2]);
2632     s_mask = t1->s_mask;
2633     z_mask = t1->z_mask;
2634 
2635     if (ti_is_const(t2)) {
2636         int sh = ti_const_val(t2);
2637 
2638         z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
2639         s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);
2640 
2641         return fold_masks_zs(ctx, op, z_mask, s_mask);
2642     }
2643 
2644     switch (op->opc) {
2645     case INDEX_op_sar:
2646         /*
2647          * Arithmetic right shift will not reduce the number of
2648          * input sign repetitions.
2649          */
2650         return fold_masks_s(ctx, op, s_mask);
2651     case INDEX_op_shr:
2652         /*
2653          * If the sign bit is known zero, then logical right shift
2654          * will not reduce the number of input sign repetitions.
2655          */
2656         if (~z_mask & -s_mask) {
2657             return fold_masks_s(ctx, op, s_mask);
2658         }
2659         break;
2660     default:
2661         break;
2662     }
2663 
2664     return finish_folding(ctx, op);
2665 }
2666 
2667 static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
2668 {
2669     TCGOpcode neg_op;
2670     bool have_neg;
2671 
2672     if (!arg_is_const(op->args[1]) || arg_info(op->args[1])->val != 0) {
2673         return false;
2674     }
2675 
2676     switch (ctx->type) {
2677     case TCG_TYPE_I32:
2678     case TCG_TYPE_I64:
2679         neg_op = INDEX_op_neg;
2680         have_neg = true;
2681         break;
2682     case TCG_TYPE_V64:
2683     case TCG_TYPE_V128:
2684     case TCG_TYPE_V256:
2685         neg_op = INDEX_op_neg_vec;
2686         have_neg = (TCG_TARGET_HAS_neg_vec &&
2687                     tcg_can_emit_vec_op(neg_op, ctx->type, TCGOP_VECE(op)) > 0);
2688         break;
2689     default:
2690         g_assert_not_reached();
2691     }
2692     if (have_neg) {
2693         op->opc = neg_op;
2694         op->args[1] = op->args[2];
2695         return fold_neg_no_const(ctx, op);
2696     }
2697     return false;
2698 }
2699 
2700 /* We cannot as yet do_constant_folding with vectors. */
2701 static bool fold_sub_vec(OptContext *ctx, TCGOp *op)
2702 {
2703     if (fold_xx_to_i(ctx, op, 0) ||
2704         fold_xi_to_x(ctx, op, 0) ||
2705         fold_sub_to_neg(ctx, op)) {
2706         return true;
2707     }
2708     return finish_folding(ctx, op);
2709 }
2710 
2711 static bool fold_sub(OptContext *ctx, TCGOp *op)
2712 {
2713     if (fold_const2(ctx, op) ||
2714         fold_xx_to_i(ctx, op, 0) ||
2715         fold_xi_to_x(ctx, op, 0) ||
2716         fold_sub_to_neg(ctx, op)) {
2717         return true;
2718     }
2719 
2720     /* Fold sub r,x,i to add r,x,-i */
2721     if (arg_is_const(op->args[2])) {
2722         uint64_t val = arg_info(op->args[2])->val;
2723 
2724         op->opc = INDEX_op_add;
2725         op->args[2] = arg_new_constant(ctx, -val);
2726     }
2727     return finish_folding(ctx, op);
2728 }
2729 
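/*
 * The borrow-out of the previous sub*o opcode is known at compile time,
 * so it need not be produced at runtime: rewrite that opcode into its
 * form without a borrow output.
 */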
2730 static void squash_prev_borrowout(OptContext *ctx, TCGOp *op)
2731 {
2732     TempOptInfo *t2;
2733 
2734     op = QTAILQ_PREV(op, link);
2735     switch (op->opc) {
2736     case INDEX_op_subbo:
2737         op->opc = INDEX_op_sub;
2738         fold_sub(ctx, op);
2739         break;
2740     case INDEX_op_subbio:
2741         op->opc = INDEX_op_subbi;
2742         break;
2743     case INDEX_op_subb1o:
2744         t2 = arg_info(op->args[2]);
2745         if (ti_is_const(t2)) {
2746             op->opc = INDEX_op_add;
2747             op->args[2] = arg_new_constant(ctx, -(ti_const_val(t2) + 1));
2748             /* Perform other constant folding, if needed. */
2749             fold_add(ctx, op);
2750         } else {
2751             TCGArg ret = op->args[0];
2752             op->opc = INDEX_op_sub;
2753             op = opt_insert_after(ctx, op, INDEX_op_add, 3);
2754             op->args[0] = ret;
2755             op->args[1] = ret;
2756             op->args[2] = arg_new_constant(ctx, -1);
2757         }
2758         break;
2759     default:
2760         g_assert_not_reached();
2761     }
2762 }
2763 
2764 static bool fold_subbi(OptContext *ctx, TCGOp *op)
2765 {
2766     TempOptInfo *t2;
2767     int borrow_in = ctx->carry_state;
2768 
2769     if (borrow_in < 0) {
2770         return finish_folding(ctx, op);
2771     }
2772     ctx->carry_state = -1;
2773 
2774     squash_prev_borrowout(ctx, op);
2775     if (borrow_in == 0) {
2776         op->opc = INDEX_op_sub;
2777         return fold_sub(ctx, op);
2778     }
2779 
2780     /*
2781      * Propagate the known carry-in into any constant, then negate to
2782      * transform from sub to add.  If there is no constant, emit a
2783      * separate add -1.
2784      */
2785     t2 = arg_info(op->args[2]);
2786     if (ti_is_const(t2)) {
2787         op->args[2] = arg_new_constant(ctx, -(ti_const_val(t2) + 1));
2788     } else {
2789         TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_sub, 3);
2790 
2791         op2->args[0] = op->args[0];
2792         op2->args[1] = op->args[1];
2793         op2->args[2] = op->args[2];
2794         fold_sub(ctx, op2);
2795 
2796         op->args[1] = op->args[0];
2797         op->args[2] = arg_new_constant(ctx, -1);
2798     }
2799     op->opc = INDEX_op_add;
2800     return fold_add(ctx, op);
2801 }
2802 
2803 static bool fold_subbio(OptContext *ctx, TCGOp *op)
2804 {
2805     TempOptInfo *t1, *t2;
2806     int borrow_out = -1;
2807 
2808     if (ctx->carry_state < 0) {
2809         return finish_folding(ctx, op);
2810     }
2811 
2812     squash_prev_borrowout(ctx, op);
2813     if (ctx->carry_state == 0) {
2814         goto do_subbo;
2815     }
2816 
2817     t1 = arg_info(op->args[1]);
2818     t2 = arg_info(op->args[2]);
2819 
2820     /* Propagate the known borrow-in into a constant, if possible. */
2821     if (ti_is_const(t2)) {
2822         uint64_t max = ctx->type == TCG_TYPE_I32 ? UINT32_MAX : UINT64_MAX;
2823         uint64_t v = ti_const_val(t2) & max;
2824 
2825         if (v < max) {
2826             op->args[2] = arg_new_constant(ctx, v + 1);
2827             goto do_subbo;
2828         }
2829         /* subtracting max + 1 produces known borrow out. */
2830         borrow_out = 1;
2831     }
2832     if (ti_is_const(t1)) {
2833         uint64_t v = ti_const_val(t1);
2834         if (v != 0) {
2835             op->args[1] = arg_new_constant(ctx, v - 1);
2836             goto do_subbo;
2837         }
2838     }
2839 
2840     /* Adjust the opcode to remember the known carry-in. */
2841     op->opc = INDEX_op_subb1o;
2842     ctx->carry_state = borrow_out;
2843     return finish_folding(ctx, op);
2844 
2845  do_subbo:
2846     op->opc = INDEX_op_subbo;
2847     return fold_subbo(ctx, op);
2848 }
2849 
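/*
 * Track whether the borrow-out is known: subtracting a constant zero
 * never borrows, and with both operands constant the subtraction
 * borrows exactly when the minuend is (unsigned) smaller.
 */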
2850 static bool fold_subbo(OptContext *ctx, TCGOp *op)
2851 {
2852     TempOptInfo *t1 = arg_info(op->args[1]);
2853     TempOptInfo *t2 = arg_info(op->args[2]);
2854     int borrow_out = -1;
2855 
2856     if (ti_is_const(t2)) {
2857         uint64_t v2 = ti_const_val(t2);
2858         if (v2 == 0) {
2859             borrow_out = 0;
2860         } else if (ti_is_const(t1)) {
2861             uint64_t v1 = ti_const_val(t1);
2862             borrow_out = v1 < v2;
2863         }
2864     }
2865     ctx->carry_state = borrow_out;
2866     return finish_folding(ctx, op);
2867 }
2868 
2869 static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
2870 {
2871     uint64_t z_mask = -1, s_mask = 0;
2872 
2873     /* We can't do any folding with a load, but we can record bits. */
2874     switch (op->opc) {
2875     case INDEX_op_ld8s:
2876         s_mask = INT8_MIN;
2877         break;
2878     case INDEX_op_ld8u:
2879         z_mask = MAKE_64BIT_MASK(0, 8);
2880         break;
2881     case INDEX_op_ld16s:
2882         s_mask = INT16_MIN;
2883         break;
2884     case INDEX_op_ld16u:
2885         z_mask = MAKE_64BIT_MASK(0, 16);
2886         break;
2887     case INDEX_op_ld32s:
2888         s_mask = INT32_MIN;
2889         break;
2890     case INDEX_op_ld32u:
2891         z_mask = MAKE_64BIT_MASK(0, 32);
2892         break;
2893     default:
2894         g_assert_not_reached();
2895     }
2896     return fold_masks_zs(ctx, op, z_mask, s_mask);
2897 }
2898 
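/*
 * For a load from the CPU state (env), reuse a temp already known to
 * hold a copy of that memory slot; otherwise record the destination
 * so that later loads can be forwarded.
 */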
2899 static bool fold_tcg_ld_memcopy(OptContext *ctx, TCGOp *op)
2900 {
2901     TCGTemp *dst, *src;
2902     intptr_t ofs;
2903     TCGType type;
2904 
2905     if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
2906         return finish_folding(ctx, op);
2907     }
2908 
2909     type = ctx->type;
2910     ofs = op->args[2];
2911     dst = arg_temp(op->args[0]);
2912     src = find_mem_copy_for(ctx, type, ofs);
2913     if (src && src->base_type == type) {
2914         return tcg_opt_gen_mov(ctx, op, temp_arg(dst), temp_arg(src));
2915     }
2916 
2917     reset_ts(ctx, dst);
2918     record_mem_copy(ctx, type, dst, ofs, ofs + tcg_type_size(type) - 1);
2919     return true;
2920 }
2921 
2922 static bool fold_tcg_st(OptContext *ctx, TCGOp *op)
2923 {
2924     intptr_t ofs = op->args[2];
2925     intptr_t lm1;
2926 
2927     if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
2928         remove_mem_copy_all(ctx);
2929         return true;
2930     }
2931 
2932     switch (op->opc) {
2933     case INDEX_op_st8:
2934         lm1 = 0;
2935         break;
2936     case INDEX_op_st16:
2937         lm1 = 1;
2938         break;
2939     case INDEX_op_st32:
2940         lm1 = 3;
2941         break;
2942     case INDEX_op_st:
2943     case INDEX_op_st_vec:
2944         lm1 = tcg_type_size(ctx->type) - 1;
2945         break;
2946     default:
2947         g_assert_not_reached();
2948     }
2949     remove_mem_copy_in(ctx, ofs, ofs + lm1);
2950     return true;
2951 }
2952 
2953 static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op)
2954 {
2955     TCGTemp *src;
2956     intptr_t ofs, last;
2957     TCGType type;
2958 
2959     if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
2960         return fold_tcg_st(ctx, op);
2961     }
2962 
2963     src = arg_temp(op->args[0]);
2964     ofs = op->args[2];
2965     type = ctx->type;
2966 
2967     /*
2968      * Eliminate duplicate stores of a constant.
2969      * This happens frequently when the target ISA zero-extends.
2970      */
2971     if (ts_is_const(src)) {
2972         TCGTemp *prev = find_mem_copy_for(ctx, type, ofs);
2973         if (src == prev) {
2974             tcg_op_remove(ctx->tcg, op);
2975             return true;
2976         }
2977     }
2978 
2979     last = ofs + tcg_type_size(type) - 1;
2980     remove_mem_copy_in(ctx, ofs, last);
2981     record_mem_copy(ctx, type, src, ofs, last);
2982     return true;
2983 }
2984 
2985 static bool fold_xor(OptContext *ctx, TCGOp *op)
2986 {
2987     uint64_t z_mask, s_mask;
2988     TempOptInfo *t1, *t2;
2989 
2990     if (fold_const2_commutative(ctx, op) ||
2991         fold_xx_to_i(ctx, op, 0) ||
2992         fold_xi_to_x(ctx, op, 0) ||
2993         fold_xi_to_not(ctx, op, -1)) {
2994         return true;
2995     }
2996 
2997     t1 = arg_info(op->args[1]);
2998     t2 = arg_info(op->args[2]);
2999     z_mask = t1->z_mask | t2->z_mask;
3000     s_mask = t1->s_mask & t2->s_mask;
3001     return fold_masks_zs(ctx, op, z_mask, s_mask);
3002 }
3003 
3004 /* Propagate constants and copies, fold constant expressions. */
3005 void tcg_optimize(TCGContext *s)
3006 {
3007     int nb_temps, i;
3008     TCGOp *op, *op_next;
3009     OptContext ctx = { .tcg = s };
3010 
3011     QSIMPLEQ_INIT(&ctx.mem_free);
3012 
3013     /* Each temp's state_ptr points to its TempOptInfo.  If the temp
3014        holds a constant then its value is kept in that structure.  If
3015        the temp is a copy of other ones then the other copies are
3016        available through the doubly linked circular list. */
3017 
3018     nb_temps = s->nb_temps;
3019     for (i = 0; i < nb_temps; ++i) {
3020         s->temps[i].state_ptr = NULL;
3021     }
3022 
3023     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3024         TCGOpcode opc = op->opc;
3025         const TCGOpDef *def;
3026         bool done = false;
3027 
3028         /* Calls are special. */
3029         if (opc == INDEX_op_call) {
3030             fold_call(&ctx, op);
3031             continue;
3032         }
3033 
3034         def = &tcg_op_defs[opc];
3035         init_arguments(&ctx, op, def->nb_oargs + def->nb_iargs);
3036         copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs);
3037 
3038         /* Pre-compute the type of the operation. */
3039         ctx.type = TCGOP_TYPE(op);
3040 
3041         /*
3042          * Process each opcode.
3043          * Sorted alphabetically by opcode as much as possible.
3044          */
3045         switch (opc) {
3046         case INDEX_op_add:
3047             done = fold_add(&ctx, op);
3048             break;
3049         case INDEX_op_add_vec:
3050             done = fold_add_vec(&ctx, op);
3051             break;
3052         case INDEX_op_addci:
3053             done = fold_addci(&ctx, op);
3054             break;
3055         case INDEX_op_addcio:
3056             done = fold_addcio(&ctx, op);
3057             break;
3058         case INDEX_op_addco:
3059             done = fold_addco(&ctx, op);
3060             break;
3061         case INDEX_op_and:
3062         case INDEX_op_and_vec:
3063             done = fold_and(&ctx, op);
3064             break;
3065         case INDEX_op_andc:
3066         case INDEX_op_andc_vec:
3067             done = fold_andc(&ctx, op);
3068             break;
3069         case INDEX_op_brcond:
3070             done = fold_brcond(&ctx, op);
3071             break;
3072         case INDEX_op_brcond2_i32:
3073             done = fold_brcond2(&ctx, op);
3074             break;
3075         case INDEX_op_bswap16:
3076         case INDEX_op_bswap32:
3077         case INDEX_op_bswap64:
3078             done = fold_bswap(&ctx, op);
3079             break;
3080         case INDEX_op_clz:
3081         case INDEX_op_ctz:
3082             done = fold_count_zeros(&ctx, op);
3083             break;
3084         case INDEX_op_ctpop:
3085             done = fold_ctpop(&ctx, op);
3086             break;
3087         case INDEX_op_deposit:
3088             done = fold_deposit(&ctx, op);
3089             break;
3090         case INDEX_op_divs:
3091         case INDEX_op_divu:
3092             done = fold_divide(&ctx, op);
3093             break;
3094         case INDEX_op_dup_vec:
3095             done = fold_dup(&ctx, op);
3096             break;
3097         case INDEX_op_dup2_vec:
3098             done = fold_dup2(&ctx, op);
3099             break;
3100         case INDEX_op_eqv:
3101         case INDEX_op_eqv_vec:
3102             done = fold_eqv(&ctx, op);
3103             break;
3104         case INDEX_op_extract:
3105             done = fold_extract(&ctx, op);
3106             break;
3107         case INDEX_op_extract2:
3108             done = fold_extract2(&ctx, op);
3109             break;
3110         case INDEX_op_ext_i32_i64:
3111             done = fold_exts(&ctx, op);
3112             break;
3113         case INDEX_op_extu_i32_i64:
3114         case INDEX_op_extrl_i64_i32:
3115         case INDEX_op_extrh_i64_i32:
3116             done = fold_extu(&ctx, op);
3117             break;
3118         case INDEX_op_ld8s:
3119         case INDEX_op_ld8u:
3120         case INDEX_op_ld16s:
3121         case INDEX_op_ld16u:
3122         case INDEX_op_ld32s:
3123         case INDEX_op_ld32u:
3124             done = fold_tcg_ld(&ctx, op);
3125             break;
3126         case INDEX_op_ld:
3127         case INDEX_op_ld_vec:
3128             done = fold_tcg_ld_memcopy(&ctx, op);
3129             break;
3130         case INDEX_op_st8:
3131         case INDEX_op_st16:
3132         case INDEX_op_st32:
3133             done = fold_tcg_st(&ctx, op);
3134             break;
3135         case INDEX_op_st:
3136         case INDEX_op_st_vec:
3137             done = fold_tcg_st_memcopy(&ctx, op);
3138             break;
3139         case INDEX_op_mb:
3140             done = fold_mb(&ctx, op);
3141             break;
3142         case INDEX_op_mov:
3143         case INDEX_op_mov_vec:
3144             done = fold_mov(&ctx, op);
3145             break;
3146         case INDEX_op_movcond:
3147             done = fold_movcond(&ctx, op);
3148             break;
3149         case INDEX_op_mul:
3150             done = fold_mul(&ctx, op);
3151             break;
3152         case INDEX_op_mulsh:
3153         case INDEX_op_muluh:
3154             done = fold_mul_highpart(&ctx, op);
3155             break;
3156         case INDEX_op_muls2:
3157         case INDEX_op_mulu2:
3158             done = fold_multiply2(&ctx, op);
3159             break;
3160         case INDEX_op_nand:
3161         case INDEX_op_nand_vec:
3162             done = fold_nand(&ctx, op);
3163             break;
3164         case INDEX_op_neg:
3165             done = fold_neg(&ctx, op);
3166             break;
3167         case INDEX_op_nor:
3168         case INDEX_op_nor_vec:
3169             done = fold_nor(&ctx, op);
3170             break;
3171         case INDEX_op_not:
3172         case INDEX_op_not_vec:
3173             done = fold_not(&ctx, op);
3174             break;
3175         case INDEX_op_or:
3176         case INDEX_op_or_vec:
3177             done = fold_or(&ctx, op);
3178             break;
3179         case INDEX_op_orc:
3180         case INDEX_op_orc_vec:
3181             done = fold_orc(&ctx, op);
3182             break;
3183         case INDEX_op_qemu_ld:
3184             done = fold_qemu_ld_1reg(&ctx, op);
3185             break;
3186         case INDEX_op_qemu_ld2:
3187             done = fold_qemu_ld_2reg(&ctx, op);
3188             break;
3189         case INDEX_op_qemu_st:
3190         case INDEX_op_qemu_st2:
3191             done = fold_qemu_st(&ctx, op);
3192             break;
3193         case INDEX_op_rems:
3194         case INDEX_op_remu:
3195             done = fold_remainder(&ctx, op);
3196             break;
3197         case INDEX_op_rotl:
3198         case INDEX_op_rotr:
3199         case INDEX_op_sar:
3200         case INDEX_op_shl:
3201         case INDEX_op_shr:
3202             done = fold_shift(&ctx, op);
3203             break;
3204         case INDEX_op_setcond:
3205             done = fold_setcond(&ctx, op);
3206             break;
3207         case INDEX_op_negsetcond:
3208             done = fold_negsetcond(&ctx, op);
3209             break;
3210         case INDEX_op_setcond2_i32:
3211             done = fold_setcond2(&ctx, op);
3212             break;
3213         case INDEX_op_cmp_vec:
3214             done = fold_cmp_vec(&ctx, op);
3215             break;
3216         case INDEX_op_cmpsel_vec:
3217             done = fold_cmpsel_vec(&ctx, op);
3218             break;
3219         case INDEX_op_bitsel_vec:
3220             done = fold_bitsel_vec(&ctx, op);
3221             break;
3222         case INDEX_op_sextract:
3223             done = fold_sextract(&ctx, op);
3224             break;
3225         case INDEX_op_sub:
3226             done = fold_sub(&ctx, op);
3227             break;
3228         case INDEX_op_subbi:
3229             done = fold_subbi(&ctx, op);
3230             break;
3231         case INDEX_op_subbio:
3232             done = fold_subbio(&ctx, op);
3233             break;
3234         case INDEX_op_subbo:
3235             done = fold_subbo(&ctx, op);
3236             break;
3237         case INDEX_op_sub_vec:
3238             done = fold_sub_vec(&ctx, op);
3239             break;
3240         case INDEX_op_xor:
3241         case INDEX_op_xor_vec:
3242             done = fold_xor(&ctx, op);
3243             break;
3244         case INDEX_op_set_label:
3245         case INDEX_op_br:
3246         case INDEX_op_exit_tb:
3247         case INDEX_op_goto_tb:
3248         case INDEX_op_goto_ptr:
3249             finish_ebb(&ctx);
3250             done = true;
3251             break;
3252         default:
3253             done = finish_folding(&ctx, op);
3254             break;
3255         }
3256         tcg_debug_assert(done);
3257     }
3258 }
3259