xref: /qemu/target/s390x/tcg/mem_helper.c (revision fb5c28e1955537228fe59a901e6cf6258da682d5)
1 /*
2  *  S/390 memory access helper routines
3  *
4  *  Copyright (c) 2009 Ulrich Hecht
5  *  Copyright (c) 2009 Alexander Graf
6  *
7  * This library is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * This library is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19  */
20 
21 #include "qemu/osdep.h"
22 #include "qemu/log.h"
23 #include "cpu.h"
24 #include "s390x-internal.h"
25 #include "tcg_s390x.h"
26 #include "exec/helper-proto.h"
27 #include "exec/exec-all.h"
28 #include "exec/page-protection.h"
29 #include "exec/cpu_ldst.h"
30 #include "hw/core/tcg-cpu-ops.h"
31 #include "qemu/int128.h"
32 #include "qemu/atomic128.h"
33 
34 #if defined(CONFIG_USER_ONLY)
35 #include "user/page-protection.h"
36 #else
37 #include "hw/s390x/storage-keys.h"
38 #include "hw/boards.h"
39 #endif
40 
41 #ifdef CONFIG_USER_ONLY
42 # define user_or_likely(X)    true
43 #else
44 # define user_or_likely(X)    likely(X)
45 #endif
46 
47 /*****************************************************************************/
48 /* Softmmu support */
49 
50 /* #define DEBUG_HELPER */
51 #ifdef DEBUG_HELPER
52 #define HELPER_LOG(x...) qemu_log(x)
53 #else
54 #define HELPER_LOG(x...)
55 #endif
56 
57 static inline bool psw_key_valid(CPUS390XState *env, uint8_t psw_key)
58 {
59     uint16_t pkm = env->cregs[3] >> 16;
60 
61     if (env->psw.mask & PSW_MASK_PSTATE) {
62         /* PSW key has range 0..15, it is valid if the bit is 1 in the PKM */
63         return pkm & (0x8000 >> psw_key);
64     }
65     return true;
66 }
67 
68 static bool is_destructive_overlap(CPUS390XState *env, uint64_t dest,
69                                    uint64_t src, uint32_t len)
70 {
71     if (!len || src == dest) {
72         return false;
73     }
74     /* Take care of wrapping at the end of address space. */
75     if (unlikely(wrap_address(env, src + len - 1) < src)) {
76         return dest > src || dest <= wrap_address(env, src + len - 1);
77     }
78     return dest > src && dest <= src + len - 1;
79 }
80 
81 /* Trigger a SPECIFICATION exception if an address or a length is not
82    naturally aligned.  */
83 static inline void check_alignment(CPUS390XState *env, uint64_t v,
84                                    int wordsize, uintptr_t ra)
85 {
86     if (v % wordsize) {
87         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
88     }
89 }
90 
91 /* Load a value from memory according to its size.  */
92 static inline uint64_t cpu_ldusize_data_ra(CPUS390XState *env, uint64_t addr,
93                                            int wordsize, uintptr_t ra)
94 {
95     switch (wordsize) {
96     case 1:
97         return cpu_ldub_data_ra(env, addr, ra);
98     case 2:
99         return cpu_lduw_data_ra(env, addr, ra);
100     default:
101         abort();
102     }
103 }
104 
105 /* Store a to memory according to its size.  */
106 static inline void cpu_stsize_data_ra(CPUS390XState *env, uint64_t addr,
107                                       uint64_t value, int wordsize,
108                                       uintptr_t ra)
109 {
110     switch (wordsize) {
111     case 1:
112         cpu_stb_data_ra(env, addr, value, ra);
113         break;
114     case 2:
115         cpu_stw_data_ra(env, addr, value, ra);
116         break;
117     default:
118         abort();
119     }
120 }
121 
/* An access covers at most 4096 bytes and therefore at most two pages. */
typedef struct S390Access {
    target_ulong vaddr1;   /* guest address of the first fragment */
    target_ulong vaddr2;   /* guest address of the second fragment, if any */
    void *haddr1;          /* host address of the first fragment, or NULL */
    void *haddr2;          /* host address of the second fragment, or NULL */
    uint16_t size1;        /* bytes in the first fragment */
    uint16_t size2;        /* bytes in the second fragment; 0 if one page */
    /*
     * If we can't access the host page directly, we'll have to do I/O access
     * via ld/st helpers. These are internal details, so we store the
     * mmu idx to do the access here instead of passing it around in the
     * helpers.
     */
    int mmu_idx;
} S390Access;
138 
139 /*
140  * With nonfault=1, return the PGM_ exception that would have been injected
141  * into the guest; return 0 if no exception was detected.
142  *
 * For !CONFIG_USER_ONLY, the TEC is stored to env->tlb_fill_tec.
144  * For CONFIG_USER_ONLY, the faulting address is stored to env->__excp_addr.
145  */
static inline int s390_probe_access(CPUArchState *env, target_ulong addr,
                                    int size, MMUAccessType access_type,
                                    int mmu_idx, bool nonfault,
                                    void **phost, uintptr_t ra)
{
    /* Size 0: probe translation/permission for the page containing addr. */
    int flags = probe_access_flags(env, addr, 0, access_type, mmu_idx,
                                   nonfault, phost, ra);

    if (unlikely(flags & TLB_INVALID_MASK)) {
#ifdef CONFIG_USER_ONLY
        /* Address is in TEC in system mode; see s390_cpu_record_sigsegv. */
        env->__excp_addr = addr & TARGET_PAGE_MASK;
        return (page_get_flags(addr) & PAGE_VALID
                ? PGM_PROTECTION : PGM_ADDRESSING);
#else
        /* The MMU fault path recorded the exception type here. */
        return env->tlb_fill_exc;
#endif
    }

#ifndef CONFIG_USER_ONLY
    if (unlikely(flags & TLB_WATCHPOINT)) {
        /* S390 does not presently use transaction attributes. */
        cpu_check_watchpoint(env_cpu(env), addr, size,
                             MEMTXATTRS_UNSPECIFIED,
                             (access_type == MMU_DATA_STORE
                              ? BP_MEM_WRITE : BP_MEM_READ), ra);
    }
#endif

    return 0;
}
177 
178 static int access_prepare_nf(S390Access *access, CPUS390XState *env,
179                              bool nonfault, vaddr vaddr1, int size,
180                              MMUAccessType access_type,
181                              int mmu_idx, uintptr_t ra)
182 {
183     int size1, size2, exc;
184 
185     assert(size > 0 && size <= 4096);
186 
187     size1 = MIN(size, -(vaddr1 | TARGET_PAGE_MASK)),
188     size2 = size - size1;
189 
190     memset(access, 0, sizeof(*access));
191     access->vaddr1 = vaddr1;
192     access->size1 = size1;
193     access->size2 = size2;
194     access->mmu_idx = mmu_idx;
195 
196     exc = s390_probe_access(env, vaddr1, size1, access_type, mmu_idx, nonfault,
197                             &access->haddr1, ra);
198     if (unlikely(exc)) {
199         return exc;
200     }
201     if (unlikely(size2)) {
202         /* The access crosses page boundaries. */
203         vaddr vaddr2 = wrap_address(env, vaddr1 + size1);
204 
205         access->vaddr2 = vaddr2;
206         exc = s390_probe_access(env, vaddr2, size2, access_type, mmu_idx,
207                                 nonfault, &access->haddr2, ra);
208         if (unlikely(exc)) {
209             return exc;
210         }
211     }
212     return 0;
213 }
214 
215 static inline void access_prepare(S390Access *ret, CPUS390XState *env,
216                                   vaddr vaddr, int size,
217                                   MMUAccessType access_type, int mmu_idx,
218                                   uintptr_t ra)
219 {
220     int exc = access_prepare_nf(ret, env, false, vaddr, size,
221                                 access_type, mmu_idx, ra);
222     assert(!exc);
223 }
224 
225 /* Helper to handle memset on a single page. */
226 static void do_access_memset(CPUS390XState *env, vaddr vaddr, char *haddr,
227                              uint8_t byte, uint16_t size, int mmu_idx,
228                              uintptr_t ra)
229 {
230     if (user_or_likely(haddr)) {
231         memset(haddr, byte, size);
232     } else {
233         MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
234         for (int i = 0; i < size; i++) {
235             cpu_stb_mmu(env, vaddr + i, byte, oi, ra);
236         }
237     }
238 }
239 
/* Fill all bytes covered by @desta (one or two pages) with @byte. */
static void access_memset(CPUS390XState *env, S390Access *desta,
                          uint8_t byte, uintptr_t ra)
{
    /* Mark the helper return address for precise fault unwinding. */
    set_helper_retaddr(ra);
    do_access_memset(env, desta->vaddr1, desta->haddr1, byte, desta->size1,
                     desta->mmu_idx, ra);
    if (unlikely(desta->size2)) {
        /* The range crosses into a second page. */
        do_access_memset(env, desta->vaddr2, desta->haddr2, byte,
                         desta->size2, desta->mmu_idx, ra);
    }
    clear_helper_retaddr();
}
252 
253 static uint8_t access_get_byte(CPUS390XState *env, S390Access *access,
254                                int offset, uintptr_t ra)
255 {
256     target_ulong vaddr = access->vaddr1;
257     void *haddr = access->haddr1;
258 
259     if (unlikely(offset >= access->size1)) {
260         offset -= access->size1;
261         vaddr = access->vaddr2;
262         haddr = access->haddr2;
263     }
264 
265     if (user_or_likely(haddr)) {
266         return ldub_p(haddr + offset);
267     } else {
268         MemOpIdx oi = make_memop_idx(MO_UB, access->mmu_idx);
269         return cpu_ldb_mmu(env, vaddr + offset, oi, ra);
270     }
271 }
272 
273 static void access_set_byte(CPUS390XState *env, S390Access *access,
274                             int offset, uint8_t byte, uintptr_t ra)
275 {
276     target_ulong vaddr = access->vaddr1;
277     void *haddr = access->haddr1;
278 
279     if (unlikely(offset >= access->size1)) {
280         offset -= access->size1;
281         vaddr = access->vaddr2;
282         haddr = access->haddr2;
283     }
284 
285     if (user_or_likely(haddr)) {
286         stb_p(haddr + offset, byte);
287     } else {
288         MemOpIdx oi = make_memop_idx(MO_UB, access->mmu_idx);
289         cpu_stb_mmu(env, vaddr + offset, byte, oi, ra);
290     }
291 }
292 
/*
 * Move data with the same semantics as memmove() in case ranges don't overlap
 * or src > dest. Undefined behavior on destructive overlaps.
 */
static void access_memmove(CPUS390XState *env, S390Access *desta,
                           S390Access *srca, uintptr_t ra)
{
    int len = desta->size1 + desta->size2;

    assert(len == srca->size1 + srca->size2);

    /* Fallback to slow access in case we don't have access to all host pages */
    if (user_or_likely(desta->haddr1 &&
                       srca->haddr1 &&
                       (!desta->size2 || desta->haddr2) &&
                       (!srca->size2 || srca->haddr2))) {
        /* How many more bytes the destination has on its first page. */
        int diff = desta->size1 - srca->size1;

        if (likely(diff == 0)) {
            /* Both operands split at the same offset: copy piecewise. */
            memmove(desta->haddr1, srca->haddr1, srca->size1);
            if (unlikely(srca->size2)) {
                memmove(desta->haddr2, srca->haddr2, srca->size2);
            }
        } else if (diff > 0) {
            /* Source crosses its page boundary first. */
            memmove(desta->haddr1, srca->haddr1, srca->size1);
            memmove(desta->haddr1 + srca->size1, srca->haddr2, diff);
            if (likely(desta->size2)) {
                memmove(desta->haddr2, srca->haddr2 + diff, desta->size2);
            }
        } else {
            /* Destination crosses its page boundary first. */
            diff = -diff;
            memmove(desta->haddr1, srca->haddr1, desta->size1);
            memmove(desta->haddr2, srca->haddr1 + desta->size1, diff);
            if (likely(srca->size2)) {
                memmove(desta->haddr2 + diff, srca->haddr2, srca->size2);
            }
        }
    } else {
        /* Slow path: byte-wise copy through the ld/st helpers. */
        for (int i = 0; i < len; i++) {
            uint8_t byte = access_get_byte(env, srca, i, ra);
            access_set_byte(env, desta, i, byte, ra);
        }
    }
}
337 
338 static int mmu_idx_from_as(uint8_t as)
339 {
340     switch (as) {
341     case AS_PRIMARY:
342         return MMU_PRIMARY_IDX;
343     case AS_SECONDARY:
344         return MMU_SECONDARY_IDX;
345     case AS_HOME:
346         return MMU_HOME_IDX;
347     default:
348         /* FIXME AS_ACCREG */
349         g_assert_not_reached();
350     }
351 }
352 
353 /* and on array */
354 static uint32_t do_helper_nc(CPUS390XState *env, uint32_t l, uint64_t dest,
355                              uint64_t src, uintptr_t ra)
356 {
357     const int mmu_idx = s390x_env_mmu_index(env, false);
358     S390Access srca1, srca2, desta;
359     uint32_t i;
360     uint8_t c = 0;
361 
362     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
363                __func__, l, dest, src);
364 
365     /* NC always processes one more byte than specified - maximum is 256 */
366     l++;
367 
368     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
369     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
370     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
371     set_helper_retaddr(ra);
372 
373     for (i = 0; i < l; i++) {
374         const uint8_t x = access_get_byte(env, &srca1, i, ra) &
375                           access_get_byte(env, &srca2, i, ra);
376 
377         c |= x;
378         access_set_byte(env, &desta, i, x, ra);
379     }
380 
381     clear_helper_retaddr();
382     return c != 0;
383 }
384 
/* NC entry point; GETPC() captures the caller for fault unwinding. */
uint32_t HELPER(nc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    return do_helper_nc(env, l, dest, src, GETPC());
}
390 
/* xor on array */
static uint32_t do_helper_xc(CPUS390XState *env, uint32_t l, uint64_t dest,
                             uint64_t src, uintptr_t ra)
{
    const int mmu_idx = s390x_env_mmu_index(env, false);
    S390Access srca1, srca2, desta;
    uint32_t i;
    uint8_t c = 0;   /* accumulates OR of all result bytes */

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* XC always processes one more byte than specified - maximum is 256 */
    l++;

    /* Probe all operands up front so no fault occurs after partial stores. */
    access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);

    /* xor with itself is the same as memset(0) */
    if (src == dest) {
        access_memset(env, &desta, 0, ra);
        return 0;
    }

    set_helper_retaddr(ra);
    for (i = 0; i < l; i++) {
        const uint8_t x = access_get_byte(env, &srca1, i, ra) ^
                          access_get_byte(env, &srca2, i, ra);

        c |= x;
        access_set_byte(env, &desta, i, x, ra);
    }
    clear_helper_retaddr();
    /* Result flag: 0 if all result bytes were zero, 1 otherwise. */
    return c != 0;
}
427 
/* XC entry point; GETPC() captures the caller for fault unwinding. */
uint32_t HELPER(xc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    return do_helper_xc(env, l, dest, src, GETPC());
}
433 
434 /* or on array */
435 static uint32_t do_helper_oc(CPUS390XState *env, uint32_t l, uint64_t dest,
436                              uint64_t src, uintptr_t ra)
437 {
438     const int mmu_idx = s390x_env_mmu_index(env, false);
439     S390Access srca1, srca2, desta;
440     uint32_t i;
441     uint8_t c = 0;
442 
443     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
444                __func__, l, dest, src);
445 
446     /* OC always processes one more byte than specified - maximum is 256 */
447     l++;
448 
449     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
450     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
451     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
452     set_helper_retaddr(ra);
453 
454     for (i = 0; i < l; i++) {
455         const uint8_t x = access_get_byte(env, &srca1, i, ra) |
456                           access_get_byte(env, &srca2, i, ra);
457 
458         c |= x;
459         access_set_byte(env, &desta, i, x, ra);
460     }
461 
462     clear_helper_retaddr();
463     return c != 0;
464 }
465 
/* OC entry point; GETPC() captures the caller for fault unwinding. */
uint32_t HELPER(oc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    return do_helper_oc(env, l, dest, src, GETPC());
}
471 
/* memmove */
static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest,
                              uint64_t src, uintptr_t ra)
{
    const int mmu_idx = s390x_env_mmu_index(env, false);
    S390Access srca, desta;
    uint32_t i;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* MVC always copies one more byte than specified - maximum is 256 */
    l++;

    access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);

    /*
     * "When the operands overlap, the result is obtained as if the operands
     * were processed one byte at a time". Only non-destructive overlaps
     * behave like memmove().
     */
    if (dest == src + 1) {
        /* Byte-at-a-time semantics replicate the first byte (memset). */
        access_memset(env, &desta, access_get_byte(env, &srca, 0, ra), ra);
    } else if (!is_destructive_overlap(env, dest, src, l)) {
        access_memmove(env, &desta, &srca, ra);
    } else {
        /* Destructive overlap: copy strictly byte-by-byte, left to right. */
        set_helper_retaddr(ra);
        for (i = 0; i < l; i++) {
            uint8_t byte = access_get_byte(env, &srca, i, ra);

            access_set_byte(env, &desta, i, byte, ra);
        }
        clear_helper_retaddr();
    }

    /* MVC does not change the condition code; return the current value. */
    return env->cc_op;
}
510 
/* MVC entry point; the returned cc is unchanged and thus discarded. */
void HELPER(mvc)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    do_helper_mvc(env, l, dest, src, GETPC());
}
515 
516 /* move right to left */
517 void HELPER(mvcrl)(CPUS390XState *env, uint64_t l, uint64_t dest, uint64_t src)
518 {
519     const int mmu_idx = s390x_env_mmu_index(env, false);
520     const uint64_t ra = GETPC();
521     S390Access srca, desta;
522     int32_t i;
523 
524     /* MVCRL always copies one more byte than specified - maximum is 256 */
525     l &= 0xff;
526     l++;
527 
528     access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
529     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
530 
531     set_helper_retaddr(ra);
532     for (i = l - 1; i >= 0; i--) {
533         uint8_t byte = access_get_byte(env, &srca, i, ra);
534         access_set_byte(env, &desta, i, byte, ra);
535     }
536     clear_helper_retaddr();
537 }
538 
539 /* move inverse  */
540 void HELPER(mvcin)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
541 {
542     const int mmu_idx = s390x_env_mmu_index(env, false);
543     S390Access srca, desta;
544     uintptr_t ra = GETPC();
545     int i;
546 
547     /* MVCIN always copies one more byte than specified - maximum is 256 */
548     l++;
549 
550     src = wrap_address(env, src - l + 1);
551     access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
552     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
553 
554     set_helper_retaddr(ra);
555     for (i = 0; i < l; i++) {
556         const uint8_t x = access_get_byte(env, &srca, l - i - 1, ra);
557         access_set_byte(env, &desta, i, x, ra);
558     }
559     clear_helper_retaddr();
560 }
561 
562 /* move numerics  */
563 void HELPER(mvn)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
564 {
565     const int mmu_idx = s390x_env_mmu_index(env, false);
566     S390Access srca1, srca2, desta;
567     uintptr_t ra = GETPC();
568     int i;
569 
570     /* MVN always copies one more byte than specified - maximum is 256 */
571     l++;
572 
573     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
574     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
575     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
576 
577     set_helper_retaddr(ra);
578     for (i = 0; i < l; i++) {
579         const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0x0f) |
580                           (access_get_byte(env, &srca2, i, ra) & 0xf0);
581 
582         access_set_byte(env, &desta, i, x, ra);
583     }
584     clear_helper_retaddr();
585 }
586 
/* move with offset  */
void HELPER(mvo)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = s390x_env_mmu_index(env, false);
    /* MVO always processes one more byte than specified - maximum is 16 */
    const int len_dest = (l >> 4) + 1;   /* high nibble of L: dest length - 1 */
    const int len_src = (l & 0xf) + 1;   /* low nibble of L: src length - 1 */
    uintptr_t ra = GETPC();
    uint8_t byte_dest, byte_src;
    S390Access srca, desta;
    int i, j;

    access_prepare(&srca, env, src, len_src, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, dest, len_dest, MMU_DATA_STORE, mmu_idx, ra);

    /* Handle rightmost byte */
    byte_dest = cpu_ldub_data_ra(env, dest + len_dest - 1, ra);

    set_helper_retaddr(ra);
    /* Keep the destination's low nibble; take the source's low nibble high. */
    byte_src = access_get_byte(env, &srca, len_src - 1, ra);
    byte_dest = (byte_dest & 0x0f) | (byte_src << 4);
    access_set_byte(env, &desta, len_dest - 1, byte_dest, ra);

    /* Process remaining bytes from right to left */
    for (i = len_dest - 2, j = len_src - 2; i >= 0; i--, j--) {
        /* Carry the previous source byte's high nibble down. */
        byte_dest = byte_src >> 4;
        if (j >= 0) {
            byte_src = access_get_byte(env, &srca, j, ra);
        } else {
            /* Source exhausted: pad with zero nibbles. */
            byte_src = 0;
        }
        byte_dest |= byte_src << 4;
        access_set_byte(env, &desta, i, byte_dest, ra);
    }
    clear_helper_retaddr();
}
623 
624 /* move zones  */
625 void HELPER(mvz)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
626 {
627     const int mmu_idx = s390x_env_mmu_index(env, false);
628     S390Access srca1, srca2, desta;
629     uintptr_t ra = GETPC();
630     int i;
631 
632     /* MVZ always copies one more byte than specified - maximum is 256 */
633     l++;
634 
635     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
636     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
637     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
638 
639     set_helper_retaddr(ra);
640     for (i = 0; i < l; i++) {
641         const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0xf0) |
642                           (access_get_byte(env, &srca2, i, ra) & 0x0f);
643 
644         access_set_byte(env, &desta, i, x, ra);
645     }
646     clear_helper_retaddr();
647 }
648 
649 /* compare unsigned byte arrays */
650 static uint32_t do_helper_clc(CPUS390XState *env, uint32_t l, uint64_t s1,
651                               uint64_t s2, uintptr_t ra)
652 {
653     uint32_t i;
654     uint32_t cc = 0;
655 
656     HELPER_LOG("%s l %d s1 %" PRIx64 " s2 %" PRIx64 "\n",
657                __func__, l, s1, s2);
658 
659     for (i = 0; i <= l; i++) {
660         uint8_t x = cpu_ldub_data_ra(env, s1 + i, ra);
661         uint8_t y = cpu_ldub_data_ra(env, s2 + i, ra);
662         HELPER_LOG("%02x (%c)/%02x (%c) ", x, x, y, y);
663         if (x < y) {
664             cc = 1;
665             break;
666         } else if (x > y) {
667             cc = 2;
668             break;
669         }
670     }
671 
672     HELPER_LOG("\n");
673     return cc;
674 }
675 
/* CLC entry point; GETPC() captures the caller for fault unwinding. */
uint32_t HELPER(clc)(CPUS390XState *env, uint32_t l, uint64_t s1, uint64_t s2)
{
    return do_helper_clc(env, l, s1, s2, GETPC());
}
680 
/* compare logical under mask */
uint32_t HELPER(clm)(CPUS390XState *env, uint32_t r1, uint32_t mask,
                     uint64_t addr)
{
    uintptr_t ra = GETPC();
    uint32_t cc = 0;

    HELPER_LOG("%s: r1 0x%x mask 0x%x addr 0x%" PRIx64 "\n", __func__, r1,
               mask, addr);

    if (!mask) {
        /* Recognize access exceptions for the first byte */
        probe_read(env, addr, 1, s390x_env_mmu_index(env, false), ra);
    }

    /* Walk the 4-bit mask left to right, one register byte per mask bit. */
    while (mask) {
        if (mask & 8) {
            uint8_t d = cpu_ldub_data_ra(env, addr, ra);
            uint8_t r = extract32(r1, 24, 8);   /* current leftmost r1 byte */
            HELPER_LOG("mask 0x%x %02x/%02x (0x%" PRIx64 ") ", mask, r, d,
                       addr);
            if (r < d) {
                cc = 1;
                break;
            } else if (r > d) {
                cc = 2;
                break;
            }
            /* Only selected bytes consume a memory byte. */
            addr++;
        }
        /* Shift to the next mask bit / register byte. */
        mask = (mask << 1) & 0xf;
        r1 <<= 8;
    }

    HELPER_LOG("\n");
    return cc;
}
718 
/* Read general register @reg as an address, wrapped per addressing mode. */
static inline uint64_t get_address(CPUS390XState *env, int reg)
{
    return wrap_address(env, env->regs[reg]);
}
723 
724 /*
725  * Store the address to the given register, zeroing out unused leftmost
726  * bits in bit positions 32-63 (24-bit and 31-bit mode only).
727  */
728 static inline void set_address_zero(CPUS390XState *env, int reg,
729                                     uint64_t address)
730 {
731     if (env->psw.mask & PSW_MASK_64) {
732         env->regs[reg] = address;
733     } else {
734         if (!(env->psw.mask & PSW_MASK_32)) {
735             address &= 0x00ffffff;
736         } else {
737             address &= 0x7fffffff;
738         }
739         env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
740     }
741 }
742 
/*
 * Store an address to @reg, honoring the current addressing mode;
 * the treatment of unused leftmost bits follows the PoO as noted below.
 */
static inline void set_address(CPUS390XState *env, int reg, uint64_t address)
{
    if (env->psw.mask & PSW_MASK_64) {
        /* 64-Bit mode */
        env->regs[reg] = address;
    } else {
        if (!(env->psw.mask & PSW_MASK_32)) {
            /* 24-Bit mode. According to the PoO it is implementation
            dependent if bits 32-39 remain unchanged or are set to
            zeros.  Choose the former so that the function can also be
            used for TRT.  */
            env->regs[reg] = deposit64(env->regs[reg], 0, 24, address);
        } else {
            /* 31-Bit mode. According to the PoO it is implementation
            dependent if bit 32 remains unchanged or is set to zero.
            Choose the latter so that the function can also be used for
            TRT.  */
            address &= 0x7fffffff;
            env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
        }
    }
}
765 
766 static inline uint64_t wrap_length32(CPUS390XState *env, uint64_t length)
767 {
768     if (!(env->psw.mask & PSW_MASK_64)) {
769         return (uint32_t)length;
770     }
771     return length;
772 }
773 
774 static inline uint64_t wrap_length31(CPUS390XState *env, uint64_t length)
775 {
776     if (!(env->psw.mask & PSW_MASK_64)) {
777         /* 24-Bit and 31-Bit mode */
778         length &= 0x7fffffff;
779     }
780     return length;
781 }
782 
/* Read general register @reg as a length, 31-bit wrapped outside 64-bit mode. */
static inline uint64_t get_length(CPUS390XState *env, int reg)
{
    return wrap_length31(env, env->regs[reg]);
}
787 
788 static inline void set_length(CPUS390XState *env, int reg, uint64_t length)
789 {
790     if (env->psw.mask & PSW_MASK_64) {
791         /* 64-Bit mode */
792         env->regs[reg] = length;
793     } else {
794         /* 24-Bit and 31-Bit mode */
795         env->regs[reg] = deposit64(env->regs[reg], 0, 32, length);
796     }
797 }
798 
/* search string (c is byte to search, r2 is string, r1 end of string) */
void HELPER(srst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    uintptr_t ra = GETPC();
    uint64_t end, str;
    uint32_t len;
    uint8_t v, c = env->regs[0];   /* search character from bits 56-63 of R0 */

    /* Bits 32-55 must contain all 0.  */
    if (env->regs[0] & 0xffffff00u) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    str = get_address(env, r2);
    end = get_address(env, r1);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; ++len) {
        if (str + len == end) {
            /* Character not found.  R1 & R2 are unmodified.  */
            env->cc_op = 2;
            return;
        }
        v = cpu_ldub_data_ra(env, str + len, ra);
        if (v == c) {
            /* Character found.  Set R1 to the location; R2 is unmodified.  */
            env->cc_op = 1;
            set_address(env, r1, str + len);
            return;
        }
    }

    /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
    env->cc_op = 3;
    set_address(env, r2, str + len);
}
836 
/* search string: 16-bit-character variant of SRST */
void HELPER(srstu)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    uintptr_t ra = GETPC();
    uint32_t len;
    uint16_t v, c = env->regs[0];   /* search character from bits 48-63 of R0 */
    uint64_t end, str, adj_end;

    /* Bits 32-47 of R0 must be zero.  */
    if (env->regs[0] & 0xffff0000u) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    str = get_address(env, r2);
    end = get_address(env, r1);

    /* If the LSB of the two addresses differ, use one extra byte.  */
    adj_end = end + ((str ^ end) & 1);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; len += 2) {
        if (str + len == adj_end) {
            /* End of input found.  */
            env->cc_op = 2;
            return;
        }
        v = cpu_lduw_data_ra(env, str + len, ra);
        if (v == c) {
            /* Character found.  Set R1 to the location; R2 is unmodified.  */
            env->cc_op = 1;
            set_address(env, r1, str + len);
            return;
        }
    }

    /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
    env->cc_op = 3;
    set_address(env, r2, str + len);
}
876 
/* unsigned string compare (c is string terminator) */
Int128 HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t s2)
{
    uintptr_t ra = GETPC();
    uint32_t len;

    /* Only bits 56-63 of R0 designate the terminator. */
    c = c & 0xff;
    s1 = wrap_address(env, s1);
    s2 = wrap_address(env, s2);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; ++len) {
        uint8_t v1 = cpu_ldub_data_ra(env, s1 + len, ra);
        uint8_t v2 = cpu_ldub_data_ra(env, s2 + len, ra);
        if (v1 == v2) {
            if (v1 == c) {
                /* Equal.  CC=0, and don't advance the registers.  */
                env->cc_op = 0;
                return int128_make128(s2, s1);
            }
        } else {
            /* Unequal.  CC={1,2}, and advance the registers.  Note that
               the terminator need not be zero, but the string that contains
               the terminator is by definition "low".  */
            env->cc_op = (v1 == c ? 1 : v2 == c ? 2 : v1 < v2 ? 1 : 2);
            return int128_make128(s2 + len, s1 + len);
        }
    }

    /* CPU-determined bytes equal; advance the registers.  */
    env->cc_op = 3;
    return int128_make128(s2 + len, s1 + len);
}
911 
/* move page */
uint32_t HELPER(mvpg)(CPUS390XState *env, uint64_t r0, uint32_t r1, uint32_t r2)
{
    const uint64_t src = get_address(env, r2) & TARGET_PAGE_MASK;
    const uint64_t dst = get_address(env, r1) & TARGET_PAGE_MASK;
    const int mmu_idx = s390x_env_mmu_index(env, false);
    const bool f = extract64(r0, 11, 1);     /* first-operand access bit */
    const bool s = extract64(r0, 10, 1);     /* second-operand access bit */
    const bool cco = extract64(r0, 8, 1);    /* condition-code option */
    uintptr_t ra = GETPC();
    S390Access srca, desta;
    int exc;

    /* f and s are mutually exclusive; bits 48-51 of R0 must be zero. */
    if ((f && s) || extract64(r0, 12, 4)) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC());
    }

    /*
     * We always manually handle exceptions such that we can properly store
     * r1/r2 to the lowcore on page-translation exceptions.
     *
     * TODO: Access key handling
     */
    exc = access_prepare_nf(&srca, env, true, src, TARGET_PAGE_SIZE,
                            MMU_DATA_LOAD, mmu_idx, ra);
    if (exc) {
        if (cco) {
            /* Condition-code option: report instead of faulting. */
            return 2;
        }
        goto inject_exc;
    }
    exc = access_prepare_nf(&desta, env, true, dst, TARGET_PAGE_SIZE,
                            MMU_DATA_STORE, mmu_idx, ra);
    if (exc) {
        if (cco && exc != PGM_PROTECTION) {
            return 1;
        }
        goto inject_exc;
    }
    access_memmove(env, &desta, &srca, ra);
    return 0; /* data moved */
inject_exc:
#if !defined(CONFIG_USER_ONLY)
    /* Record the translation-exception code in the lowcore. */
    if (exc != PGM_ADDRESSING) {
        stq_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, trans_exc_code),
                 env->tlb_fill_tec);
    }
    /* On page-translation exceptions, also record the operand registers. */
    if (exc == PGM_PAGE_TRANS) {
        stb_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, op_access_id),
                 r1 << 4 | r2);
    }
#endif
    tcg_s390_program_interrupt(env, exc, ra);
}
966 
/* string copy */
uint32_t HELPER(mvst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    const int mmu_idx = s390x_env_mmu_index(env, false);
    const uint64_t d = get_address(env, r1);
    const uint64_t s = get_address(env, r2);
    /* The terminator character is the low byte of R0.  */
    const uint8_t c = env->regs[0];
    /* Process at most up to the nearer of the two next page boundaries.  */
    const int len = MIN(-(d | TARGET_PAGE_MASK), -(s | TARGET_PAGE_MASK));
    S390Access srca, desta;
    uintptr_t ra = GETPC();
    int i;

    /* Bits 8-31 of R0 must be zero, else a specification exception.  */
    if (env->regs[0] & 0xffffff00ull) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    /*
     * Our access should not exceed single pages, as we must not report access
     * exceptions exceeding the actually copied range (which we don't know at
     * this point). We might over-indicate watchpoints within the pages
     * (if we ever care, we have to limit processing to a single byte).
     */
    access_prepare(&srca, env, s, len, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, d, len, MMU_DATA_STORE, mmu_idx, ra);

    set_helper_retaddr(ra);
    for (i = 0; i < len; i++) {
        const uint8_t v = access_get_byte(env, &srca, i, ra);

        access_set_byte(env, &desta, i, v, ra);
        if (v == c) {
            /* Terminator copied: R1 points at it; CC=1.  */
            clear_helper_retaddr();
            set_address_zero(env, r1, d + i);
            return 1;
        }
    }
    /* CPU-determined amount copied; advance both addresses; CC=3.  */
    clear_helper_retaddr();
    set_address_zero(env, r1, d + len);
    set_address_zero(env, r2, s + len);
    return 3;
}
1008 
1009 /* load access registers r1 to r3 from memory at a2 */
1010 void HELPER(lam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1011 {
1012     uintptr_t ra = GETPC();
1013     int i;
1014 
1015     if (a2 & 0x3) {
1016         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1017     }
1018 
1019     for (i = r1;; i = (i + 1) % 16) {
1020         env->aregs[i] = cpu_ldl_data_ra(env, a2, ra);
1021         a2 += 4;
1022 
1023         if (i == r3) {
1024             break;
1025         }
1026     }
1027 }
1028 
1029 /* store access registers r1 to r3 in memory at a2 */
1030 void HELPER(stam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1031 {
1032     uintptr_t ra = GETPC();
1033     int i;
1034 
1035     if (a2 & 0x3) {
1036         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1037     }
1038 
1039     for (i = r1;; i = (i + 1) % 16) {
1040         cpu_stl_data_ra(env, a2, env->aregs[i], ra);
1041         a2 += 4;
1042 
1043         if (i == r3) {
1044             break;
1045         }
1046     }
1047 }
1048 
/* move long helper */
static inline uint32_t do_mvcl(CPUS390XState *env,
                               uint64_t *dest, uint64_t *destlen,
                               uint64_t *src, uint64_t *srclen,
                               uint16_t pad, int wordsize, uintptr_t ra)
{
    const int mmu_idx = s390x_env_mmu_index(env, false);
    int len = MIN(*destlen, -(*dest | TARGET_PAGE_MASK));
    S390Access srca, desta;
    int i, cc;

    /* The CC is based on the initial length comparison.  */
    if (*destlen == *srclen) {
        cc = 0;
    } else if (*destlen < *srclen) {
        cc = 1;
    } else {
        cc = 2;
    }

    /* Nothing to copy or pad.  */
    if (!*destlen) {
        return cc;
    }

    /*
     * Only perform one type of operation (move/pad) at a time.
     * Stay within single pages.
     */
    if (*srclen) {
        /* Copy the src array */
        len = MIN(MIN(*srclen, -(*src | TARGET_PAGE_MASK)), len);
        *destlen -= len;
        *srclen -= len;
        access_prepare(&srca, env, *src, len, MMU_DATA_LOAD, mmu_idx, ra);
        access_prepare(&desta, env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
        access_memmove(env, &desta, &srca, ra);
        *src = wrap_address(env, *src + len);
        *dest = wrap_address(env, *dest + len);
    } else if (wordsize == 1) {
        /* Pad the remaining area */
        *destlen -= len;
        access_prepare(&desta, env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
        access_memset(env, &desta, pad, ra);
        *dest = wrap_address(env, *dest + len);
    } else {
        /* Two-byte pad pattern (MVCLU): store byte-by-byte.  */
        access_prepare(&desta, env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
        set_helper_retaddr(ra);

        /* The remaining length selects the padding byte. */
        for (i = 0; i < len; (*destlen)--, i++) {
            if (*destlen & 1) {
                /* Odd remaining length: the low byte of the pattern.  */
                access_set_byte(env, &desta, i, pad, ra);
            } else {
                /* Even remaining length: the high byte of the pattern.  */
                access_set_byte(env, &desta, i, pad >> 8, ra);
            }
        }
        clear_helper_retaddr();
        *dest = wrap_address(env, *dest + len);
    }

    /* CC=3 while there is more to process, else the length-based CC.  */
    return *destlen ? 3 : cc;
}
1110 
/* move long */
uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    const int mmu_idx = s390x_env_mmu_index(env, false);
    uintptr_t ra = GETPC();
    /* The operand lengths live in the low 24 bits of the odd registers.  */
    uint64_t destlen = env->regs[r1 + 1] & 0xffffff;
    uint64_t dest = get_address(env, r1);
    uint64_t srclen = env->regs[r2 + 1] & 0xffffff;
    uint64_t src = get_address(env, r2);
    /* The pad byte is bits 24-31 of R2+1.  */
    uint8_t pad = env->regs[r2 + 1] >> 24;
    CPUState *cs = env_cpu(env);
    S390Access srca, desta;
    uint32_t cc, cur_len;

    /* Destructive overlap: CC=3 and no data is moved.  */
    if (is_destructive_overlap(env, dest, src, MIN(srclen, destlen))) {
        cc = 3;
    } else if (srclen == destlen) {
        cc = 0;
    } else if (destlen < srclen) {
        cc = 1;
    } else {
        cc = 2;
    }

    /* We might have to zero-out some bits even if there was no action. */
    if (unlikely(!destlen || cc == 3)) {
        set_address_zero(env, r2, src);
        set_address_zero(env, r1, dest);
        return cc;
    } else if (!srclen) {
        set_address_zero(env, r2, src);
    }

    /*
     * Only perform one type of operation (move/pad) in one step.
     * Stay within single pages.
     */
    while (destlen) {
        cur_len = MIN(destlen, -(dest | TARGET_PAGE_MASK));
        if (!srclen) {
            /* Source exhausted: pad the remaining destination.  */
            access_prepare(&desta, env, dest, cur_len,
                           MMU_DATA_STORE, mmu_idx, ra);
            access_memset(env, &desta, pad, ra);
        } else {
            /* Also stay within a single source page.  */
            cur_len = MIN(MIN(srclen, -(src | TARGET_PAGE_MASK)), cur_len);

            access_prepare(&srca, env, src, cur_len,
                           MMU_DATA_LOAD, mmu_idx, ra);
            access_prepare(&desta, env, dest, cur_len,
                           MMU_DATA_STORE, mmu_idx, ra);
            access_memmove(env, &desta, &srca, ra);
            src = wrap_address(env, src + cur_len);
            srclen -= cur_len;
            env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, srclen);
            set_address_zero(env, r2, src);
        }
        dest = wrap_address(env, dest + cur_len);
        destlen -= cur_len;
        env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, destlen);
        set_address_zero(env, r1, dest);

        /*
         * MVCL is interruptible. Return to the main loop if requested after
         * writing back all state to registers. If no interrupt will get
         * injected, we'll end up back in this handler and continue processing
         * the remaining parts.
         */
        if (destlen && unlikely(cpu_loop_exit_requested(cs))) {
            cpu_loop_exit_restore(cs, ra);
        }
    }
    return cc;
}
1184 
1185 /* move long extended */
1186 uint32_t HELPER(mvcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1187                        uint32_t r3)
1188 {
1189     uintptr_t ra = GETPC();
1190     uint64_t destlen = get_length(env, r1 + 1);
1191     uint64_t dest = get_address(env, r1);
1192     uint64_t srclen = get_length(env, r3 + 1);
1193     uint64_t src = get_address(env, r3);
1194     uint8_t pad = a2;
1195     uint32_t cc;
1196 
1197     cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 1, ra);
1198 
1199     set_length(env, r1 + 1, destlen);
1200     set_length(env, r3 + 1, srclen);
1201     set_address(env, r1, dest);
1202     set_address(env, r3, src);
1203 
1204     return cc;
1205 }
1206 
1207 /* move long unicode */
1208 uint32_t HELPER(mvclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1209                        uint32_t r3)
1210 {
1211     uintptr_t ra = GETPC();
1212     uint64_t destlen = get_length(env, r1 + 1);
1213     uint64_t dest = get_address(env, r1);
1214     uint64_t srclen = get_length(env, r3 + 1);
1215     uint64_t src = get_address(env, r3);
1216     uint16_t pad = a2;
1217     uint32_t cc;
1218 
1219     cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 2, ra);
1220 
1221     set_length(env, r1 + 1, destlen);
1222     set_length(env, r3 + 1, srclen);
1223     set_address(env, r1, dest);
1224     set_address(env, r3, src);
1225 
1226     return cc;
1227 }
1228 
/* compare logical long helper */
static inline uint32_t do_clcl(CPUS390XState *env,
                               uint64_t *src1, uint64_t *src1len,
                               uint64_t *src3, uint64_t *src3len,
                               uint16_t pad, uint64_t limit,
                               int wordsize, uintptr_t ra)
{
    uint64_t len = MAX(*src1len, *src3len);
    uint32_t cc = 0;

    /* Both lengths must be multiples of the unit size.  */
    check_alignment(env, *src1len | *src3len, wordsize, ra);

    if (!len) {
        return cc;
    }

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  */
    if (len > limit) {
        len = limit;
        cc = 3;
    }

    for (; len; len -= wordsize) {
        /* An exhausted operand is extended with the pad character.  */
        uint16_t v1 = pad;
        uint16_t v3 = pad;

        if (*src1len) {
            v1 = cpu_ldusize_data_ra(env, *src1, wordsize, ra);
        }
        if (*src3len) {
            v3 = cpu_ldusize_data_ra(env, *src3, wordsize, ra);
        }

        if (v1 != v3) {
            /* First mismatch decides: CC=1 op1 low, CC=2 op1 high.  */
            cc = (v1 < v3) ? 1 : 2;
            break;
        }

        /* Only advance operands that still have data left.  */
        if (*src1len) {
            *src1 += wordsize;
            *src1len -= wordsize;
        }
        if (*src3len) {
            *src3 += wordsize;
            *src3len -= wordsize;
        }
    }

    return cc;
}
1280 
1281 
1282 /* compare logical long */
1283 uint32_t HELPER(clcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
1284 {
1285     uintptr_t ra = GETPC();
1286     uint64_t src1len = extract64(env->regs[r1 + 1], 0, 24);
1287     uint64_t src1 = get_address(env, r1);
1288     uint64_t src3len = extract64(env->regs[r2 + 1], 0, 24);
1289     uint64_t src3 = get_address(env, r2);
1290     uint8_t pad = env->regs[r2 + 1] >> 24;
1291     uint32_t cc;
1292 
1293     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, -1, 1, ra);
1294 
1295     env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, src1len);
1296     env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, src3len);
1297     set_address(env, r1, src1);
1298     set_address(env, r2, src3);
1299 
1300     return cc;
1301 }
1302 
1303 /* compare logical long extended memcompare insn with padding */
1304 uint32_t HELPER(clcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1305                        uint32_t r3)
1306 {
1307     uintptr_t ra = GETPC();
1308     uint64_t src1len = get_length(env, r1 + 1);
1309     uint64_t src1 = get_address(env, r1);
1310     uint64_t src3len = get_length(env, r3 + 1);
1311     uint64_t src3 = get_address(env, r3);
1312     uint8_t pad = a2;
1313     uint32_t cc;
1314 
1315     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x2000, 1, ra);
1316 
1317     set_length(env, r1 + 1, src1len);
1318     set_length(env, r3 + 1, src3len);
1319     set_address(env, r1, src1);
1320     set_address(env, r3, src3);
1321 
1322     return cc;
1323 }
1324 
1325 /* compare logical long unicode memcompare insn with padding */
1326 uint32_t HELPER(clclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1327                        uint32_t r3)
1328 {
1329     uintptr_t ra = GETPC();
1330     uint64_t src1len = get_length(env, r1 + 1);
1331     uint64_t src1 = get_address(env, r1);
1332     uint64_t src3len = get_length(env, r3 + 1);
1333     uint64_t src3 = get_address(env, r3);
1334     uint16_t pad = a2;
1335     uint32_t cc = 0;
1336 
1337     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x1000, 2, ra);
1338 
1339     set_length(env, r1 + 1, src1len);
1340     set_length(env, r3 + 1, src3len);
1341     set_address(env, r1, src1);
1342     set_address(env, r3, src3);
1343 
1344     return cc;
1345 }
1346 
/* checksum */
Int128 HELPER(cksm)(CPUS390XState *env, uint64_t r1,
                    uint64_t src, uint64_t src_len)
{
    uintptr_t ra = GETPC();
    uint64_t max_len, len;
    /* Accumulate starting from the low 32 bits of R1.  */
    uint64_t cksm = (uint32_t)r1;

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    max_len = (src_len > 0x2000 ? 0x2000 : src_len);

    /* Process full words as available.  */
    for (len = 0; len + 4 <= max_len; len += 4, src += 4) {
        cksm += (uint32_t)cpu_ldl_data_ra(env, src, ra);
    }

    /* Handle the 1-3 trailing bytes, left-aligned within a word.  */
    switch (max_len - len) {
    case 1:
        cksm += cpu_ldub_data_ra(env, src, ra) << 24;
        len += 1;
        break;
    case 2:
        cksm += cpu_lduw_data_ra(env, src, ra) << 16;
        len += 2;
        break;
    case 3:
        cksm += cpu_lduw_data_ra(env, src, ra) << 16;
        cksm += cpu_ldub_data_ra(env, src + 2, ra) << 8;
        len += 3;
        break;
    }

    /* Fold the carry from the checksum.  Note that we can see carry-out
       during folding more than once (but probably not more than twice).  */
    while (cksm > 0xffffffffull) {
        cksm = (uint32_t)cksm + (cksm >> 32);
    }

    /* Indicate whether or not we've processed everything.  */
    env->cc_op = (len == src_len ? 0 : 3);

    /* Return both cksm and processed length.  */
    return int128_make128(cksm, len);
}
1392 
/*
 * PACK: convert the zoned-decimal number at SRC into packed-decimal at
 * DEST.  LEN carries both length codes (length - 1): destination in the
 * high nibble, source in the low nibble.
 */
void HELPER(pack)(CPUS390XState *env, uint32_t len, uint64_t dest, uint64_t src)
{
    uintptr_t ra = GETPC();
    int len_dest = len >> 4;
    int len_src = len & 0xf;
    uint8_t b;

    /* The operands are processed from right to left.  */
    dest += len_dest;
    src += len_src;

    /* last byte is special, it only flips the nibbles */
    b = cpu_ldub_data_ra(env, src, ra);
    cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
    src--;
    len_src--;

    /* now pack every value */
    while (len_dest > 0) {
        /* When the source is exhausted, b stays 0 (zero digits).  */
        b = 0;

        if (len_src >= 0) {
            /* Low nibble: the digit of the next zoned byte.  */
            b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
            src--;
            len_src--;
        }
        if (len_src >= 0) {
            /* High nibble: the digit of the byte after that.  */
            b |= cpu_ldub_data_ra(env, src, ra) << 4;
            src--;
            len_src--;
        }

        len_dest--;
        dest--;
        cpu_stb_data_ra(env, dest, b, ra);
    }
}
1429 
/*
 * Common helper for PKA/PKU: pack the rightmost digits of SRC (digit
 * stride SSIZE bytes) into a 16-byte packed-decimal field at DEST.
 * The sign nibble is always set to 0xc (positive).
 */
static inline void do_pkau(CPUS390XState *env, uint64_t dest, uint64_t src,
                           uint32_t srclen, int ssize, uintptr_t ra)
{
    int i;
    /* The destination operand is always 16 bytes long.  */
    const int destlen = 16;

    /* The operands are processed from right to left.  */
    src += srclen - 1;
    dest += destlen - 1;

    for (i = 0; i < destlen; i++) {
        /* Once the source is exhausted, zero digits are stored.  */
        uint8_t b = 0;

        /* Start with a positive sign */
        if (i == 0) {
            b = 0xc;
        } else if (srclen > ssize) {
            /* Low nibble: the digit of the next source unit.  */
            b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
            src -= ssize;
            srclen -= ssize;
        }

        if (srclen > ssize) {
            /* High nibble: the digit of the following source unit.  */
            b |= cpu_ldub_data_ra(env, src, ra) << 4;
            src -= ssize;
            srclen -= ssize;
        }

        cpu_stb_data_ra(env, dest, b, ra);
        dest--;
    }
}
1463 
1464 
1465 void HELPER(pka)(CPUS390XState *env, uint64_t dest, uint64_t src,
1466                  uint32_t srclen)
1467 {
1468     do_pkau(env, dest, src, srclen, 1, GETPC());
1469 }
1470 
1471 void HELPER(pku)(CPUS390XState *env, uint64_t dest, uint64_t src,
1472                  uint32_t srclen)
1473 {
1474     do_pkau(env, dest, src, srclen, 2, GETPC());
1475 }
1476 
/*
 * UNPK: convert the packed-decimal number at SRC into zoned format at
 * DEST.  LEN carries both length codes (length - 1): destination in the
 * high nibble, source in the low nibble.
 */
void HELPER(unpk)(CPUS390XState *env, uint32_t len, uint64_t dest,
                  uint64_t src)
{
    uintptr_t ra = GETPC();
    int len_dest = len >> 4;
    int len_src = len & 0xf;
    uint8_t b;
    /* Tracks which nibble of the current source byte is consumed next.  */
    int second_nibble = 0;

    /* The operands are processed from right to left.  */
    dest += len_dest;
    src += len_src;

    /* last byte is special, it only flips the nibbles */
    b = cpu_ldub_data_ra(env, src, ra);
    cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
    src--;
    len_src--;

    /* now pad every nibble with 0xf0 */

    while (len_dest > 0) {
        /* Zero digits once the source is exhausted.  */
        uint8_t cur_byte = 0;

        if (len_src > 0) {
            cur_byte = cpu_ldub_data_ra(env, src, ra);
        }

        len_dest--;
        dest--;

        /* only advance one nibble at a time */
        if (second_nibble) {
            cur_byte >>= 4;
            len_src--;
            src--;
        }
        second_nibble = !second_nibble;

        /* digit */
        cur_byte = (cur_byte & 0xf);
        /* zone bits */
        cur_byte |= 0xf0;

        cpu_stb_data_ra(env, dest, cur_byte, ra);
    }
}
1523 
/*
 * Common helper for UNPKA/UNPKU: unpack the 16-byte packed-decimal field
 * at SRC into DESTLEN bytes at DEST, one digit per DSIZE-byte unit, each
 * zoned with 0x30.  Returns the CC derived from the sign nibble:
 * 0 = plus, 1 = minus, 3 = invalid sign.
 */
static inline uint32_t do_unpkau(CPUS390XState *env, uint64_t dest,
                                 uint32_t destlen, int dsize, uint64_t src,
                                 uintptr_t ra)
{
    int i;
    uint32_t cc;
    uint8_t b;
    /* The source operand is always 16 bytes long.  */
    const int srclen = 16;

    /* The operands are processed from right to left.  */
    src += srclen - 1;
    dest += destlen - dsize;

    /* Check for the sign.  */
    b = cpu_ldub_data_ra(env, src, ra);
    src--;
    switch (b & 0xf) {
    case 0xa:
    case 0xc:
    case 0xe ... 0xf:
        cc = 0;  /* plus */
        break;
    case 0xb:
    case 0xd:
        cc = 1;  /* minus */
        break;
    default:
    case 0x0 ... 0x9:
        cc = 3;  /* invalid */
        break;
    }

    /* Now pad every nibble with 0x30, advancing one nibble at a time. */
    for (i = 0; i < destlen; i += dsize) {
        if (i == (31 * dsize)) {
            /* If length is 32/64 bytes, the leftmost byte is 0. */
            b = 0;
        } else if (i % (2 * dsize)) {
            /* Odd digit position: fetch the next source byte.  */
            b = cpu_ldub_data_ra(env, src, ra);
            src--;
        } else {
            /* Even digit position: use the high nibble of the same byte.  */
            b >>= 4;
        }
        cpu_stsize_data_ra(env, dest, 0x30 + (b & 0xf), dsize, ra);
        dest -= dsize;
    }

    return cc;
}
1574 
1575 uint32_t HELPER(unpka)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
1576                        uint64_t src)
1577 {
1578     return do_unpkau(env, dest, destlen, 1, src, GETPC());
1579 }
1580 
1581 uint32_t HELPER(unpku)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
1582                        uint64_t src)
1583 {
1584     return do_unpkau(env, dest, destlen, 2, src, GETPC());
1585 }
1586 
1587 uint32_t HELPER(tp)(CPUS390XState *env, uint64_t dest, uint32_t destlen)
1588 {
1589     uintptr_t ra = GETPC();
1590     uint32_t cc = 0;
1591     int i;
1592 
1593     for (i = 0; i < destlen; i++) {
1594         uint8_t b = cpu_ldub_data_ra(env, dest + i, ra);
1595         /* digit */
1596         cc |= (b & 0xf0) > 0x90 ? 2 : 0;
1597 
1598         if (i == (destlen - 1)) {
1599             /* sign */
1600             cc |= (b & 0xf) < 0xa ? 1 : 0;
1601         } else {
1602             /* digit */
1603             cc |= (b & 0xf) > 0x9 ? 2 : 0;
1604         }
1605     }
1606 
1607     return cc;
1608 }
1609 
1610 static uint32_t do_helper_tr(CPUS390XState *env, uint32_t len, uint64_t array,
1611                              uint64_t trans, uintptr_t ra)
1612 {
1613     uint32_t i;
1614 
1615     for (i = 0; i <= len; i++) {
1616         uint8_t byte = cpu_ldub_data_ra(env, array + i, ra);
1617         uint8_t new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
1618         cpu_stb_data_ra(env, array + i, new_byte, ra);
1619     }
1620 
1621     return env->cc_op;
1622 }
1623 
1624 void HELPER(tr)(CPUS390XState *env, uint32_t len, uint64_t array,
1625                 uint64_t trans)
1626 {
1627     do_helper_tr(env, len, array, trans, GETPC());
1628 }
1629 
/*
 * TRANSLATE EXTENDED: translate bytes at ARRAY through the table at
 * TRANS, stopping early when the test byte (low 8 bits of R0) is seen.
 * Sets env->cc_op and returns the remaining length and updated address.
 */
Int128 HELPER(tre)(CPUS390XState *env, uint64_t array,
                   uint64_t len, uint64_t trans)
{
    uintptr_t ra = GETPC();
    uint8_t end = env->regs[0] & 0xff;
    uint64_t l = len;
    uint64_t i;
    uint32_t cc = 0;

    /* Outside 64-bit addressing, truncate the address and length.  */
    if (!(env->psw.mask & PSW_MASK_64)) {
        array &= 0x7fffffff;
        l = (uint32_t)l;
    }

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    if (l > 0x2000) {
        l = 0x2000;
        cc = 3;
    }

    for (i = 0; i < l; i++) {
        uint8_t byte, new_byte;

        byte = cpu_ldub_data_ra(env, array + i, ra);

        /* The test byte stops processing before being translated.  */
        if (byte == end) {
            cc = 1;
            break;
        }

        new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
        cpu_stb_data_ra(env, array + i, new_byte, ra);
    }

    env->cc_op = cc;
    return int128_make128(len - i, array + i);
}
1668 
/*
 * Common helper for TRT/TRTR: scan LEN + 1 bytes at ARRAY (LEN is a
 * length code) in direction INC (+1 forward, -1 backward), looking up
 * each byte in the function table at TRANS.
 */
static inline uint32_t do_helper_trt(CPUS390XState *env, int len,
                                     uint64_t array, uint64_t trans,
                                     int inc, uintptr_t ra)
{
    int i;

    for (i = 0; i <= len; i++) {
        uint8_t byte = cpu_ldub_data_ra(env, array + i * inc, ra);
        uint8_t sbyte = cpu_ldub_data_ra(env, trans + byte, ra);

        /* A nonzero function byte stops the scan: record the argument
           address in R1 and the function byte in the low 8 bits of R2.
           CC=2 if it was the last byte, CC=1 otherwise.  */
        if (sbyte != 0) {
            set_address(env, 1, array + i * inc);
            env->regs[2] = deposit64(env->regs[2], 0, 8, sbyte);
            return (i == len) ? 2 : 1;
        }
    }

    /* All function bytes were zero: CC=0.  */
    return 0;
}
1688 
1689 static uint32_t do_helper_trt_fwd(CPUS390XState *env, uint32_t len,
1690                                   uint64_t array, uint64_t trans,
1691                                   uintptr_t ra)
1692 {
1693     return do_helper_trt(env, len, array, trans, 1, ra);
1694 }
1695 
1696 uint32_t HELPER(trt)(CPUS390XState *env, uint32_t len, uint64_t array,
1697                      uint64_t trans)
1698 {
1699     return do_helper_trt(env, len, array, trans, 1, GETPC());
1700 }
1701 
1702 static uint32_t do_helper_trt_bkwd(CPUS390XState *env, uint32_t len,
1703                                    uint64_t array, uint64_t trans,
1704                                    uintptr_t ra)
1705 {
1706     return do_helper_trt(env, len, array, trans, -1, ra);
1707 }
1708 
1709 uint32_t HELPER(trtr)(CPUS390XState *env, uint32_t len, uint64_t array,
1710                       uint64_t trans)
1711 {
1712     return do_helper_trt(env, len, array, trans, -1, GETPC());
1713 }
1714 
/* Translate one/two to one/two */
uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2,
                      uint32_t tst, uint32_t sizes)
{
    uintptr_t ra = GETPC();
    /* Bit 0 of SIZES selects the destination unit size, bit 1 the
       source unit size (bit set = 1 byte, clear = 2 bytes).  */
    int dsize = (sizes & 1) ? 1 : 2;
    int ssize = (sizes & 2) ? 1 : 2;
    /* R1 implicitly holds the translation-table address.  */
    uint64_t tbl = get_address(env, 1);
    uint64_t dst = get_address(env, r1);
    uint64_t len = get_length(env, r1 + 1);
    uint64_t src = get_address(env, r2);
    uint32_t cc = 3;
    int i;

    /* The lower address bits of TBL are ignored.  For TROO, TROT, it's
       the low 3 bits (double-word aligned).  For TRTO, TRTT, it's either
       the low 12 bits (4K, without ETF2-ENH) or 3 bits (with ETF2-ENH).  */
    if (ssize == 2 && !s390_has_feat(S390_FEAT_ETF2_ENH)) {
        tbl &= -4096;
    } else {
        tbl &= -8;
    }

    /* The length must be a multiple of the source unit size.  */
    check_alignment(env, len, ssize, ra);

    /* Lest we fail to service interrupts in a timely manner, */
    /* limit the amount of work we're willing to do.   */
    for (i = 0; i < 0x2000; i++) {
        uint16_t sval = cpu_ldusize_data_ra(env, src, ssize, ra);
        uint64_t tble = tbl + (sval * dsize);
        uint16_t dval = cpu_ldusize_data_ra(env, tble, dsize, ra);
        /* Stop with CC=1 when the looked-up value equals the test char.  */
        if (dval == tst) {
            cc = 1;
            break;
        }
        cpu_stsize_data_ra(env, dst, dval, dsize, ra);

        len -= ssize;
        src += ssize;
        dst += dsize;

        /* All units processed: CC=0.  Otherwise we stop after 0x2000
           iterations with cc still 3 (CPU-determined amount).  */
        if (len == 0) {
            cc = 0;
            break;
        }
    }

    /* Write the updated addresses and remaining length back.  */
    set_address(env, r1, dst);
    set_length(env, r1 + 1, len);
    set_address(env, r2, src);

    return cc;
}
1768 
1769 static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
1770                         uint64_t a2, bool parallel)
1771 {
1772     uint32_t mem_idx = s390x_env_mmu_index(env, false);
1773     MemOpIdx oi16 = make_memop_idx(MO_TE | MO_128, mem_idx);
1774     MemOpIdx oi8 = make_memop_idx(MO_TE | MO_64, mem_idx);
1775     MemOpIdx oi4 = make_memop_idx(MO_TE | MO_32, mem_idx);
1776     MemOpIdx oi2 = make_memop_idx(MO_TE | MO_16, mem_idx);
1777     MemOpIdx oi1 = make_memop_idx(MO_8, mem_idx);
1778     uintptr_t ra = GETPC();
1779     uint32_t fc = extract32(env->regs[0], 0, 8);
1780     uint32_t sc = extract32(env->regs[0], 8, 8);
1781     uint64_t pl = get_address(env, 1) & -16;
1782     uint64_t svh, svl;
1783     uint32_t cc;
1784 
1785     /* Sanity check the function code and storage characteristic.  */
1786     if (fc > 1 || sc > 3) {
1787         if (!s390_has_feat(S390_FEAT_COMPARE_AND_SWAP_AND_STORE_2)) {
1788             goto spec_exception;
1789         }
1790         if (fc > 2 || sc > 4 || (fc == 2 && (r3 & 1))) {
1791             goto spec_exception;
1792         }
1793     }
1794 
1795     /* Sanity check the alignments.  */
1796     if (extract32(a1, 0, fc + 2) || extract32(a2, 0, sc)) {
1797         goto spec_exception;
1798     }
1799 
1800     /* Sanity check writability of the store address.  */
1801     probe_write(env, a2, 1 << sc, mem_idx, ra);
1802 
1803     /*
1804      * Note that the compare-and-swap is atomic, and the store is atomic,
1805      * but the complete operation is not.  Therefore we do not need to
1806      * assert serial context in order to implement this.  That said,
1807      * restart early if we can't support either operation that is supposed
1808      * to be atomic.
1809      */
1810     if (parallel) {
1811         uint32_t max = 2;
1812 #ifdef CONFIG_ATOMIC64
1813         max = 3;
1814 #endif
1815         if ((HAVE_CMPXCHG128 ? 0 : fc + 2 > max) ||
1816             (HAVE_ATOMIC128_RW ? 0 : sc > max)) {
1817             cpu_loop_exit_atomic(env_cpu(env), ra);
1818         }
1819     }
1820 
1821     /*
1822      * All loads happen before all stores.  For simplicity, load the entire
1823      * store value area from the parameter list.
1824      */
1825     svh = cpu_ldq_mmu(env, pl + 16, oi8, ra);
1826     svl = cpu_ldq_mmu(env, pl + 24, oi8, ra);
1827 
1828     switch (fc) {
1829     case 0:
1830         {
1831             uint32_t nv = cpu_ldl_mmu(env, pl, oi4, ra);
1832             uint32_t cv = env->regs[r3];
1833             uint32_t ov;
1834 
1835             if (parallel) {
1836                 ov = cpu_atomic_cmpxchgl_be_mmu(env, a1, cv, nv, oi4, ra);
1837             } else {
1838                 ov = cpu_ldl_mmu(env, a1, oi4, ra);
1839                 cpu_stl_mmu(env, a1, (ov == cv ? nv : ov), oi4, ra);
1840             }
1841             cc = (ov != cv);
1842             env->regs[r3] = deposit64(env->regs[r3], 32, 32, ov);
1843         }
1844         break;
1845 
1846     case 1:
1847         {
1848             uint64_t nv = cpu_ldq_mmu(env, pl, oi8, ra);
1849             uint64_t cv = env->regs[r3];
1850             uint64_t ov;
1851 
1852             if (parallel) {
1853 #ifdef CONFIG_ATOMIC64
1854                 ov = cpu_atomic_cmpxchgq_be_mmu(env, a1, cv, nv, oi8, ra);
1855 #else
1856                 /* Note that we asserted !parallel above.  */
1857                 g_assert_not_reached();
1858 #endif
1859             } else {
1860                 ov = cpu_ldq_mmu(env, a1, oi8, ra);
1861                 cpu_stq_mmu(env, a1, (ov == cv ? nv : ov), oi8, ra);
1862             }
1863             cc = (ov != cv);
1864             env->regs[r3] = ov;
1865         }
1866         break;
1867 
1868     case 2:
1869         {
1870             Int128 nv = cpu_ld16_mmu(env, pl, oi16, ra);
1871             Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1872             Int128 ov;
1873 
1874             if (!parallel) {
1875                 ov = cpu_ld16_mmu(env, a1, oi16, ra);
1876                 cc = !int128_eq(ov, cv);
1877                 if (cc) {
1878                     nv = ov;
1879                 }
1880                 cpu_st16_mmu(env, a1, nv, oi16, ra);
1881             } else if (HAVE_CMPXCHG128) {
1882                 ov = cpu_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi16, ra);
1883                 cc = !int128_eq(ov, cv);
1884             } else {
1885                 /* Note that we asserted !parallel above.  */
1886                 g_assert_not_reached();
1887             }
1888 
1889             env->regs[r3 + 0] = int128_gethi(ov);
1890             env->regs[r3 + 1] = int128_getlo(ov);
1891         }
1892         break;
1893 
1894     default:
1895         g_assert_not_reached();
1896     }
1897 
1898     /* Store only if the comparison succeeded.  Note that above we use a pair
1899        of 64-bit big-endian loads, so for sc < 3 we must extract the value
1900        from the most-significant bits of svh.  */
1901     if (cc == 0) {
1902         switch (sc) {
1903         case 0:
1904             cpu_stb_mmu(env, a2, svh >> 56, oi1, ra);
1905             break;
1906         case 1:
1907             cpu_stw_mmu(env, a2, svh >> 48, oi2, ra);
1908             break;
1909         case 2:
1910             cpu_stl_mmu(env, a2, svh >> 32, oi4, ra);
1911             break;
1912         case 3:
1913             cpu_stq_mmu(env, a2, svh, oi8, ra);
1914             break;
1915         case 4:
1916             cpu_st16_mmu(env, a2, int128_make128(svl, svh), oi16, ra);
1917             break;
1918         default:
1919             g_assert_not_reached();
1920         }
1921     }
1922 
1923     return cc;
1924 
1925  spec_exception:
1926     tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1927 }
1928 
/* COMPARE AND SWAP AND STORE, serial context (no parallel atomicity).  */
uint32_t HELPER(csst)(CPUS390XState *env, uint32_t r3, uint64_t a1, uint64_t a2)
{
    return do_csst(env, r3, a1, a2, false);
}
1933 
/* COMPARE AND SWAP AND STORE, parallel context (atomic ops required;
   do_csst restarts serially when the host cannot provide them).  */
uint32_t HELPER(csst_parallel)(CPUS390XState *env, uint32_t r3, uint64_t a1,
                               uint64_t a2)
{
    return do_csst(env, r3, a1, a2, true);
}
1939 
1940 #if !defined(CONFIG_USER_ONLY)
1941 void HELPER(lctlg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1942 {
1943     uintptr_t ra = GETPC();
1944     bool PERchanged = false;
1945     uint64_t src = a2;
1946     uint32_t i;
1947 
1948     if (src & 0x7) {
1949         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1950     }
1951 
1952     for (i = r1;; i = (i + 1) % 16) {
1953         uint64_t val = cpu_ldq_data_ra(env, src, ra);
1954         if (env->cregs[i] != val && i >= 9 && i <= 11) {
1955             PERchanged = true;
1956         }
1957         env->cregs[i] = val;
1958         HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%" PRIx64 "\n",
1959                    i, src, val);
1960         src += sizeof(uint64_t);
1961 
1962         if (i == r3) {
1963             break;
1964         }
1965     }
1966 
1967     if (PERchanged && env->psw.mask & PSW_MASK_PER) {
1968         s390_cpu_recompute_watchpoints(env_cpu(env));
1969     }
1970 
1971     tlb_flush(env_cpu(env));
1972 }
1973 
1974 void HELPER(lctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1975 {
1976     uintptr_t ra = GETPC();
1977     bool PERchanged = false;
1978     uint64_t src = a2;
1979     uint32_t i;
1980 
1981     if (src & 0x3) {
1982         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1983     }
1984 
1985     for (i = r1;; i = (i + 1) % 16) {
1986         uint32_t val = cpu_ldl_data_ra(env, src, ra);
1987         if ((uint32_t)env->cregs[i] != val && i >= 9 && i <= 11) {
1988             PERchanged = true;
1989         }
1990         env->cregs[i] = deposit64(env->cregs[i], 0, 32, val);
1991         HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%x\n", i, src, val);
1992         src += sizeof(uint32_t);
1993 
1994         if (i == r3) {
1995             break;
1996         }
1997     }
1998 
1999     if (PERchanged && env->psw.mask & PSW_MASK_PER) {
2000         s390_cpu_recompute_watchpoints(env_cpu(env));
2001     }
2002 
2003     tlb_flush(env_cpu(env));
2004 }
2005 
2006 void HELPER(stctg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2007 {
2008     uintptr_t ra = GETPC();
2009     uint64_t dest = a2;
2010     uint32_t i;
2011 
2012     if (dest & 0x7) {
2013         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2014     }
2015 
2016     for (i = r1;; i = (i + 1) % 16) {
2017         cpu_stq_data_ra(env, dest, env->cregs[i], ra);
2018         dest += sizeof(uint64_t);
2019 
2020         if (i == r3) {
2021             break;
2022         }
2023     }
2024 }
2025 
2026 void HELPER(stctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2027 {
2028     uintptr_t ra = GETPC();
2029     uint64_t dest = a2;
2030     uint32_t i;
2031 
2032     if (dest & 0x3) {
2033         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2034     }
2035 
2036     for (i = r1;; i = (i + 1) % 16) {
2037         cpu_stl_data_ra(env, dest, env->cregs[i], ra);
2038         dest += sizeof(uint32_t);
2039 
2040         if (i == r3) {
2041             break;
2042         }
2043     }
2044 }
2045 
2046 uint32_t HELPER(testblock)(CPUS390XState *env, uint64_t real_addr)
2047 {
2048     uintptr_t ra = GETPC();
2049     int i;
2050 
2051     real_addr = wrap_address(env, real_addr) & TARGET_PAGE_MASK;
2052 
2053     for (i = 0; i < TARGET_PAGE_SIZE; i += 8) {
2054         cpu_stq_mmuidx_ra(env, real_addr + i, 0, MMU_REAL_IDX, ra);
2055     }
2056 
2057     return 0;
2058 }
2059 
/*
 * TEST PROTECTION: probe whether a1 can be stored to / fetched from.
 * Returns the TPROT condition code: 0 fetch+store ok, 1 fetch only,
 * 2 neither, 3 translation not available.
 */
uint32_t HELPER(tprot)(CPUS390XState *env, uint64_t a1, uint64_t a2)
{
    S390CPU *cpu = env_archcpu(env);
    CPUState *cs = env_cpu(env);

    /*
     * TODO: we currently don't handle all access protection types
     * (including access-list and key-controlled) as well as AR mode.
     */
    if (!s390_cpu_virt_mem_check_write(cpu, a1, 0, 1)) {
        /* Fetching permitted; storing permitted */
        return 0;
    }

    /* The write probe failed; its cause is left in env->int_pgm_code
       and a pending exception in cs->exception_index.  */
    if (env->int_pgm_code == PGM_PROTECTION) {
        /* retry if reading is possible */
        cs->exception_index = -1;
        if (!s390_cpu_virt_mem_check_read(cpu, a1, 0, 1)) {
            /* Fetching permitted; storing not permitted */
            return 1;
        }
    }

    switch (env->int_pgm_code) {
    case PGM_PROTECTION:
        /* Fetching not permitted; storing not permitted */
        cs->exception_index = -1;
        return 2;
    case PGM_ADDRESSING:
    case PGM_TRANS_SPEC:
        /* exceptions forwarded to the guest */
        s390_cpu_virt_mem_handle_exc(cpu, GETPC());
        return 0;
    }

    /* Translation not available */
    cs->exception_index = -1;
    return 3;
}
2099 
2100 /* insert storage key extended */
uint64_t HELPER(iske)(CPUS390XState *env, uint64_t r2)
{
    /* The skeys device is looked up once and cached across calls.  */
    static S390SKeysState *ss;
    static S390SKeysClass *skeyclass;
    uint64_t addr = wrap_address(env, r2);
    uint8_t key;
    int rc;

    /* Convert the real operand address to absolute and validate it.  */
    addr = mmu_real2abs(env, addr);
    if (!mmu_absolute_addr_valid(addr, false)) {
        tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
    }

    if (unlikely(!ss)) {
        ss = s390_get_skeys_device();
        skeyclass = S390_SKEYS_GET_CLASS(ss);
        /* NOTE(review): presumably enable_skeys returns false when keys
           were not active before; flush stale translations then.  */
        if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
            tlb_flush_all_cpus_synced(env_cpu(env));
        }
    }

    rc = s390_skeys_get(ss, addr / TARGET_PAGE_SIZE, 1, &key);
    if (rc) {
        /* An unreadable key is silently reported as 0.  */
        return 0;
    }
    return key;
}
2128 
2129 /* set storage key extended */
2130 void HELPER(sske)(CPUS390XState *env, uint64_t r1, uint64_t r2)
2131 {
2132     static S390SKeysState *ss;
2133     static S390SKeysClass *skeyclass;
2134     uint64_t addr = wrap_address(env, r2);
2135     uint8_t key;
2136 
2137     addr = mmu_real2abs(env, addr);
2138     if (!mmu_absolute_addr_valid(addr, false)) {
2139         tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
2140     }
2141 
2142     if (unlikely(!ss)) {
2143         ss = s390_get_skeys_device();
2144         skeyclass = S390_SKEYS_GET_CLASS(ss);
2145         if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
2146             tlb_flush_all_cpus_synced(env_cpu(env));
2147         }
2148     }
2149 
2150     key = r1 & 0xfe;
2151     s390_skeys_set(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2152    /*
2153     * As we can only flush by virtual address and not all the entries
2154     * that point to a physical address we have to flush the whole TLB.
2155     */
2156     tlb_flush_all_cpus_synced(env_cpu(env));
2157 }
2158 
2159 /* reset reference bit extended */
uint32_t HELPER(rrbe)(CPUS390XState *env, uint64_t r2)
{
    uint64_t addr = wrap_address(env, r2);
    /* The skeys device is looked up once and cached across calls.  */
    static S390SKeysState *ss;
    static S390SKeysClass *skeyclass;
    uint8_t re, key;
    int rc;

    /* Convert the real operand address to absolute and validate it.  */
    addr = mmu_real2abs(env, addr);
    if (!mmu_absolute_addr_valid(addr, false)) {
        tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
    }

    if (unlikely(!ss)) {
        ss = s390_get_skeys_device();
        skeyclass = S390_SKEYS_GET_CLASS(ss);
        /* NOTE(review): presumably enable_skeys returns false when keys
           were not active before; flush stale translations then.  */
        if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
            tlb_flush_all_cpus_synced(env_cpu(env));
        }
    }

    rc = s390_skeys_get(ss, addr / TARGET_PAGE_SIZE, 1, &key);
    if (rc) {
        /* Key unreadable: report cc 0.  */
        return 0;
    }

    /* Remember the old reference and change bits, then clear R only.  */
    re = key & (SK_R | SK_C);
    key &= ~SK_R;

    rc = s390_skeys_set(ss, addr / TARGET_PAGE_SIZE, 1, &key);
    if (rc) {
        return 0;
    }
    /*
     * As we can only flush by virtual address and not all the entries
     * that point to a physical address we have to flush the whole TLB.
     */
    tlb_flush_all_cpus_synced(env_cpu(env));

    /*
     * cc
     *
     * 0  Reference bit zero; change bit zero
     * 1  Reference bit zero; change bit one
     * 2  Reference bit one; change bit zero
     * 3  Reference bit one; change bit one
     */

    return re >> 1;
}
2210 
2211 uint32_t HELPER(mvcs)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2,
2212                       uint64_t key)
2213 {
2214     const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2215     S390Access srca, desta;
2216     uintptr_t ra = GETPC();
2217     int cc = 0;
2218 
2219     HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
2220                __func__, l, a1, a2);
2221 
2222     if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
2223         psw_as == AS_HOME || psw_as == AS_ACCREG) {
2224         s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2225     }
2226 
2227     if (!psw_key_valid(env, (key >> 4) & 0xf)) {
2228         s390_program_interrupt(env, PGM_PRIVILEGED, ra);
2229     }
2230 
2231     l = wrap_length32(env, l);
2232     if (l > 256) {
2233         /* max 256 */
2234         l = 256;
2235         cc = 3;
2236     } else if (!l) {
2237         return cc;
2238     }
2239 
2240     access_prepare(&srca, env, a2, l, MMU_DATA_LOAD, MMU_PRIMARY_IDX, ra);
2241     access_prepare(&desta, env, a1, l, MMU_DATA_STORE, MMU_SECONDARY_IDX, ra);
2242     access_memmove(env, &desta, &srca, ra);
2243     return cc;
2244 }
2245 
2246 uint32_t HELPER(mvcp)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2,
2247                       uint64_t key)
2248 {
2249     const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2250     S390Access srca, desta;
2251     uintptr_t ra = GETPC();
2252     int cc = 0;
2253 
2254     HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
2255                __func__, l, a1, a2);
2256 
2257     if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
2258         psw_as == AS_HOME || psw_as == AS_ACCREG) {
2259         s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2260     }
2261 
2262     if (!psw_key_valid(env, (key >> 4) & 0xf)) {
2263         s390_program_interrupt(env, PGM_PRIVILEGED, ra);
2264     }
2265 
2266     l = wrap_length32(env, l);
2267     if (l > 256) {
2268         /* max 256 */
2269         l = 256;
2270         cc = 3;
2271     } else if (!l) {
2272         return cc;
2273     }
2274     access_prepare(&srca, env, a2, l, MMU_DATA_LOAD, MMU_SECONDARY_IDX, ra);
2275     access_prepare(&desta, env, a1, l, MMU_DATA_STORE, MMU_PRIMARY_IDX, ra);
2276     access_memmove(env, &desta, &srca, ra);
2277     return cc;
2278 }
2279 
/*
 * INVALIDATE DAT TABLE ENTRY: optionally mark a run of region/segment
 * table entries invalid, then flush translations.
 */
void HELPER(idte)(CPUS390XState *env, uint64_t r1, uint64_t r2, uint32_t m4)
{
    CPUState *cs = env_cpu(env);
    const uintptr_t ra = GETPC();
    uint64_t table, entry, raddr;
    uint16_t entries, i, index = 0;

    /* Bits 44-51 of R2 must be zero.  */
    if (r2 & 0xff000) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    if (!(r2 & 0x800)) {
        /* invalidation-and-clearing operation */
        table = r1 & ASCE_ORIGIN;
        /* The count of additional entries is in bits 53-63 of R2.  */
        entries = (r2 & 0x7ff) + 1;

        /* Locate the starting table index from the effective address
           in R2, according to the designation type in R1.  */
        switch (r1 & ASCE_TYPE_MASK) {
        case ASCE_TYPE_REGION1:
            index = (r2 >> 53) & 0x7ff;
            break;
        case ASCE_TYPE_REGION2:
            index = (r2 >> 42) & 0x7ff;
            break;
        case ASCE_TYPE_REGION3:
            index = (r2 >> 31) & 0x7ff;
            break;
        case ASCE_TYPE_SEGMENT:
            index = (r2 >> 20) & 0x7ff;
            break;
        }
        for (i = 0; i < entries; i++) {
            /* addresses are not wrapped in 24/31bit mode but table index is */
            raddr = table + ((index + i) & 0x7ff) * sizeof(entry);
            entry = cpu_ldq_mmuidx_ra(env, raddr, MMU_REAL_IDX, ra);
            if (!(entry & REGION_ENTRY_I)) {
                /* we are allowed to not store if already invalid */
                entry |= REGION_ENTRY_I;
                cpu_stq_mmuidx_ra(env, raddr, entry, MMU_REAL_IDX, ra);
            }
        }
    }

    /* We simply flush the complete tlb, therefore we can ignore r3. */
    if (m4 & 1) {
        /* flush only this CPU's TLB */
        tlb_flush(cs);
    } else {
        tlb_flush_all_cpus_synced(cs);
    }
}
2329 
2330 /* invalidate pte */
/*
 * INVALIDATE PAGE TABLE ENTRY: set the invalid bit in the PTE selected
 * by PTO and VADDR, then flush the affected translations.
 */
void HELPER(ipte)(CPUS390XState *env, uint64_t pto, uint64_t vaddr,
                  uint32_t m4)
{
    CPUState *cs = env_cpu(env);
    const uintptr_t ra = GETPC();
    uint64_t page = vaddr & TARGET_PAGE_MASK;
    uint64_t pte_addr, pte;

    /* Compute the page table entry address */
    pte_addr = (pto & SEGMENT_ENTRY_ORIGIN);
    pte_addr += VADDR_PAGE_TX(vaddr) * 8;

    /* Mark the page table entry as invalid */
    pte = cpu_ldq_mmuidx_ra(env, pte_addr, MMU_REAL_IDX, ra);
    pte |= PAGE_ENTRY_I;
    cpu_stq_mmuidx_ra(env, pte_addr, pte, MMU_REAL_IDX, ra);

    /* XXX we exploit the fact that Linux passes the exact virtual
       address here - it's not obliged to! */
    if (m4 & 1) {
        /* local flush only */
        if (vaddr & ~VADDR_PAGE_TX_MASK) {
            tlb_flush_page(cs, page);
            /* XXX 31-bit hack */
            tlb_flush_page(cs, page ^ 0x80000000);
        } else {
            /* looks like we don't have a valid virtual address */
            tlb_flush(cs);
        }
    } else {
        /* flush on all CPUs, synchronized */
        if (vaddr & ~VADDR_PAGE_TX_MASK) {
            tlb_flush_page_all_cpus_synced(cs, page);
            /* XXX 31-bit hack */
            tlb_flush_page_all_cpus_synced(cs, page ^ 0x80000000);
        } else {
            /* looks like we don't have a valid virtual address */
            tlb_flush_all_cpus_synced(cs);
        }
    }
}
2370 
2371 /* flush local tlb */
void HELPER(ptlb)(CPUS390XState *env)
{
    /* Discard all cached translations of this CPU only.  */
    tlb_flush(env_cpu(env));
}
2376 
2377 /* flush global tlb */
void HELPER(purge)(CPUS390XState *env)
{
    /* Discard cached translations on all CPUs, synchronized.  */
    tlb_flush_all_cpus_synced(env_cpu(env));
}
2382 
2383 /* load real address */
2384 uint64_t HELPER(lra)(CPUS390XState *env, uint64_t r1, uint64_t addr)
2385 {
2386     uint64_t asc = env->psw.mask & PSW_MASK_ASC;
2387     uint64_t ret, tec;
2388     int flags, exc, cc;
2389 
2390     /* XXX incomplete - has more corner cases */
2391     if (!(env->psw.mask & PSW_MASK_64) && (addr >> 32)) {
2392         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, GETPC());
2393     }
2394 
2395     exc = mmu_translate(env, addr, MMU_S390_LRA, asc, &ret, &flags, &tec);
2396     if (exc) {
2397         cc = 3;
2398         ret = (r1 & 0xFFFFFFFF00000000ULL) | exc | 0x80000000;
2399     } else {
2400         cc = 0;
2401         ret |= addr & ~TARGET_PAGE_MASK;
2402     }
2403 
2404     env->cc_op = cc;
2405     return ret;
2406 }
2407 #endif
2408 
2409 /* Execute instruction.  This instruction executes an insn modified with
2410    the contents of r1.  It does not change the executed instruction in memory;
2411    it does not change the program counter.
2412 
2413    Perform this by recording the modified instruction in env->ex_value.
2414    This will be noticed by cpu_get_tb_cpu_state and thus tb translation.
2415 */
void HELPER(ex)(CPUS390XState *env, uint32_t ilen, uint64_t r1, uint64_t addr)
{
    uint64_t insn;
    uint8_t opc;

    /* EXECUTE targets must be at even addresses.  */
    if (addr & 1) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC());
    }

    insn = cpu_lduw_code(env, addr);
    opc = insn >> 8;

    /* Or in the contents of R1[56:63].  */
    insn |= r1 & 0xff;

    /* Load the rest of the instruction.  */
    insn <<= 48;
    switch (get_ilen(opc)) {
    case 2:
        break;
    case 4:
        insn |= (uint64_t)cpu_lduw_code(env, addr + 2) << 32;
        break;
    case 6:
        insn |= (uint64_t)(uint32_t)cpu_ldl_code(env, addr + 2) << 16;
        break;
    default:
        g_assert_not_reached();
    }

    /* The very most common cases can be sped up by avoiding a new TB.  */
    if ((opc & 0xf0) == 0xd0) {
        /* Opcodes 0xd0-0xdf: storage-to-storage insns with direct
           helpers; dispatch without retranslating.  */
        typedef uint32_t (*dx_helper)(CPUS390XState *, uint32_t, uint64_t,
                                      uint64_t, uintptr_t);
        static const dx_helper dx[16] = {
            [0x0] = do_helper_trt_bkwd,
            [0x2] = do_helper_mvc,
            [0x4] = do_helper_nc,
            [0x5] = do_helper_clc,
            [0x6] = do_helper_oc,
            [0x7] = do_helper_xc,
            [0xc] = do_helper_tr,
            [0xd] = do_helper_trt_fwd,
        };
        dx_helper helper = dx[opc & 0xf];

        if (helper) {
            /* Extract the L, B1/D1 and B2/D2 fields from the (already
               modified) instruction image.  */
            uint32_t l = extract64(insn, 48, 8);
            uint32_t b1 = extract64(insn, 44, 4);
            uint32_t d1 = extract64(insn, 32, 12);
            uint32_t b2 = extract64(insn, 28, 4);
            uint32_t d2 = extract64(insn, 16, 12);
            uint64_t a1 = wrap_address(env, (b1 ? env->regs[b1] : 0) + d1);
            uint64_t a2 = wrap_address(env, (b2 ? env->regs[b2] : 0) + d2);

            env->cc_op = helper(env, l, a1, a2, 0);
            env->psw.addr += ilen;
            return;
        }
    } else if (opc == 0x0a) {
        /* SUPERVISOR CALL: raise the exception directly.  */
        env->int_svc_code = extract64(insn, 48, 8);
        env->int_svc_ilen = ilen;
        helper_exception(env, EXCP_SVC);
        g_assert_not_reached();
    }

    /* Record the insn we want to execute as well as the ilen to use
       during the execution of the target insn.  This will also ensure
       that ex_value is non-zero, which flags that we are in a state
       that requires such execution.  */
    env->ex_value = insn | ilen;
    env->ex_target = addr;
}
2490 
/*
 * MOVE WITH OPTIONAL SPECIFICATIONS: move up to 4k bytes between the
 * address spaces / keys described by the OAC fields in R0.
 * Returns cc 0, or cc 3 when the request was truncated to 4096 bytes.
 */
uint32_t HELPER(mvcos)(CPUS390XState *env, uint64_t dest, uint64_t src,
                       uint64_t len)
{
    const uint8_t psw_key = (env->psw.mask & PSW_MASK_KEY) >> PSW_SHIFT_KEY;
    const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
    const uint64_t r0 = env->regs[0];
    const uintptr_t ra = GETPC();
    uint8_t dest_key, dest_as, dest_k, dest_a;
    uint8_t src_key, src_as, src_k, src_a;
    uint64_t val;
    int cc = 0;

    HELPER_LOG("%s dest %" PRIx64 ", src %" PRIx64 ", len %" PRIx64 "\n",
               __func__, dest, src, len);

    /* DAT must be enabled.  */
    if (!(env->psw.mask & PSW_MASK_DAT)) {
        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }

    /* OAC (operand access control) for the first operand -> dest */
    val = (r0 & 0xffff0000ULL) >> 16;
    dest_key = (val >> 12) & 0xf;   /* access key */
    dest_as = (val >> 6) & 0x3;     /* address-space control */
    dest_k = (val >> 1) & 0x1;      /* key-validity bit */
    dest_a = val & 0x1;             /* address-space-control validity bit */

    /* OAC (operand access control) for the second operand -> src */
    val = (r0 & 0x0000ffffULL);
    src_key = (val >> 12) & 0xf;
    src_as = (val >> 6) & 0x3;
    src_k = (val >> 1) & 0x1;
    src_a = val & 0x1;

    /* Fields whose validity bit is clear fall back to the PSW value.  */
    if (!dest_k) {
        dest_key = psw_key;
    }
    if (!src_k) {
        src_key = psw_key;
    }
    if (!dest_a) {
        dest_as = psw_as;
    }
    if (!src_a) {
        src_as = psw_as;
    }

    /* A problem-state program may not address the home space.  */
    if (dest_a && dest_as == AS_HOME && (env->psw.mask & PSW_MASK_PSTATE)) {
        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }
    if (!(env->cregs[0] & CR0_SECONDARY) &&
        (dest_as == AS_SECONDARY || src_as == AS_SECONDARY)) {
        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }
    if (!psw_key_valid(env, dest_key) || !psw_key_valid(env, src_key)) {
        tcg_s390_program_interrupt(env, PGM_PRIVILEGED, ra);
    }

    /* At most 4k bytes are moved per execution; more sets cc 3.  */
    len = wrap_length32(env, len);
    if (len > 4096) {
        cc = 3;
        len = 4096;
    }

    /* FIXME: AR-mode and proper problem state mode (using PSW keys) missing */
    if (src_as == AS_ACCREG || dest_as == AS_ACCREG ||
        (env->psw.mask & PSW_MASK_PSTATE)) {
        qemu_log_mask(LOG_UNIMP, "%s: AR-mode and PSTATE support missing\n",
                      __func__);
        tcg_s390_program_interrupt(env, PGM_ADDRESSING, ra);
    }

    /* FIXME: Access using correct keys and AR-mode */
    if (len) {
        S390Access srca, desta;

        access_prepare(&srca, env, src, len, MMU_DATA_LOAD,
                       mmu_idx_from_as(src_as), ra);
        access_prepare(&desta, env, dest, len, MMU_DATA_STORE,
                       mmu_idx_from_as(dest_as), ra);

        access_memmove(env, &desta, &srca, ra);
    }

    return cc;
}
2576 
2577 /* Decode a Unicode character.  A return value < 0 indicates success, storing
2578    the UTF-32 result into OCHAR and the input length into OLEN.  A return
2579    value >= 0 indicates failure, and the CC value to be returned.  */
2580 typedef int (*decode_unicode_fn)(CPUS390XState *env, uint64_t addr,
2581                                  uint64_t ilen, bool enh_check, uintptr_t ra,
2582                                  uint32_t *ochar, uint32_t *olen);
2583 
2584 /* Encode a Unicode character.  A return value < 0 indicates success, storing
2585    the bytes into ADDR and the output length into OLEN.  A return value >= 0
2586    indicates failure, and the CC value to be returned.  */
2587 typedef int (*encode_unicode_fn)(CPUS390XState *env, uint64_t addr,
2588                                  uint64_t ilen, uintptr_t ra, uint32_t c,
2589                                  uint32_t *olen);
2590 
2591 static int decode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2592                        bool enh_check, uintptr_t ra,
2593                        uint32_t *ochar, uint32_t *olen)
2594 {
2595     uint8_t s0, s1, s2, s3;
2596     uint32_t c, l;
2597 
2598     if (ilen < 1) {
2599         return 0;
2600     }
2601     s0 = cpu_ldub_data_ra(env, addr, ra);
2602     if (s0 <= 0x7f) {
2603         /* one byte character */
2604         l = 1;
2605         c = s0;
2606     } else if (s0 <= (enh_check ? 0xc1 : 0xbf)) {
2607         /* invalid character */
2608         return 2;
2609     } else if (s0 <= 0xdf) {
2610         /* two byte character */
2611         l = 2;
2612         if (ilen < 2) {
2613             return 0;
2614         }
2615         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2616         c = s0 & 0x1f;
2617         c = (c << 6) | (s1 & 0x3f);
2618         if (enh_check && (s1 & 0xc0) != 0x80) {
2619             return 2;
2620         }
2621     } else if (s0 <= 0xef) {
2622         /* three byte character */
2623         l = 3;
2624         if (ilen < 3) {
2625             return 0;
2626         }
2627         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2628         s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2629         c = s0 & 0x0f;
2630         c = (c << 6) | (s1 & 0x3f);
2631         c = (c << 6) | (s2 & 0x3f);
2632         /* Fold the byte-by-byte range descriptions in the PoO into
2633            tests against the complete value.  It disallows encodings
2634            that could be smaller, and the UTF-16 surrogates.  */
2635         if (enh_check
2636             && ((s1 & 0xc0) != 0x80
2637                 || (s2 & 0xc0) != 0x80
2638                 || c < 0x1000
2639                 || (c >= 0xd800 && c <= 0xdfff))) {
2640             return 2;
2641         }
2642     } else if (s0 <= (enh_check ? 0xf4 : 0xf7)) {
2643         /* four byte character */
2644         l = 4;
2645         if (ilen < 4) {
2646             return 0;
2647         }
2648         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2649         s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2650         s3 = cpu_ldub_data_ra(env, addr + 3, ra);
2651         c = s0 & 0x07;
2652         c = (c << 6) | (s1 & 0x3f);
2653         c = (c << 6) | (s2 & 0x3f);
2654         c = (c << 6) | (s3 & 0x3f);
2655         /* See above.  */
2656         if (enh_check
2657             && ((s1 & 0xc0) != 0x80
2658                 || (s2 & 0xc0) != 0x80
2659                 || (s3 & 0xc0) != 0x80
2660                 || c < 0x010000
2661                 || c > 0x10ffff)) {
2662             return 2;
2663         }
2664     } else {
2665         /* invalid character */
2666         return 2;
2667     }
2668 
2669     *ochar = c;
2670     *olen = l;
2671     return -1;
2672 }
2673 
2674 static int decode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2675                         bool enh_check, uintptr_t ra,
2676                         uint32_t *ochar, uint32_t *olen)
2677 {
2678     uint16_t s0, s1;
2679     uint32_t c, l;
2680 
2681     if (ilen < 2) {
2682         return 0;
2683     }
2684     s0 = cpu_lduw_data_ra(env, addr, ra);
2685     if ((s0 & 0xfc00) != 0xd800) {
2686         /* one word character */
2687         l = 2;
2688         c = s0;
2689     } else {
2690         /* two word character */
2691         l = 4;
2692         if (ilen < 4) {
2693             return 0;
2694         }
2695         s1 = cpu_lduw_data_ra(env, addr + 2, ra);
2696         c = extract32(s0, 6, 4) + 1;
2697         c = (c << 6) | (s0 & 0x3f);
2698         c = (c << 10) | (s1 & 0x3ff);
2699         if (enh_check && (s1 & 0xfc00) != 0xdc00) {
2700             /* invalid surrogate character */
2701             return 2;
2702         }
2703     }
2704 
2705     *ochar = c;
2706     *olen = l;
2707     return -1;
2708 }
2709 
2710 static int decode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2711                         bool enh_check, uintptr_t ra,
2712                         uint32_t *ochar, uint32_t *olen)
2713 {
2714     uint32_t c;
2715 
2716     if (ilen < 4) {
2717         return 0;
2718     }
2719     c = cpu_ldl_data_ra(env, addr, ra);
2720     if ((c >= 0xd800 && c <= 0xdbff) || c > 0x10ffff) {
2721         /* invalid unicode character */
2722         return 2;
2723     }
2724 
2725     *ochar = c;
2726     *olen = 4;
2727     return -1;
2728 }
2729 
2730 static int encode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2731                        uintptr_t ra, uint32_t c, uint32_t *olen)
2732 {
2733     uint8_t d[4];
2734     uint32_t l, i;
2735 
2736     if (c <= 0x7f) {
2737         /* one byte character */
2738         l = 1;
2739         d[0] = c;
2740     } else if (c <= 0x7ff) {
2741         /* two byte character */
2742         l = 2;
2743         d[1] = 0x80 | extract32(c, 0, 6);
2744         d[0] = 0xc0 | extract32(c, 6, 5);
2745     } else if (c <= 0xffff) {
2746         /* three byte character */
2747         l = 3;
2748         d[2] = 0x80 | extract32(c, 0, 6);
2749         d[1] = 0x80 | extract32(c, 6, 6);
2750         d[0] = 0xe0 | extract32(c, 12, 4);
2751     } else {
2752         /* four byte character */
2753         l = 4;
2754         d[3] = 0x80 | extract32(c, 0, 6);
2755         d[2] = 0x80 | extract32(c, 6, 6);
2756         d[1] = 0x80 | extract32(c, 12, 6);
2757         d[0] = 0xf0 | extract32(c, 18, 3);
2758     }
2759 
2760     if (ilen < l) {
2761         return 1;
2762     }
2763     for (i = 0; i < l; ++i) {
2764         cpu_stb_data_ra(env, addr + i, d[i], ra);
2765     }
2766 
2767     *olen = l;
2768     return -1;
2769 }
2770 
2771 static int encode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2772                         uintptr_t ra, uint32_t c, uint32_t *olen)
2773 {
2774     uint16_t d0, d1;
2775 
2776     if (c <= 0xffff) {
2777         /* one word character */
2778         if (ilen < 2) {
2779             return 1;
2780         }
2781         cpu_stw_data_ra(env, addr, c, ra);
2782         *olen = 2;
2783     } else {
2784         /* two word character */
2785         if (ilen < 4) {
2786             return 1;
2787         }
2788         d1 = 0xdc00 | extract32(c, 0, 10);
2789         d0 = 0xd800 | extract32(c, 10, 6);
2790         d0 = deposit32(d0, 6, 4, extract32(c, 16, 5) - 1);
2791         cpu_stw_data_ra(env, addr + 0, d0, ra);
2792         cpu_stw_data_ra(env, addr + 2, d1, ra);
2793         *olen = 4;
2794     }
2795 
2796     return -1;
2797 }
2798 
2799 static int encode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2800                         uintptr_t ra, uint32_t c, uint32_t *olen)
2801 {
2802     if (ilen < 4) {
2803         return 1;
2804     }
2805     cpu_stl_data_ra(env, addr, c, ra);
2806     *olen = 4;
2807     return -1;
2808 }
2809 
/*
 * Common CUxy loop: repeatedly decode a character from the R2 buffer
 * and encode it into the R1 buffer, updating both register pairs.
 * Returns the resulting condition code.
 */
static inline uint32_t convert_unicode(CPUS390XState *env, uint32_t r1,
                                       uint32_t r2, uint32_t m3, uintptr_t ra,
                                       decode_unicode_fn decode,
                                       encode_unicode_fn encode)
{
    /* R1/R1+1 describe the destination, R2/R2+1 the source buffer.  */
    uint64_t dst = get_address(env, r1);
    uint64_t dlen = get_length(env, r1 + 1);
    uint64_t src = get_address(env, r2);
    uint64_t slen = get_length(env, r2 + 1);
    bool enh_check = m3 & 1;    /* well-formedness checking requested */
    int cc, i;

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 256.  */
    for (i = 0; i < 256; ++i) {
        uint32_t c, ilen, olen;

        cc = decode(env, src, slen, enh_check, ra, &c, &ilen);
        if (unlikely(cc >= 0)) {
            /* source exhausted (cc 0) or invalid character (cc 2) */
            break;
        }
        cc = encode(env, dst, dlen, ra, c, &olen);
        if (unlikely(cc >= 0)) {
            /* destination exhausted (cc 1) */
            break;
        }

        /* One character converted; advance both buffers.  */
        src += ilen;
        slen -= ilen;
        dst += olen;
        dlen -= olen;
        /* If the iteration cap ends the loop, report cc 3.  */
        cc = 3;
    }

    /* Write the updated addresses and lengths back to the registers.  */
    set_address(env, r1, dst);
    set_length(env, r1 + 1, dlen);
    set_address(env, r2, src);
    set_length(env, r2 + 1, slen);

    return cc;
}
2850 
2851 uint32_t HELPER(cu12)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2852 {
2853     return convert_unicode(env, r1, r2, m3, GETPC(),
2854                            decode_utf8, encode_utf16);
2855 }
2856 
2857 uint32_t HELPER(cu14)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2858 {
2859     return convert_unicode(env, r1, r2, m3, GETPC(),
2860                            decode_utf8, encode_utf32);
2861 }
2862 
2863 uint32_t HELPER(cu21)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2864 {
2865     return convert_unicode(env, r1, r2, m3, GETPC(),
2866                            decode_utf16, encode_utf8);
2867 }
2868 
2869 uint32_t HELPER(cu24)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2870 {
2871     return convert_unicode(env, r1, r2, m3, GETPC(),
2872                            decode_utf16, encode_utf32);
2873 }
2874 
2875 uint32_t HELPER(cu41)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2876 {
2877     return convert_unicode(env, r1, r2, m3, GETPC(),
2878                            decode_utf32, encode_utf8);
2879 }
2880 
2881 uint32_t HELPER(cu42)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2882 {
2883     return convert_unicode(env, r1, r2, m3, GETPC(),
2884                            decode_utf32, encode_utf16);
2885 }
2886 
2887 void probe_write_access(CPUS390XState *env, uint64_t addr, uint64_t len,
2888                         uintptr_t ra)
2889 {
2890     const int mmu_idx = s390x_env_mmu_index(env, false);
2891 
2892     /* test the actual access, not just any access to the page due to LAP */
2893     while (len) {
2894         const uint64_t pagelen = -(addr | TARGET_PAGE_MASK);
2895         const uint64_t curlen = MIN(pagelen, len);
2896 
2897         probe_write(env, addr, curlen, mmu_idx, ra);
2898         addr = wrap_address(env, addr + curlen);
2899         len -= curlen;
2900     }
2901 }
2902 
2903 void HELPER(probe_write_access)(CPUS390XState *env, uint64_t addr, uint64_t len)
2904 {
2905     probe_write_access(env, addr, len, GETPC());
2906 }
2907