xref: /qemu/target/s390x/tcg/mem_helper.c (revision cc3d262aa93a42e19c38f6acb6d0f6012a71eb4b)
1 /*
2  *  S/390 memory access helper routines
3  *
4  *  Copyright (c) 2009 Ulrich Hecht
5  *  Copyright (c) 2009 Alexander Graf
6  *
7  * This library is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * This library is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19  */
20 
21 #include "qemu/osdep.h"
22 #include "qemu/log.h"
23 #include "cpu.h"
24 #include "s390x-internal.h"
25 #include "tcg_s390x.h"
26 #include "exec/helper-proto.h"
27 #include "exec/cpu-common.h"
28 #include "exec/exec-all.h"
29 #include "exec/cputlb.h"
30 #include "exec/page-protection.h"
31 #include "exec/cpu_ldst.h"
32 #include "accel/tcg/cpu-ops.h"
33 #include "qemu/int128.h"
34 #include "qemu/atomic128.h"
35 
36 #if defined(CONFIG_USER_ONLY)
37 #include "user/page-protection.h"
38 #else
39 #include "hw/s390x/storage-keys.h"
40 #include "hw/boards.h"
41 #endif
42 
#ifdef CONFIG_USER_ONLY
/*
 * In user-only builds, guest memory is always directly addressable, so
 * "do we have a host address?" checks are compile-time true and the
 * slow-path branches below are discarded by the compiler.
 */
# define user_or_likely(X)    true
#else
# define user_or_likely(X)    likely(X)
#endif

/*****************************************************************************/
/* Softmmu support */

/* #define DEBUG_HELPER */
#ifdef DEBUG_HELPER
#define HELPER_LOG(x...) qemu_log(x)
#else
/* Compiled out unless DEBUG_HELPER is defined above. */
#define HELPER_LOG(x...)
#endif
58 
59 static inline bool psw_key_valid(CPUS390XState *env, uint8_t psw_key)
60 {
61     uint16_t pkm = env->cregs[3] >> 16;
62 
63     if (env->psw.mask & PSW_MASK_PSTATE) {
64         /* PSW key has range 0..15, it is valid if the bit is 1 in the PKM */
65         return pkm & (0x8000 >> psw_key);
66     }
67     return true;
68 }
69 
70 static bool is_destructive_overlap(CPUS390XState *env, uint64_t dest,
71                                    uint64_t src, uint32_t len)
72 {
73     if (!len || src == dest) {
74         return false;
75     }
76     /* Take care of wrapping at the end of address space. */
77     if (unlikely(wrap_address(env, src + len - 1) < src)) {
78         return dest > src || dest <= wrap_address(env, src + len - 1);
79     }
80     return dest > src && dest <= src + len - 1;
81 }
82 
83 /* Trigger a SPECIFICATION exception if an address or a length is not
84    naturally aligned.  */
85 static inline void check_alignment(CPUS390XState *env, uint64_t v,
86                                    int wordsize, uintptr_t ra)
87 {
88     if (v % wordsize) {
89         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
90     }
91 }
92 
93 /* Load a value from memory according to its size.  */
94 static inline uint64_t cpu_ldusize_data_ra(CPUS390XState *env, uint64_t addr,
95                                            int wordsize, uintptr_t ra)
96 {
97     switch (wordsize) {
98     case 1:
99         return cpu_ldub_data_ra(env, addr, ra);
100     case 2:
101         return cpu_lduw_data_ra(env, addr, ra);
102     default:
103         abort();
104     }
105 }
106 
107 /* Store a to memory according to its size.  */
108 static inline void cpu_stsize_data_ra(CPUS390XState *env, uint64_t addr,
109                                       uint64_t value, int wordsize,
110                                       uintptr_t ra)
111 {
112     switch (wordsize) {
113     case 1:
114         cpu_stb_data_ra(env, addr, value, ra);
115         break;
116     case 2:
117         cpu_stw_data_ra(env, addr, value, ra);
118         break;
119     default:
120         abort();
121     }
122 }
123 
124 /* An access covers at most 4096 bytes and therefore at most two pages. */
/* An access covers at most 4096 bytes and therefore at most two pages. */
typedef struct S390Access {
    target_ulong vaddr1;   /* guest address of the first (or only) chunk */
    target_ulong vaddr2;   /* guest address of the chunk on the second page */
    void *haddr1;          /* host address for chunk 1; may be NULL (softmmu) */
    void *haddr2;          /* host address for chunk 2; may be NULL (softmmu) */
    uint16_t size1;        /* bytes in chunk 1 */
    uint16_t size2;        /* bytes in chunk 2; 0 if the access is one page */
    /*
     * If we can't access the host page directly, we'll have to do I/O access
     * via ld/st helpers. These are internal details, so we store the
     * mmu idx to do the access here instead of passing it around in the
     * helpers.
     */
    int mmu_idx;
} S390Access;
140 
/*
 * With nonfault=1, return the PGM_ exception that would have been injected
 * into the guest; return 0 if no exception was detected.
 *
 * For !CONFIG_USER_ONLY, the TEC is stored to env->tlb_fill_tec.
 * For CONFIG_USER_ONLY, the faulting address is stored to env->__excp_addr.
 */
static inline int s390_probe_access(CPUArchState *env, target_ulong addr,
                                    int size, MMUAccessType access_type,
                                    int mmu_idx, bool nonfault,
                                    void **phost, uintptr_t ra)
{
    int flags = probe_access_flags(env, addr, size, access_type, mmu_idx,
                                   nonfault, phost, ra);

    if (unlikely(flags & TLB_INVALID_MASK)) {
#ifdef CONFIG_USER_ONLY
        /* Address is in TEC in system mode; see s390_cpu_record_sigsegv. */
        env->__excp_addr = addr & TARGET_PAGE_MASK;
        /* A mapped-but-unwritable page is a protection fault; otherwise
           the address itself is invalid. */
        return (page_get_flags(addr) & PAGE_VALID
                ? PGM_PROTECTION : PGM_ADDRESSING);
#else
        /* The exception type was recorded by the tlb_fill hook. */
        return env->tlb_fill_exc;
#endif
    }

#ifndef CONFIG_USER_ONLY
    if (unlikely(flags & TLB_WATCHPOINT)) {
        /* S390 does not presently use transaction attributes. */
        cpu_check_watchpoint(env_cpu(env), addr, size,
                             MEMTXATTRS_UNSPECIFIED,
                             (access_type == MMU_DATA_STORE
                              ? BP_MEM_WRITE : BP_MEM_READ), ra);
    }
#endif

    return 0;
}
179 
180 static int access_prepare_nf(S390Access *access, CPUS390XState *env,
181                              bool nonfault, vaddr vaddr1, int size,
182                              MMUAccessType access_type,
183                              int mmu_idx, uintptr_t ra)
184 {
185     int size1, size2, exc;
186 
187     assert(size > 0 && size <= 4096);
188 
189     size1 = MIN(size, -(vaddr1 | TARGET_PAGE_MASK)),
190     size2 = size - size1;
191 
192     memset(access, 0, sizeof(*access));
193     access->vaddr1 = vaddr1;
194     access->size1 = size1;
195     access->size2 = size2;
196     access->mmu_idx = mmu_idx;
197 
198     exc = s390_probe_access(env, vaddr1, size1, access_type, mmu_idx, nonfault,
199                             &access->haddr1, ra);
200     if (unlikely(exc)) {
201         return exc;
202     }
203     if (unlikely(size2)) {
204         /* The access crosses page boundaries. */
205         vaddr vaddr2 = wrap_address(env, vaddr1 + size1);
206 
207         access->vaddr2 = vaddr2;
208         exc = s390_probe_access(env, vaddr2, size2, access_type, mmu_idx,
209                                 nonfault, &access->haddr2, ra);
210         if (unlikely(exc)) {
211             return exc;
212         }
213     }
214     return 0;
215 }
216 
217 static inline void access_prepare(S390Access *ret, CPUS390XState *env,
218                                   vaddr vaddr, int size,
219                                   MMUAccessType access_type, int mmu_idx,
220                                   uintptr_t ra)
221 {
222     int exc = access_prepare_nf(ret, env, false, vaddr, size,
223                                 access_type, mmu_idx, ra);
224     assert(!exc);
225 }
226 
227 /* Helper to handle memset on a single page. */
228 static void do_access_memset(CPUS390XState *env, vaddr vaddr, char *haddr,
229                              uint8_t byte, uint16_t size, int mmu_idx,
230                              uintptr_t ra)
231 {
232     if (user_or_likely(haddr)) {
233         memset(haddr, byte, size);
234     } else {
235         MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
236         for (int i = 0; i < size; i++) {
237             cpu_stb_mmu(env, vaddr + i, byte, oi, ra);
238         }
239     }
240 }
241 
242 static void access_memset(CPUS390XState *env, S390Access *desta,
243                           uint8_t byte, uintptr_t ra)
244 {
245     set_helper_retaddr(ra);
246     do_access_memset(env, desta->vaddr1, desta->haddr1, byte, desta->size1,
247                      desta->mmu_idx, ra);
248     if (unlikely(desta->size2)) {
249         do_access_memset(env, desta->vaddr2, desta->haddr2, byte,
250                          desta->size2, desta->mmu_idx, ra);
251     }
252     clear_helper_retaddr();
253 }
254 
255 static uint8_t access_get_byte(CPUS390XState *env, S390Access *access,
256                                int offset, uintptr_t ra)
257 {
258     target_ulong vaddr = access->vaddr1;
259     void *haddr = access->haddr1;
260 
261     if (unlikely(offset >= access->size1)) {
262         offset -= access->size1;
263         vaddr = access->vaddr2;
264         haddr = access->haddr2;
265     }
266 
267     if (user_or_likely(haddr)) {
268         return ldub_p(haddr + offset);
269     } else {
270         MemOpIdx oi = make_memop_idx(MO_UB, access->mmu_idx);
271         return cpu_ldb_mmu(env, vaddr + offset, oi, ra);
272     }
273 }
274 
275 static void access_set_byte(CPUS390XState *env, S390Access *access,
276                             int offset, uint8_t byte, uintptr_t ra)
277 {
278     target_ulong vaddr = access->vaddr1;
279     void *haddr = access->haddr1;
280 
281     if (unlikely(offset >= access->size1)) {
282         offset -= access->size1;
283         vaddr = access->vaddr2;
284         haddr = access->haddr2;
285     }
286 
287     if (user_or_likely(haddr)) {
288         stb_p(haddr + offset, byte);
289     } else {
290         MemOpIdx oi = make_memop_idx(MO_UB, access->mmu_idx);
291         cpu_stb_mmu(env, vaddr + offset, byte, oi, ra);
292     }
293 }
294 
295 /*
296  * Move data with the same semantics as memmove() in case ranges don't overlap
297  * or src > dest. Undefined behavior on destructive overlaps.
298  */
/*
 * Move data with the same semantics as memmove() in case ranges don't overlap
 * or src > dest. Undefined behavior on destructive overlaps.
 */
static void access_memmove(CPUS390XState *env, S390Access *desta,
                           S390Access *srca, uintptr_t ra)
{
    int len = desta->size1 + desta->size2;

    /* Source and destination must describe the same number of bytes. */
    assert(len == srca->size1 + srca->size2);

    /* Fallback to slow access in case we don't have access to all host pages */
    if (user_or_likely(desta->haddr1 &&
                       srca->haddr1 &&
                       (!desta->size2 || desta->haddr2) &&
                       (!srca->size2 || srca->haddr2))) {
        /*
         * The page split points of source and destination may differ:
         * diff is how many bytes longer the destination's first chunk
         * is than the source's first chunk.
         */
        int diff = desta->size1 - srca->size1;

        if (likely(diff == 0)) {
            /* Same split point: copy chunk 1, then chunk 2 if present. */
            memmove(desta->haddr1, srca->haddr1, srca->size1);
            if (unlikely(srca->size2)) {
                memmove(desta->haddr2, srca->haddr2, srca->size2);
            }
        } else if (diff > 0) {
            /*
             * Destination chunk 1 is longer: it receives all of source
             * chunk 1 plus the first diff bytes of source chunk 2.
             */
            memmove(desta->haddr1, srca->haddr1, srca->size1);
            memmove(desta->haddr1 + srca->size1, srca->haddr2, diff);
            if (likely(desta->size2)) {
                memmove(desta->haddr2, srca->haddr2 + diff, desta->size2);
            }
        } else {
            /*
             * Source chunk 1 is longer: its tail lands at the start of
             * destination chunk 2.
             */
            diff = -diff;
            memmove(desta->haddr1, srca->haddr1, desta->size1);
            memmove(desta->haddr2, srca->haddr1 + desta->size1, diff);
            if (likely(srca->size2)) {
                memmove(desta->haddr2 + diff, srca->haddr2, srca->size2);
            }
        }
    } else {
        /* At least one page lacks a host mapping: copy byte by byte. */
        for (int i = 0; i < len; i++) {
            uint8_t byte = access_get_byte(env, srca, i, ra);
            access_set_byte(env, desta, i, byte, ra);
        }
    }
}
339 
340 static int mmu_idx_from_as(uint8_t as)
341 {
342     switch (as) {
343     case AS_PRIMARY:
344         return MMU_PRIMARY_IDX;
345     case AS_SECONDARY:
346         return MMU_SECONDARY_IDX;
347     case AS_HOME:
348         return MMU_HOME_IDX;
349     default:
350         /* FIXME AS_ACCREG */
351         g_assert_not_reached();
352     }
353 }
354 
355 /* and on array */
356 static uint32_t do_helper_nc(CPUS390XState *env, uint32_t l, uint64_t dest,
357                              uint64_t src, uintptr_t ra)
358 {
359     const int mmu_idx = s390x_env_mmu_index(env, false);
360     S390Access srca1, srca2, desta;
361     uint32_t i;
362     uint8_t c = 0;
363 
364     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
365                __func__, l, dest, src);
366 
367     /* NC always processes one more byte than specified - maximum is 256 */
368     l++;
369 
370     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
371     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
372     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
373     set_helper_retaddr(ra);
374 
375     for (i = 0; i < l; i++) {
376         const uint8_t x = access_get_byte(env, &srca1, i, ra) &
377                           access_get_byte(env, &srca2, i, ra);
378 
379         c |= x;
380         access_set_byte(env, &desta, i, x, ra);
381     }
382 
383     clear_helper_retaddr();
384     return c != 0;
385 }
386 
/* NC entry point; the host return address is captured here and passed down. */
uint32_t HELPER(nc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    return do_helper_nc(env, l, dest, src, GETPC());
}
392 
393 /* xor on array */
/* xor on array */
static uint32_t do_helper_xc(CPUS390XState *env, uint32_t l, uint64_t dest,
                             uint64_t src, uintptr_t ra)
{
    const int mmu_idx = s390x_env_mmu_index(env, false);
    S390Access srca1, srca2, desta;
    uint32_t i;
    uint8_t c = 0;     /* accumulates all result bytes, for the CC */

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* XC always processes one more byte than specified - maximum is 256 */
    l++;

    access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);

    /* xor with itself is the same as memset(0) */
    if (src == dest) {
        /* access_memset() sets/clears the helper retaddr itself. */
        access_memset(env, &desta, 0, ra);
        return 0;
    }

    set_helper_retaddr(ra);
    for (i = 0; i < l; i++) {
        const uint8_t x = access_get_byte(env, &srca1, i, ra) ^
                          access_get_byte(env, &srca2, i, ra);

        c |= x;
        access_set_byte(env, &desta, i, x, ra);
    }
    clear_helper_retaddr();
    /* 0 when the result is all zeros, 1 otherwise. */
    return c != 0;
}
429 
/* XC entry point; the host return address is captured here and passed down. */
uint32_t HELPER(xc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    return do_helper_xc(env, l, dest, src, GETPC());
}
435 
436 /* or on array */
437 static uint32_t do_helper_oc(CPUS390XState *env, uint32_t l, uint64_t dest,
438                              uint64_t src, uintptr_t ra)
439 {
440     const int mmu_idx = s390x_env_mmu_index(env, false);
441     S390Access srca1, srca2, desta;
442     uint32_t i;
443     uint8_t c = 0;
444 
445     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
446                __func__, l, dest, src);
447 
448     /* OC always processes one more byte than specified - maximum is 256 */
449     l++;
450 
451     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
452     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
453     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
454     set_helper_retaddr(ra);
455 
456     for (i = 0; i < l; i++) {
457         const uint8_t x = access_get_byte(env, &srca1, i, ra) |
458                           access_get_byte(env, &srca2, i, ra);
459 
460         c |= x;
461         access_set_byte(env, &desta, i, x, ra);
462     }
463 
464     clear_helper_retaddr();
465     return c != 0;
466 }
467 
/* OC entry point; the host return address is captured here and passed down. */
uint32_t HELPER(oc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    return do_helper_oc(env, l, dest, src, GETPC());
}
473 
474 /* memmove */
/* memmove */
static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest,
                              uint64_t src, uintptr_t ra)
{
    const int mmu_idx = s390x_env_mmu_index(env, false);
    S390Access srca, desta;
    uint32_t i;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* MVC always copies one more byte than specified - maximum is 256 */
    l++;

    access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);

    /*
     * "When the operands overlap, the result is obtained as if the operands
     * were processed one byte at a time". Only non-destructive overlaps
     * behave like memmove().
     */
    if (dest == src + 1) {
        /* dest == src + 1 replicates the first source byte over the range. */
        access_memset(env, &desta, access_get_byte(env, &srca, 0, ra), ra);
    } else if (!is_destructive_overlap(env, dest, src, l)) {
        access_memmove(env, &desta, &srca, ra);
    } else {
        /* Destructive overlap: copy strictly one byte at a time. */
        set_helper_retaddr(ra);
        for (i = 0; i < l; i++) {
            uint8_t byte = access_get_byte(env, &srca, i, ra);

            access_set_byte(env, &desta, i, byte, ra);
        }
        clear_helper_retaddr();
    }

    /* The CC is left unchanged; return the current value for callers. */
    return env->cc_op;
}
512 
/* MVC entry point; the return value of do_helper_mvc() is not needed here. */
void HELPER(mvc)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    do_helper_mvc(env, l, dest, src, GETPC());
}
517 
518 /* move right to left */
519 void HELPER(mvcrl)(CPUS390XState *env, uint64_t l, uint64_t dest, uint64_t src)
520 {
521     const int mmu_idx = s390x_env_mmu_index(env, false);
522     const uint64_t ra = GETPC();
523     S390Access srca, desta;
524     int32_t i;
525 
526     /* MVCRL always copies one more byte than specified - maximum is 256 */
527     l &= 0xff;
528     l++;
529 
530     access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
531     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
532 
533     set_helper_retaddr(ra);
534     for (i = l - 1; i >= 0; i--) {
535         uint8_t byte = access_get_byte(env, &srca, i, ra);
536         access_set_byte(env, &desta, i, byte, ra);
537     }
538     clear_helper_retaddr();
539 }
540 
541 /* move inverse  */
542 void HELPER(mvcin)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
543 {
544     const int mmu_idx = s390x_env_mmu_index(env, false);
545     S390Access srca, desta;
546     uintptr_t ra = GETPC();
547     int i;
548 
549     /* MVCIN always copies one more byte than specified - maximum is 256 */
550     l++;
551 
552     src = wrap_address(env, src - l + 1);
553     access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
554     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
555 
556     set_helper_retaddr(ra);
557     for (i = 0; i < l; i++) {
558         const uint8_t x = access_get_byte(env, &srca, l - i - 1, ra);
559         access_set_byte(env, &desta, i, x, ra);
560     }
561     clear_helper_retaddr();
562 }
563 
564 /* move numerics  */
565 void HELPER(mvn)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
566 {
567     const int mmu_idx = s390x_env_mmu_index(env, false);
568     S390Access srca1, srca2, desta;
569     uintptr_t ra = GETPC();
570     int i;
571 
572     /* MVN always copies one more byte than specified - maximum is 256 */
573     l++;
574 
575     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
576     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
577     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
578 
579     set_helper_retaddr(ra);
580     for (i = 0; i < l; i++) {
581         const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0x0f) |
582                           (access_get_byte(env, &srca2, i, ra) & 0xf0);
583 
584         access_set_byte(env, &desta, i, x, ra);
585     }
586     clear_helper_retaddr();
587 }
588 
589 /* move with offset  */
/* move with offset  */
void HELPER(mvo)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = s390x_env_mmu_index(env, false);
    /* MVO always processes one more byte than specified - maximum is 16 */
    const int len_dest = (l >> 4) + 1;   /* destination length from bits 0-3 */
    const int len_src = (l & 0xf) + 1;   /* source length from bits 4-7 */
    uintptr_t ra = GETPC();
    uint8_t byte_dest, byte_src;
    S390Access srca, desta;
    int i, j;

    access_prepare(&srca, env, src, len_src, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, dest, len_dest, MMU_DATA_STORE, mmu_idx, ra);

    /* Handle rightmost byte */
    byte_dest = cpu_ldub_data_ra(env, dest + len_dest - 1, ra);

    set_helper_retaddr(ra);
    /* Keep the destination's low nibble; shift the source in above it. */
    byte_src = access_get_byte(env, &srca, len_src - 1, ra);
    byte_dest = (byte_dest & 0x0f) | (byte_src << 4);
    access_set_byte(env, &desta, len_dest - 1, byte_dest, ra);

    /* Process remaining bytes from right to left */
    for (i = len_dest - 2, j = len_src - 2; i >= 0; i--, j--) {
        /* High nibble of the previous source byte becomes the low nibble. */
        byte_dest = byte_src >> 4;
        if (j >= 0) {
            byte_src = access_get_byte(env, &srca, j, ra);
        } else {
            /* Source exhausted: pad with zero nibbles. */
            byte_src = 0;
        }
        byte_dest |= byte_src << 4;
        access_set_byte(env, &desta, i, byte_dest, ra);
    }
    clear_helper_retaddr();
}
625 
626 /* move zones  */
627 void HELPER(mvz)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
628 {
629     const int mmu_idx = s390x_env_mmu_index(env, false);
630     S390Access srca1, srca2, desta;
631     uintptr_t ra = GETPC();
632     int i;
633 
634     /* MVZ always copies one more byte than specified - maximum is 256 */
635     l++;
636 
637     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
638     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
639     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
640 
641     set_helper_retaddr(ra);
642     for (i = 0; i < l; i++) {
643         const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0xf0) |
644                           (access_get_byte(env, &srca2, i, ra) & 0x0f);
645 
646         access_set_byte(env, &desta, i, x, ra);
647     }
648     clear_helper_retaddr();
649 }
650 
651 /* compare unsigned byte arrays */
652 static uint32_t do_helper_clc(CPUS390XState *env, uint32_t l, uint64_t s1,
653                               uint64_t s2, uintptr_t ra)
654 {
655     uint32_t i;
656     uint32_t cc = 0;
657 
658     HELPER_LOG("%s l %d s1 %" PRIx64 " s2 %" PRIx64 "\n",
659                __func__, l, s1, s2);
660 
661     for (i = 0; i <= l; i++) {
662         uint8_t x = cpu_ldub_data_ra(env, s1 + i, ra);
663         uint8_t y = cpu_ldub_data_ra(env, s2 + i, ra);
664         HELPER_LOG("%02x (%c)/%02x (%c) ", x, x, y, y);
665         if (x < y) {
666             cc = 1;
667             break;
668         } else if (x > y) {
669             cc = 2;
670             break;
671         }
672     }
673 
674     HELPER_LOG("\n");
675     return cc;
676 }
677 
/* CLC entry point; the host return address is captured here and passed down. */
uint32_t HELPER(clc)(CPUS390XState *env, uint32_t l, uint64_t s1, uint64_t s2)
{
    return do_helper_clc(env, l, s1, s2, GETPC());
}
682 
683 /* compare logical under mask */
/* compare logical under mask */
uint32_t HELPER(clm)(CPUS390XState *env, uint32_t r1, uint32_t mask,
                     uint64_t addr)
{
    uintptr_t ra = GETPC();
    uint32_t cc = 0;

    HELPER_LOG("%s: r1 0x%x mask 0x%x addr 0x%" PRIx64 "\n", __func__, r1,
               mask, addr);

    if (!mask) {
        /* Recognize access exceptions for the first byte */
        probe_read(env, addr, 1, s390x_env_mmu_index(env, false), ra);
    }

    /*
     * Walk the 4-bit mask left to right: bit 3 of "mask" selects whether
     * the current register byte (bits 24-31 of r1 after shifting)
     * participates in the comparison.
     */
    while (mask) {
        if (mask & 8) {
            uint8_t d = cpu_ldub_data_ra(env, addr, ra);
            uint8_t r = extract32(r1, 24, 8);
            HELPER_LOG("mask 0x%x %02x/%02x (0x%" PRIx64 ") ", mask, r, d,
                       addr);
            if (r < d) {
                cc = 1;
                break;
            } else if (r > d) {
                cc = 2;
                break;
            }
            /* Only selected bytes consume a memory byte. */
            addr++;
        }
        mask = (mask << 1) & 0xf;
        r1 <<= 8;
    }

    HELPER_LOG("\n");
    return cc;
}
720 
/* Fetch a GPR value wrapped to the current addressing mode (24/31/64-bit). */
static inline uint64_t get_address(CPUS390XState *env, int reg)
{
    return wrap_address(env, env->regs[reg]);
}
725 
726 /*
727  * Store the address to the given register, zeroing out unused leftmost
728  * bits in bit positions 32-63 (24-bit and 31-bit mode only).
729  */
730 static inline void set_address_zero(CPUS390XState *env, int reg,
731                                     uint64_t address)
732 {
733     if (env->psw.mask & PSW_MASK_64) {
734         env->regs[reg] = address;
735     } else {
736         if (!(env->psw.mask & PSW_MASK_32)) {
737             address &= 0x00ffffff;
738         } else {
739             address &= 0x7fffffff;
740         }
741         env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
742     }
743 }
744 
/*
 * Store an address to the given register per the current addressing mode,
 * leaving bits outside the address untouched where the PoO permits.
 */
static inline void set_address(CPUS390XState *env, int reg, uint64_t address)
{
    if (env->psw.mask & PSW_MASK_64) {
        /* 64-Bit mode */
        env->regs[reg] = address;
    } else {
        if (!(env->psw.mask & PSW_MASK_32)) {
            /* 24-Bit mode. According to the PoO it is implementation
            dependent if bits 32-39 remain unchanged or are set to
            zeros.  Choose the former so that the function can also be
            used for TRT.  */
            env->regs[reg] = deposit64(env->regs[reg], 0, 24, address);
        } else {
            /* 31-Bit mode. According to the PoO it is implementation
            dependent if bit 32 remains unchanged or is set to zero.
            Choose the latter so that the function can also be used for
            TRT.  */
            address &= 0x7fffffff;
            env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
        }
    }
}
767 
768 static inline uint64_t wrap_length32(CPUS390XState *env, uint64_t length)
769 {
770     if (!(env->psw.mask & PSW_MASK_64)) {
771         return (uint32_t)length;
772     }
773     return length;
774 }
775 
776 static inline uint64_t wrap_length31(CPUS390XState *env, uint64_t length)
777 {
778     if (!(env->psw.mask & PSW_MASK_64)) {
779         /* 24-Bit and 31-Bit mode */
780         length &= 0x7fffffff;
781     }
782     return length;
783 }
784 
/* Read a length from a register, masked to 31 bits outside 64-bit mode. */
static inline uint64_t get_length(CPUS390XState *env, int reg)
{
    return wrap_length31(env, env->regs[reg]);
}
789 
790 static inline void set_length(CPUS390XState *env, int reg, uint64_t length)
791 {
792     if (env->psw.mask & PSW_MASK_64) {
793         /* 64-Bit mode */
794         env->regs[reg] = length;
795     } else {
796         /* 24-Bit and 31-Bit mode */
797         env->regs[reg] = deposit64(env->regs[reg], 0, 32, length);
798     }
799 }
800 
801 /* search string (c is byte to search, r2 is string, r1 end of string) */
/* search string (c is byte to search, r2 is string, r1 end of string) */
void HELPER(srst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    uintptr_t ra = GETPC();
    uint64_t end, str;
    uint32_t len;
    uint8_t v, c = env->regs[0];   /* search character from R0 bits 56-63 */

    /* Bits 32-55 must contain all 0.  */
    if (env->regs[0] & 0xffffff00u) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    str = get_address(env, r2);
    end = get_address(env, r1);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; ++len) {
        if (str + len == end) {
            /* Character not found.  R1 & R2 are unmodified.  */
            env->cc_op = 2;
            return;
        }
        v = cpu_ldub_data_ra(env, str + len, ra);
        if (v == c) {
            /* Character found.  Set R1 to the location; R2 is unmodified.  */
            env->cc_op = 1;
            set_address(env, r1, str + len);
            return;
        }
    }

    /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
    env->cc_op = 3;
    set_address(env, r2, str + len);
}
838 
/* search string unicode: like SRST but over 16-bit characters */
void HELPER(srstu)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    uintptr_t ra = GETPC();
    uint32_t len;
    uint16_t v, c = env->regs[0];   /* search character from R0 bits 48-63 */
    uint64_t end, str, adj_end;

    /* Bits 32-47 of R0 must be zero.  */
    if (env->regs[0] & 0xffff0000u) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    str = get_address(env, r2);
    end = get_address(env, r1);

    /* If the LSB of the two addresses differ, use one extra byte.  */
    adj_end = end + ((str ^ end) & 1);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; len += 2) {
        if (str + len == adj_end) {
            /* End of input found.  */
            env->cc_op = 2;
            return;
        }
        v = cpu_lduw_data_ra(env, str + len, ra);
        if (v == c) {
            /* Character found.  Set R1 to the location; R2 is unmodified.  */
            env->cc_op = 1;
            set_address(env, r1, str + len);
            return;
        }
    }

    /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
    env->cc_op = 3;
    set_address(env, r2, str + len);
}
878 
879 /* unsigned string compare (c is string terminator) */
/* unsigned string compare (c is string terminator) */
Int128 HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t s2)
{
    uintptr_t ra = GETPC();
    uint32_t len;

    /* Only the low byte of the terminator is used. */
    c = c & 0xff;
    s1 = wrap_address(env, s1);
    s2 = wrap_address(env, s2);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; ++len) {
        uint8_t v1 = cpu_ldub_data_ra(env, s1 + len, ra);
        uint8_t v2 = cpu_ldub_data_ra(env, s2 + len, ra);
        if (v1 == v2) {
            if (v1 == c) {
                /* Equal.  CC=0, and don't advance the registers.  */
                env->cc_op = 0;
                return int128_make128(s2, s1);
            }
        } else {
            /* Unequal.  CC={1,2}, and advance the registers.  Note that
               the terminator need not be zero, but the string that contains
               the terminator is by definition "low".  */
            env->cc_op = (v1 == c ? 1 : v2 == c ? 2 : v1 < v2 ? 1 : 2);
            return int128_make128(s2 + len, s1 + len);
        }
    }

    /* CPU-determined bytes equal; advance the registers.  */
    env->cc_op = 3;
    return int128_make128(s2 + len, s1 + len);
}
913 
914 /* move page */
915 uint32_t HELPER(mvpg)(CPUS390XState *env, uint64_t r0, uint32_t r1, uint32_t r2)
916 {
917     const uint64_t src = get_address(env, r2) & TARGET_PAGE_MASK;
918     const uint64_t dst = get_address(env, r1) & TARGET_PAGE_MASK;
919     const int mmu_idx = s390x_env_mmu_index(env, false);
920     const bool f = extract64(r0, 11, 1);
921     const bool s = extract64(r0, 10, 1);
922     const bool cco = extract64(r0, 8, 1);
923     uintptr_t ra = GETPC();
924     S390Access srca, desta;
925     int exc;
926 
927     if ((f && s) || extract64(r0, 12, 4)) {
928         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC());
929     }
930 
931     /*
932      * We always manually handle exceptions such that we can properly store
933      * r1/r2 to the lowcore on page-translation exceptions.
934      *
935      * TODO: Access key handling
936      */
937     exc = access_prepare_nf(&srca, env, true, src, TARGET_PAGE_SIZE,
938                             MMU_DATA_LOAD, mmu_idx, ra);
939     if (exc) {
940         if (cco) {
941             return 2;
942         }
943         goto inject_exc;
944     }
945     exc = access_prepare_nf(&desta, env, true, dst, TARGET_PAGE_SIZE,
946                             MMU_DATA_STORE, mmu_idx, ra);
947     if (exc) {
948         if (cco && exc != PGM_PROTECTION) {
949             return 1;
950         }
951         goto inject_exc;
952     }
953     access_memmove(env, &desta, &srca, ra);
954     return 0; /* data moved */
955 inject_exc:
956 #if !defined(CONFIG_USER_ONLY)
957     if (exc != PGM_ADDRESSING) {
958         stq_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, trans_exc_code),
959                  env->tlb_fill_tec);
960     }
961     if (exc == PGM_PAGE_TRANS) {
962         stb_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, op_access_id),
963                  r1 << 4 | r2);
964     }
965 #endif
966     tcg_s390_program_interrupt(env, exc, ra);
967 }
968 
/* string copy */
uint32_t HELPER(mvst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    const int mmu_idx = s390x_env_mmu_index(env, false);
    const uint64_t d = get_address(env, r1);
    const uint64_t s = get_address(env, r2);
    const uint8_t c = env->regs[0];
    /* Number of bytes until the end of the nearer of the two pages.  */
    const int len = MIN(-(d | TARGET_PAGE_MASK), -(s | TARGET_PAGE_MASK));
    S390Access srca, desta;
    uintptr_t ra = GETPC();
    int i;

    /* Bits 32-55 of general register 0 must be zero; only the low byte
       (the terminator) may be set.  */
    if (env->regs[0] & 0xffffff00ull) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    /*
     * Our access should not exceed single pages, as we must not report access
     * exceptions exceeding the actually copied range (which we don't know at
     * this point). We might over-indicate watchpoints within the pages
     * (if we ever care, we have to limit processing to a single byte).
     */
    access_prepare(&srca, env, s, len, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, d, len, MMU_DATA_STORE, mmu_idx, ra);

    /* Copy byte by byte until the terminator is moved or the page ends.  */
    set_helper_retaddr(ra);
    for (i = 0; i < len; i++) {
        const uint8_t v = access_get_byte(env, &srca, i, ra);

        access_set_byte(env, &desta, i, v, ra);
        if (v == c) {
            /* Terminator moved: CC=1, R1 points at it, R2 unmodified.  */
            clear_helper_retaddr();
            set_address_zero(env, r1, d + i);
            return 1;
        }
    }
    clear_helper_retaddr();
    /* CPU-determined bytes moved: CC=3, advance both addresses.  */
    set_address_zero(env, r1, d + len);
    set_address_zero(env, r2, s + len);
    return 3;
}
1010 
1011 /* load access registers r1 to r3 from memory at a2 */
1012 void HELPER(lam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1013 {
1014     uintptr_t ra = GETPC();
1015     int i;
1016 
1017     if (a2 & 0x3) {
1018         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1019     }
1020 
1021     for (i = r1;; i = (i + 1) % 16) {
1022         env->aregs[i] = cpu_ldl_data_ra(env, a2, ra);
1023         a2 += 4;
1024 
1025         if (i == r3) {
1026             break;
1027         }
1028     }
1029 }
1030 
1031 /* store access registers r1 to r3 in memory at a2 */
1032 void HELPER(stam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1033 {
1034     uintptr_t ra = GETPC();
1035     int i;
1036 
1037     if (a2 & 0x3) {
1038         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1039     }
1040 
1041     for (i = r1;; i = (i + 1) % 16) {
1042         cpu_stl_data_ra(env, a2, env->aregs[i], ra);
1043         a2 += 4;
1044 
1045         if (i == r3) {
1046             break;
1047         }
1048     }
1049 }
1050 
/* move long helper */
static inline uint32_t do_mvcl(CPUS390XState *env,
                               uint64_t *dest, uint64_t *destlen,
                               uint64_t *src, uint64_t *srclen,
                               uint16_t pad, int wordsize, uintptr_t ra)
{
    const int mmu_idx = s390x_env_mmu_index(env, false);
    /* Bytes remaining on the current destination page.  */
    int len = MIN(*destlen, -(*dest | TARGET_PAGE_MASK));
    S390Access srca, desta;
    int i, cc;

    /* The condition code compares the original operand lengths.  */
    if (*destlen == *srclen) {
        cc = 0;
    } else if (*destlen < *srclen) {
        cc = 1;
    } else {
        cc = 2;
    }

    if (!*destlen) {
        return cc;
    }

    /*
     * Only perform one type of operation (move/pad) at a time.
     * Stay within single pages.
     */
    if (*srclen) {
        /* Copy the src array */
        len = MIN(MIN(*srclen, -(*src | TARGET_PAGE_MASK)), len);
        *destlen -= len;
        *srclen -= len;
        access_prepare(&srca, env, *src, len, MMU_DATA_LOAD, mmu_idx, ra);
        access_prepare(&desta, env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
        access_memmove(env, &desta, &srca, ra);
        *src = wrap_address(env, *src + len);
        *dest = wrap_address(env, *dest + len);
    } else if (wordsize == 1) {
        /* Pad the remaining area */
        *destlen -= len;
        access_prepare(&desta, env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
        access_memset(env, &desta, pad, ra);
        *dest = wrap_address(env, *dest + len);
    } else {
        /* Pad with a two-byte pad element, byte by byte.  */
        access_prepare(&desta, env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
        set_helper_retaddr(ra);

        /* The remaining length selects the padding byte. */
        for (i = 0; i < len; (*destlen)--, i++) {
            if (*destlen & 1) {
                access_set_byte(env, &desta, i, pad, ra);
            } else {
                access_set_byte(env, &desta, i, pad >> 8, ra);
            }
        }
        clear_helper_retaddr();
        *dest = wrap_address(env, *dest + len);
    }

    /* CC=3 while destination bytes remain to be processed.  */
    return *destlen ? 3 : cc;
}
1112 
/* move long */
uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    const int mmu_idx = s390x_env_mmu_index(env, false);
    uintptr_t ra = GETPC();
    /* Operand lengths are the low 24 bits of the odd registers.  */
    uint64_t destlen = env->regs[r1 + 1] & 0xffffff;
    uint64_t dest = get_address(env, r1);
    uint64_t srclen = env->regs[r2 + 1] & 0xffffff;
    uint64_t src = get_address(env, r2);
    /* The pad byte sits directly above the 24-bit source length.  */
    uint8_t pad = env->regs[r2 + 1] >> 24;
    CPUState *cs = env_cpu(env);
    S390Access srca, desta;
    uint32_t cc, cur_len;

    /* Destructive overlap yields CC=3 and no data is moved.  */
    if (is_destructive_overlap(env, dest, src, MIN(srclen, destlen))) {
        cc = 3;
    } else if (srclen == destlen) {
        cc = 0;
    } else if (destlen < srclen) {
        cc = 1;
    } else {
        cc = 2;
    }

    /* We might have to zero-out some bits even if there was no action. */
    if (unlikely(!destlen || cc == 3)) {
        set_address_zero(env, r2, src);
        set_address_zero(env, r1, dest);
        return cc;
    } else if (!srclen) {
        set_address_zero(env, r2, src);
    }

    /*
     * Only perform one type of operation (move/pad) in one step.
     * Stay within single pages.
     */
    while (destlen) {
        cur_len = MIN(destlen, -(dest | TARGET_PAGE_MASK));
        if (!srclen) {
            /* Source exhausted: pad the remaining destination.  */
            access_prepare(&desta, env, dest, cur_len,
                           MMU_DATA_STORE, mmu_idx, ra);
            access_memset(env, &desta, pad, ra);
        } else {
            /* Also stay within a single source page.  */
            cur_len = MIN(MIN(srclen, -(src | TARGET_PAGE_MASK)), cur_len);

            access_prepare(&srca, env, src, cur_len,
                           MMU_DATA_LOAD, mmu_idx, ra);
            access_prepare(&desta, env, dest, cur_len,
                           MMU_DATA_STORE, mmu_idx, ra);
            access_memmove(env, &desta, &srca, ra);
            src = wrap_address(env, src + cur_len);
            srclen -= cur_len;
            env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, srclen);
            set_address_zero(env, r2, src);
        }
        dest = wrap_address(env, dest + cur_len);
        destlen -= cur_len;
        env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, destlen);
        set_address_zero(env, r1, dest);

        /*
         * MVCL is interruptible. Return to the main loop if requested after
         * writing back all state to registers. If no interrupt will get
         * injected, we'll end up back in this handler and continue processing
         * the remaining parts.
         */
        if (destlen && unlikely(cpu_loop_exit_requested(cs))) {
            cpu_loop_exit_restore(cs, ra);
        }
    }
    return cc;
}
1186 
1187 /* move long extended */
1188 uint32_t HELPER(mvcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1189                        uint32_t r3)
1190 {
1191     uintptr_t ra = GETPC();
1192     uint64_t destlen = get_length(env, r1 + 1);
1193     uint64_t dest = get_address(env, r1);
1194     uint64_t srclen = get_length(env, r3 + 1);
1195     uint64_t src = get_address(env, r3);
1196     uint8_t pad = a2;
1197     uint32_t cc;
1198 
1199     cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 1, ra);
1200 
1201     set_length(env, r1 + 1, destlen);
1202     set_length(env, r3 + 1, srclen);
1203     set_address(env, r1, dest);
1204     set_address(env, r3, src);
1205 
1206     return cc;
1207 }
1208 
1209 /* move long unicode */
1210 uint32_t HELPER(mvclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1211                        uint32_t r3)
1212 {
1213     uintptr_t ra = GETPC();
1214     uint64_t destlen = get_length(env, r1 + 1);
1215     uint64_t dest = get_address(env, r1);
1216     uint64_t srclen = get_length(env, r3 + 1);
1217     uint64_t src = get_address(env, r3);
1218     uint16_t pad = a2;
1219     uint32_t cc;
1220 
1221     cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 2, ra);
1222 
1223     set_length(env, r1 + 1, destlen);
1224     set_length(env, r3 + 1, srclen);
1225     set_address(env, r1, dest);
1226     set_address(env, r3, src);
1227 
1228     return cc;
1229 }
1230 
/* compare logical long helper */
static inline uint32_t do_clcl(CPUS390XState *env,
                               uint64_t *src1, uint64_t *src1len,
                               uint64_t *src3, uint64_t *src3len,
                               uint16_t pad, uint64_t limit,
                               int wordsize, uintptr_t ra)
{
    uint64_t len = MAX(*src1len, *src3len);
    uint32_t cc = 0;

    /* Both operand lengths must be a multiple of the element size.  */
    check_alignment(env, *src1len | *src3len, wordsize, ra);

    if (!len) {
        return cc;
    }

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  */
    if (len > limit) {
        len = limit;
        cc = 3;
    }

    for (; len; len -= wordsize) {
        /* An exhausted operand is extended with the padding element.  */
        uint16_t v1 = pad;
        uint16_t v3 = pad;

        if (*src1len) {
            v1 = cpu_ldusize_data_ra(env, *src1, wordsize, ra);
        }
        if (*src3len) {
            v3 = cpu_ldusize_data_ra(env, *src3, wordsize, ra);
        }

        if (v1 != v3) {
            /* Mismatch: CC=1 if the first operand is low, else CC=2.  */
            cc = (v1 < v3) ? 1 : 2;
            break;
        }

        /* Advance whichever operands are not yet exhausted.  */
        if (*src1len) {
            *src1 += wordsize;
            *src1len -= wordsize;
        }
        if (*src3len) {
            *src3 += wordsize;
            *src3len -= wordsize;
        }
    }

    return cc;
}
1282 
1283 
1284 /* compare logical long */
1285 uint32_t HELPER(clcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
1286 {
1287     uintptr_t ra = GETPC();
1288     uint64_t src1len = extract64(env->regs[r1 + 1], 0, 24);
1289     uint64_t src1 = get_address(env, r1);
1290     uint64_t src3len = extract64(env->regs[r2 + 1], 0, 24);
1291     uint64_t src3 = get_address(env, r2);
1292     uint8_t pad = env->regs[r2 + 1] >> 24;
1293     uint32_t cc;
1294 
1295     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, -1, 1, ra);
1296 
1297     env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, src1len);
1298     env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, src3len);
1299     set_address(env, r1, src1);
1300     set_address(env, r2, src3);
1301 
1302     return cc;
1303 }
1304 
1305 /* compare logical long extended memcompare insn with padding */
1306 uint32_t HELPER(clcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1307                        uint32_t r3)
1308 {
1309     uintptr_t ra = GETPC();
1310     uint64_t src1len = get_length(env, r1 + 1);
1311     uint64_t src1 = get_address(env, r1);
1312     uint64_t src3len = get_length(env, r3 + 1);
1313     uint64_t src3 = get_address(env, r3);
1314     uint8_t pad = a2;
1315     uint32_t cc;
1316 
1317     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x2000, 1, ra);
1318 
1319     set_length(env, r1 + 1, src1len);
1320     set_length(env, r3 + 1, src3len);
1321     set_address(env, r1, src1);
1322     set_address(env, r3, src3);
1323 
1324     return cc;
1325 }
1326 
1327 /* compare logical long unicode memcompare insn with padding */
1328 uint32_t HELPER(clclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1329                        uint32_t r3)
1330 {
1331     uintptr_t ra = GETPC();
1332     uint64_t src1len = get_length(env, r1 + 1);
1333     uint64_t src1 = get_address(env, r1);
1334     uint64_t src3len = get_length(env, r3 + 1);
1335     uint64_t src3 = get_address(env, r3);
1336     uint16_t pad = a2;
1337     uint32_t cc = 0;
1338 
1339     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x1000, 2, ra);
1340 
1341     set_length(env, r1 + 1, src1len);
1342     set_length(env, r3 + 1, src3len);
1343     set_address(env, r1, src1);
1344     set_address(env, r3, src3);
1345 
1346     return cc;
1347 }
1348 
1349 /* checksum */
1350 Int128 HELPER(cksm)(CPUS390XState *env, uint64_t r1,
1351                     uint64_t src, uint64_t src_len)
1352 {
1353     uintptr_t ra = GETPC();
1354     uint64_t max_len, len;
1355     uint64_t cksm = (uint32_t)r1;
1356 
1357     /* Lest we fail to service interrupts in a timely manner, limit the
1358        amount of work we're willing to do.  For now, let's cap at 8k.  */
1359     max_len = (src_len > 0x2000 ? 0x2000 : src_len);
1360 
1361     /* Process full words as available.  */
1362     for (len = 0; len + 4 <= max_len; len += 4, src += 4) {
1363         cksm += (uint32_t)cpu_ldl_data_ra(env, src, ra);
1364     }
1365 
1366     switch (max_len - len) {
1367     case 1:
1368         cksm += cpu_ldub_data_ra(env, src, ra) << 24;
1369         len += 1;
1370         break;
1371     case 2:
1372         cksm += cpu_lduw_data_ra(env, src, ra) << 16;
1373         len += 2;
1374         break;
1375     case 3:
1376         cksm += cpu_lduw_data_ra(env, src, ra) << 16;
1377         cksm += cpu_ldub_data_ra(env, src + 2, ra) << 8;
1378         len += 3;
1379         break;
1380     }
1381 
1382     /* Fold the carry from the checksum.  Note that we can see carry-out
1383        during folding more than once (but probably not more than twice).  */
1384     while (cksm > 0xffffffffull) {
1385         cksm = (uint32_t)cksm + (cksm >> 32);
1386     }
1387 
1388     /* Indicate whether or not we've processed everything.  */
1389     env->cc_op = (len == src_len ? 0 : 3);
1390 
1391     /* Return both cksm and processed length.  */
1392     return int128_make128(cksm, len);
1393 }
1394 
/*
 * PACK: convert zoned-decimal digits at src into packed-decimal at dest.
 * The high nibble of len is the destination length code, the low nibble
 * the source length code; the last byte of each operand is addressed at
 * that offset, so processing runs from right to left.
 */
void HELPER(pack)(CPUS390XState *env, uint32_t len, uint64_t dest, uint64_t src)
{
    uintptr_t ra = GETPC();
    int len_dest = len >> 4;
    int len_src = len & 0xf;
    uint8_t b;

    /* Point at the rightmost byte of each operand.  */
    dest += len_dest;
    src += len_src;

    /* last byte is special, it only flips the nibbles */
    b = cpu_ldub_data_ra(env, src, ra);
    cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
    src--;
    len_src--;

    /* now pack every value */
    while (len_dest > 0) {
        b = 0;

        /* Low nibble from the next source byte, if any remain.  Note that
           len_src may go negative here; an exhausted source yields 0.  */
        if (len_src >= 0) {
            b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
            src--;
            len_src--;
        }
        /* High nibble from the following source byte, if any remain.  */
        if (len_src >= 0) {
            b |= cpu_ldub_data_ra(env, src, ra) << 4;
            src--;
            len_src--;
        }

        len_dest--;
        dest--;
        cpu_stb_data_ra(env, dest, b, ra);
    }
}
1431 
/*
 * Pack the rightmost digits of the src operand into a 16-byte
 * packed-decimal result at dest.  ssize is the width in bytes of one
 * source element (1 for PKA, 2 for PKU); only the low byte of each
 * element contributes a digit nibble.
 */
static inline void do_pkau(CPUS390XState *env, uint64_t dest, uint64_t src,
                           uint32_t srclen, int ssize, uintptr_t ra)
{
    int i;
    /* The destination operand is always 16 bytes long.  */
    const int destlen = 16;

    /* The operands are processed from right to left.  */
    src += srclen - 1;
    dest += destlen - 1;

    for (i = 0; i < destlen; i++) {
        uint8_t b = 0;

        /* Start with a positive sign */
        if (i == 0) {
            b = 0xc;
        } else if (srclen > ssize) {
            /* Low nibble: the next source digit, if any remain.  */
            b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
            src -= ssize;
            srclen -= ssize;
        }

        /* High nibble: the following source digit, if any remain.  */
        if (srclen > ssize) {
            b |= cpu_ldub_data_ra(env, src, ra) << 4;
            src -= ssize;
            srclen -= ssize;
        }

        cpu_stb_data_ra(env, dest, b, ra);
        dest--;
    }
}
1465 
1466 
1467 void HELPER(pka)(CPUS390XState *env, uint64_t dest, uint64_t src,
1468                  uint32_t srclen)
1469 {
1470     do_pkau(env, dest, src, srclen, 1, GETPC());
1471 }
1472 
1473 void HELPER(pku)(CPUS390XState *env, uint64_t dest, uint64_t src,
1474                  uint32_t srclen)
1475 {
1476     do_pkau(env, dest, src, srclen, 2, GETPC());
1477 }
1478 
/*
 * UNPACK: convert packed-decimal digits at src into zoned-decimal at
 * dest.  The high nibble of len is the destination length code, the low
 * nibble the source length code; processing runs from right to left.
 */
void HELPER(unpk)(CPUS390XState *env, uint32_t len, uint64_t dest,
                  uint64_t src)
{
    uintptr_t ra = GETPC();
    int len_dest = len >> 4;
    int len_src = len & 0xf;
    uint8_t b;
    /* Toggles between the low and high nibble of the current src byte.  */
    int second_nibble = 0;

    /* Point at the rightmost byte of each operand.  */
    dest += len_dest;
    src += len_src;

    /* last byte is special, it only flips the nibbles */
    b = cpu_ldub_data_ra(env, src, ra);
    cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
    src--;
    len_src--;

    /* now pad every nibble with 0xf0 */

    while (len_dest > 0) {
        uint8_t cur_byte = 0;

        /* An exhausted source supplies zero digits.  */
        if (len_src > 0) {
            cur_byte = cpu_ldub_data_ra(env, src, ra);
        }

        len_dest--;
        dest--;

        /* only advance one nibble at a time */
        if (second_nibble) {
            cur_byte >>= 4;
            len_src--;
            src--;
        }
        second_nibble = !second_nibble;

        /* digit */
        cur_byte = (cur_byte & 0xf);
        /* zone bits */
        cur_byte |= 0xf0;

        cpu_stb_data_ra(env, dest, cur_byte, ra);
    }
}
1525 
/*
 * Unpack the 16-byte packed-decimal value at src into destlen bytes of
 * digits at dest.  dsize is the width in bytes of one destination
 * element (1 for UNPKA, 2 for UNPKU).  Returns the condition code
 * derived from the sign nibble.
 */
static inline uint32_t do_unpkau(CPUS390XState *env, uint64_t dest,
                                 uint32_t destlen, int dsize, uint64_t src,
                                 uintptr_t ra)
{
    int i;
    uint32_t cc;
    uint8_t b;
    /* The source operand is always 16 bytes long.  */
    const int srclen = 16;

    /* The operands are processed from right to left.  */
    src += srclen - 1;
    dest += destlen - dsize;

    /* Check for the sign.  */
    b = cpu_ldub_data_ra(env, src, ra);
    src--;
    switch (b & 0xf) {
    case 0xa:
    case 0xc:
    case 0xe ... 0xf:
        cc = 0;  /* plus */
        break;
    case 0xb:
    case 0xd:
        cc = 1;  /* minus */
        break;
    default:
    case 0x0 ... 0x9:
        cc = 3;  /* invalid */
        break;
    }

    /* Now pad every nibble with 0x30, advancing one nibble at a time. */
    for (i = 0; i < destlen; i += dsize) {
        if (i == (31 * dsize)) {
            /* If length is 32/64 bytes, the leftmost byte is 0. */
            b = 0;
        } else if (i % (2 * dsize)) {
            /* Odd digit position: fetch the next source byte.  */
            b = cpu_ldub_data_ra(env, src, ra);
            src--;
        } else {
            /* Even digit position: reuse the previous byte's high nibble
               (for i == 0 this is the sign byte's digit nibble).  */
            b >>= 4;
        }
        cpu_stsize_data_ra(env, dest, 0x30 + (b & 0xf), dsize, ra);
        dest -= dsize;
    }

    return cc;
}
1576 
1577 uint32_t HELPER(unpka)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
1578                        uint64_t src)
1579 {
1580     return do_unpkau(env, dest, destlen, 1, src, GETPC());
1581 }
1582 
1583 uint32_t HELPER(unpku)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
1584                        uint64_t src)
1585 {
1586     return do_unpkau(env, dest, destlen, 2, src, GETPC());
1587 }
1588 
1589 uint32_t HELPER(tp)(CPUS390XState *env, uint64_t dest, uint32_t destlen)
1590 {
1591     uintptr_t ra = GETPC();
1592     uint32_t cc = 0;
1593     int i;
1594 
1595     for (i = 0; i < destlen; i++) {
1596         uint8_t b = cpu_ldub_data_ra(env, dest + i, ra);
1597         /* digit */
1598         cc |= (b & 0xf0) > 0x90 ? 2 : 0;
1599 
1600         if (i == (destlen - 1)) {
1601             /* sign */
1602             cc |= (b & 0xf) < 0xa ? 1 : 0;
1603         } else {
1604             /* digit */
1605             cc |= (b & 0xf) > 0x9 ? 2 : 0;
1606         }
1607     }
1608 
1609     return cc;
1610 }
1611 
1612 static uint32_t do_helper_tr(CPUS390XState *env, uint32_t len, uint64_t array,
1613                              uint64_t trans, uintptr_t ra)
1614 {
1615     uint32_t i;
1616 
1617     for (i = 0; i <= len; i++) {
1618         uint8_t byte = cpu_ldub_data_ra(env, array + i, ra);
1619         uint8_t new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
1620         cpu_stb_data_ra(env, array + i, new_byte, ra);
1621     }
1622 
1623     return env->cc_op;
1624 }
1625 
1626 void HELPER(tr)(CPUS390XState *env, uint32_t len, uint64_t array,
1627                 uint64_t trans)
1628 {
1629     do_helper_tr(env, len, array, trans, GETPC());
1630 }
1631 
/*
 * TRANSLATE EXTENDED: translate bytes through the table at trans until
 * the test byte (low byte of r0) is found or the work cap is reached.
 * Returns the remaining length and next address; cc is set in env->cc_op.
 */
Int128 HELPER(tre)(CPUS390XState *env, uint64_t array,
                   uint64_t len, uint64_t trans)
{
    uintptr_t ra = GETPC();
    /* The test byte ending the translation is the low byte of r0.  */
    uint8_t end = env->regs[0] & 0xff;
    uint64_t l = len;
    uint64_t i;
    uint32_t cc = 0;

    /* Outside 64-bit addressing, truncate the address and length.  */
    if (!(env->psw.mask & PSW_MASK_64)) {
        array &= 0x7fffffff;
        l = (uint32_t)l;
    }

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    if (l > 0x2000) {
        l = 0x2000;
        cc = 3;
    }

    for (i = 0; i < l; i++) {
        uint8_t byte, new_byte;

        byte = cpu_ldub_data_ra(env, array + i, ra);

        /* The test byte itself is not translated or stored.  */
        if (byte == end) {
            cc = 1;
            break;
        }

        /* Translate the byte in place through the table.  */
        new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
        cpu_stb_data_ra(env, array + i, new_byte, ra);
    }

    env->cc_op = cc;
    return int128_make128(len - i, array + i);
}
1670 
1671 static inline uint32_t do_helper_trt(CPUS390XState *env, int len,
1672                                      uint64_t array, uint64_t trans,
1673                                      int inc, uintptr_t ra)
1674 {
1675     int i;
1676 
1677     for (i = 0; i <= len; i++) {
1678         uint8_t byte = cpu_ldub_data_ra(env, array + i * inc, ra);
1679         uint8_t sbyte = cpu_ldub_data_ra(env, trans + byte, ra);
1680 
1681         if (sbyte != 0) {
1682             set_address(env, 1, array + i * inc);
1683             env->regs[2] = deposit64(env->regs[2], 0, 8, sbyte);
1684             return (i == len) ? 2 : 1;
1685         }
1686     }
1687 
1688     return 0;
1689 }
1690 
/* Forward variant of TRANSLATE AND TEST (increment +1).  */
static uint32_t do_helper_trt_fwd(CPUS390XState *env, uint32_t len,
                                  uint64_t array, uint64_t trans,
                                  uintptr_t ra)
{
    return do_helper_trt(env, len, array, trans, 1, ra);
}
1697 
1698 uint32_t HELPER(trt)(CPUS390XState *env, uint32_t len, uint64_t array,
1699                      uint64_t trans)
1700 {
1701     return do_helper_trt(env, len, array, trans, 1, GETPC());
1702 }
1703 
/* Backward variant of TRANSLATE AND TEST (increment -1).  */
static uint32_t do_helper_trt_bkwd(CPUS390XState *env, uint32_t len,
                                   uint64_t array, uint64_t trans,
                                   uintptr_t ra)
{
    return do_helper_trt(env, len, array, trans, -1, ra);
}
1710 
1711 uint32_t HELPER(trtr)(CPUS390XState *env, uint32_t len, uint64_t array,
1712                       uint64_t trans)
1713 {
1714     return do_helper_trt(env, len, array, trans, -1, GETPC());
1715 }
1716 
/* Translate one/two to one/two */
uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2,
                      uint32_t tst, uint32_t sizes)
{
    uintptr_t ra = GETPC();
    /* Element widths in bytes: bit 0 selects dst, bit 1 selects src.  */
    int dsize = (sizes & 1) ? 1 : 2;
    int ssize = (sizes & 2) ? 1 : 2;
    uint64_t tbl = get_address(env, 1);
    uint64_t dst = get_address(env, r1);
    uint64_t len = get_length(env, r1 + 1);
    uint64_t src = get_address(env, r2);
    uint32_t cc = 3;
    int i;

    /* The lower address bits of TBL are ignored.  For TROO, TROT, it's
       the low 3 bits (double-word aligned).  For TRTO, TRTT, it's either
       the low 12 bits (4K, without ETF2-ENH) or 3 bits (with ETF2-ENH).  */
    if (ssize == 2 && !s390_has_feat(S390_FEAT_ETF2_ENH)) {
        tbl &= -4096;
    } else {
        tbl &= -8;
    }

    /* The operand length must be a multiple of the source element size.  */
    check_alignment(env, len, ssize, ra);

    /* Lest we fail to service interrupts in a timely manner, */
    /* limit the amount of work we're willing to do.   */
    for (i = 0; i < 0x2000; i++) {
        uint16_t sval = cpu_ldusize_data_ra(env, src, ssize, ra);
        uint64_t tble = tbl + (sval * dsize);
        uint16_t dval = cpu_ldusize_data_ra(env, tble, dsize, ra);
        /* Stop without storing when the translated value equals tst.  */
        if (dval == tst) {
            cc = 1;
            break;
        }
        cpu_stsize_data_ra(env, dst, dval, dsize, ra);

        len -= ssize;
        src += ssize;
        dst += dsize;

        if (len == 0) {
            cc = 0;
            break;
        }
    }

    /* Write back the updated addresses and remaining length.  */
    set_address(env, r1, dst);
    set_length(env, r1 + 1, len);
    set_address(env, r2, src);

    return cc;
}
1770 
1771 static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
1772                         uint64_t a2, bool parallel)
1773 {
1774     uint32_t mem_idx = s390x_env_mmu_index(env, false);
1775     MemOpIdx oi16 = make_memop_idx(MO_TE | MO_128, mem_idx);
1776     MemOpIdx oi8 = make_memop_idx(MO_TE | MO_64, mem_idx);
1777     MemOpIdx oi4 = make_memop_idx(MO_TE | MO_32, mem_idx);
1778     MemOpIdx oi2 = make_memop_idx(MO_TE | MO_16, mem_idx);
1779     MemOpIdx oi1 = make_memop_idx(MO_8, mem_idx);
1780     uintptr_t ra = GETPC();
1781     uint32_t fc = extract32(env->regs[0], 0, 8);
1782     uint32_t sc = extract32(env->regs[0], 8, 8);
1783     uint64_t pl = get_address(env, 1) & -16;
1784     uint64_t svh, svl;
1785     uint32_t cc;
1786 
1787     /* Sanity check the function code and storage characteristic.  */
1788     if (fc > 1 || sc > 3) {
1789         if (!s390_has_feat(S390_FEAT_COMPARE_AND_SWAP_AND_STORE_2)) {
1790             goto spec_exception;
1791         }
1792         if (fc > 2 || sc > 4 || (fc == 2 && (r3 & 1))) {
1793             goto spec_exception;
1794         }
1795     }
1796 
1797     /* Sanity check the alignments.  */
1798     if (extract32(a1, 0, fc + 2) || extract32(a2, 0, sc)) {
1799         goto spec_exception;
1800     }
1801 
1802     /* Sanity check writability of the store address.  */
1803     probe_write(env, a2, 1 << sc, mem_idx, ra);
1804 
1805     /*
1806      * Note that the compare-and-swap is atomic, and the store is atomic,
1807      * but the complete operation is not.  Therefore we do not need to
1808      * assert serial context in order to implement this.  That said,
1809      * restart early if we can't support either operation that is supposed
1810      * to be atomic.
1811      */
1812     if (parallel) {
1813         uint32_t max = 2;
1814 #ifdef CONFIG_ATOMIC64
1815         max = 3;
1816 #endif
1817         if ((HAVE_CMPXCHG128 ? 0 : fc + 2 > max) ||
1818             (HAVE_ATOMIC128_RW ? 0 : sc > max)) {
1819             cpu_loop_exit_atomic(env_cpu(env), ra);
1820         }
1821     }
1822 
1823     /*
1824      * All loads happen before all stores.  For simplicity, load the entire
1825      * store value area from the parameter list.
1826      */
1827     svh = cpu_ldq_mmu(env, pl + 16, oi8, ra);
1828     svl = cpu_ldq_mmu(env, pl + 24, oi8, ra);
1829 
1830     switch (fc) {
1831     case 0:
1832         {
1833             uint32_t nv = cpu_ldl_mmu(env, pl, oi4, ra);
1834             uint32_t cv = env->regs[r3];
1835             uint32_t ov;
1836 
1837             if (parallel) {
1838                 ov = cpu_atomic_cmpxchgl_be_mmu(env, a1, cv, nv, oi4, ra);
1839             } else {
1840                 ov = cpu_ldl_mmu(env, a1, oi4, ra);
1841                 cpu_stl_mmu(env, a1, (ov == cv ? nv : ov), oi4, ra);
1842             }
1843             cc = (ov != cv);
1844             env->regs[r3] = deposit64(env->regs[r3], 32, 32, ov);
1845         }
1846         break;
1847 
1848     case 1:
1849         {
1850             uint64_t nv = cpu_ldq_mmu(env, pl, oi8, ra);
1851             uint64_t cv = env->regs[r3];
1852             uint64_t ov;
1853 
1854             if (parallel) {
1855 #ifdef CONFIG_ATOMIC64
1856                 ov = cpu_atomic_cmpxchgq_be_mmu(env, a1, cv, nv, oi8, ra);
1857 #else
1858                 /* Note that we asserted !parallel above.  */
1859                 g_assert_not_reached();
1860 #endif
1861             } else {
1862                 ov = cpu_ldq_mmu(env, a1, oi8, ra);
1863                 cpu_stq_mmu(env, a1, (ov == cv ? nv : ov), oi8, ra);
1864             }
1865             cc = (ov != cv);
1866             env->regs[r3] = ov;
1867         }
1868         break;
1869 
1870     case 2:
1871         {
1872             Int128 nv = cpu_ld16_mmu(env, pl, oi16, ra);
1873             Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1874             Int128 ov;
1875 
1876             if (!parallel) {
1877                 ov = cpu_ld16_mmu(env, a1, oi16, ra);
1878                 cc = !int128_eq(ov, cv);
1879                 if (cc) {
1880                     nv = ov;
1881                 }
1882                 cpu_st16_mmu(env, a1, nv, oi16, ra);
1883             } else if (HAVE_CMPXCHG128) {
1884                 ov = cpu_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi16, ra);
1885                 cc = !int128_eq(ov, cv);
1886             } else {
1887                 /* Note that we asserted !parallel above.  */
1888                 g_assert_not_reached();
1889             }
1890 
1891             env->regs[r3 + 0] = int128_gethi(ov);
1892             env->regs[r3 + 1] = int128_getlo(ov);
1893         }
1894         break;
1895 
1896     default:
1897         g_assert_not_reached();
1898     }
1899 
1900     /* Store only if the comparison succeeded.  Note that above we use a pair
1901        of 64-bit big-endian loads, so for sc < 3 we must extract the value
1902        from the most-significant bits of svh.  */
1903     if (cc == 0) {
1904         switch (sc) {
1905         case 0:
1906             cpu_stb_mmu(env, a2, svh >> 56, oi1, ra);
1907             break;
1908         case 1:
1909             cpu_stw_mmu(env, a2, svh >> 48, oi2, ra);
1910             break;
1911         case 2:
1912             cpu_stl_mmu(env, a2, svh >> 32, oi4, ra);
1913             break;
1914         case 3:
1915             cpu_stq_mmu(env, a2, svh, oi8, ra);
1916             break;
1917         case 4:
1918             cpu_st16_mmu(env, a2, int128_make128(svl, svh), oi16, ra);
1919             break;
1920         default:
1921             g_assert_not_reached();
1922         }
1923     }
1924 
1925     return cc;
1926 
1927  spec_exception:
1928     tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1929 }
1930 
/*
 * COMPARE AND SWAP AND STORE: non-parallel (serial-context) entry point;
 * the actual work is done by do_csst().
 */
uint32_t HELPER(csst)(CPUS390XState *env, uint32_t r3, uint64_t a1, uint64_t a2)
{
    return do_csst(env, r3, a1, a2, false);
}
1935 
/*
 * COMPARE AND SWAP AND STORE: parallel-context entry point; do_csst() may
 * punt to cpu_loop_exit_atomic() when the host lacks the needed atomics.
 */
uint32_t HELPER(csst_parallel)(CPUS390XState *env, uint32_t r3, uint64_t a1,
                               uint64_t a2)
{
    return do_csst(env, r3, a1, a2, true);
}
1941 
1942 #if !defined(CONFIG_USER_ONLY)
1943 void HELPER(lctlg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1944 {
1945     uintptr_t ra = GETPC();
1946     bool PERchanged = false;
1947     uint64_t src = a2;
1948     uint32_t i;
1949 
1950     if (src & 0x7) {
1951         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1952     }
1953 
1954     for (i = r1;; i = (i + 1) % 16) {
1955         uint64_t val = cpu_ldq_data_ra(env, src, ra);
1956         if (env->cregs[i] != val && i >= 9 && i <= 11) {
1957             PERchanged = true;
1958         }
1959         env->cregs[i] = val;
1960         HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%" PRIx64 "\n",
1961                    i, src, val);
1962         src += sizeof(uint64_t);
1963 
1964         if (i == r3) {
1965             break;
1966         }
1967     }
1968 
1969     if (PERchanged && env->psw.mask & PSW_MASK_PER) {
1970         s390_cpu_recompute_watchpoints(env_cpu(env));
1971     }
1972 
1973     tlb_flush(env_cpu(env));
1974 }
1975 
1976 void HELPER(lctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1977 {
1978     uintptr_t ra = GETPC();
1979     bool PERchanged = false;
1980     uint64_t src = a2;
1981     uint32_t i;
1982 
1983     if (src & 0x3) {
1984         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1985     }
1986 
1987     for (i = r1;; i = (i + 1) % 16) {
1988         uint32_t val = cpu_ldl_data_ra(env, src, ra);
1989         if ((uint32_t)env->cregs[i] != val && i >= 9 && i <= 11) {
1990             PERchanged = true;
1991         }
1992         env->cregs[i] = deposit64(env->cregs[i], 0, 32, val);
1993         HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%x\n", i, src, val);
1994         src += sizeof(uint32_t);
1995 
1996         if (i == r3) {
1997             break;
1998         }
1999     }
2000 
2001     if (PERchanged && env->psw.mask & PSW_MASK_PER) {
2002         s390_cpu_recompute_watchpoints(env_cpu(env));
2003     }
2004 
2005     tlb_flush(env_cpu(env));
2006 }
2007 
2008 void HELPER(stctg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2009 {
2010     uintptr_t ra = GETPC();
2011     uint64_t dest = a2;
2012     uint32_t i;
2013 
2014     if (dest & 0x7) {
2015         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2016     }
2017 
2018     for (i = r1;; i = (i + 1) % 16) {
2019         cpu_stq_data_ra(env, dest, env->cregs[i], ra);
2020         dest += sizeof(uint64_t);
2021 
2022         if (i == r3) {
2023             break;
2024         }
2025     }
2026 }
2027 
2028 void HELPER(stctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2029 {
2030     uintptr_t ra = GETPC();
2031     uint64_t dest = a2;
2032     uint32_t i;
2033 
2034     if (dest & 0x3) {
2035         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2036     }
2037 
2038     for (i = r1;; i = (i + 1) % 16) {
2039         cpu_stl_data_ra(env, dest, env->cregs[i], ra);
2040         dest += sizeof(uint32_t);
2041 
2042         if (i == r3) {
2043             break;
2044         }
2045     }
2046 }
2047 
2048 uint32_t HELPER(testblock)(CPUS390XState *env, uint64_t real_addr)
2049 {
2050     uintptr_t ra = GETPC();
2051     int i;
2052 
2053     real_addr = wrap_address(env, real_addr) & TARGET_PAGE_MASK;
2054 
2055     for (i = 0; i < TARGET_PAGE_SIZE; i += 8) {
2056         cpu_stq_mmuidx_ra(env, real_addr + i, 0, MMU_REAL_IDX, ra);
2057     }
2058 
2059     return 0;
2060 }
2061 
/*
 * TEST PROTECTION: probe whether a1 can be stored to and/or fetched from,
 * returning the condition code (0 = fetch+store, 1 = fetch only,
 * 2 = neither, 3 = translation not available).
 */
uint32_t HELPER(tprot)(CPUS390XState *env, uint64_t a1, uint64_t a2)
{
    S390CPU *cpu = env_archcpu(env);
    CPUState *cs = env_cpu(env);

    /*
     * TODO: we currently don't handle all access protection types
     * (including access-list and key-controlled) as well as AR mode.
     */
    if (!s390_cpu_virt_mem_check_write(cpu, a1, 0, 1)) {
        /* Fetching permitted; storing permitted */
        return 0;
    }

    /*
     * The write probe failed; env->int_pgm_code presumably holds the
     * program-interrupt code of that failure -- classify it into a cc.
     */
    if (env->int_pgm_code == PGM_PROTECTION) {
        /* retry if reading is possible */
        cs->exception_index = -1;   /* discard the pending exception */
        if (!s390_cpu_virt_mem_check_read(cpu, a1, 0, 1)) {
            /* Fetching permitted; storing not permitted */
            return 1;
        }
    }

    switch (env->int_pgm_code) {
    case PGM_PROTECTION:
        /* Fetching not permitted; storing not permitted */
        cs->exception_index = -1;
        return 2;
    case PGM_ADDRESSING:
    case PGM_TRANS_SPEC:
        /* exceptions forwarded to the guest */
        s390_cpu_virt_mem_handle_exc(cpu, GETPC());
        return 0;
    }

    /* Translation not available */
    cs->exception_index = -1;
    return 3;
}
2101 
2102 /* insert storage key extended */
uint64_t HELPER(iske)(CPUS390XState *env, uint64_t r2)
{
    /* Storage-key device is looked up once and cached across calls.  */
    static S390SKeysState *ss;
    static S390SKeysClass *skeyclass;
    uint64_t addr = wrap_address(env, r2);
    uint8_t key;
    int rc;

    /* Operate on the absolute address behind the real address in r2.  */
    addr = mmu_real2abs(env, addr);
    if (!mmu_absolute_addr_valid(addr, false)) {
        tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
    }

    if (unlikely(!ss)) {
        ss = s390_get_skeys_device();
        skeyclass = S390_SKEYS_GET_CLASS(ss);
        if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
            /* NOTE(review): flush appears tied to skeys becoming enabled
               for the first time -- confirm enable_skeys() semantics.  */
            tlb_flush_all_cpus_synced(env_cpu(env));
        }
    }

    /* On failure to read the key, return 0 rather than faulting.  */
    rc = s390_skeys_get(ss, addr / TARGET_PAGE_SIZE, 1, &key);
    if (rc) {
        return 0;
    }
    return key;
}
2130 
2131 /* set storage key extended */
void HELPER(sske)(CPUS390XState *env, uint64_t r1, uint64_t r2)
{
    /* Storage-key device is looked up once and cached across calls.  */
    static S390SKeysState *ss;
    static S390SKeysClass *skeyclass;
    uint64_t addr = wrap_address(env, r2);
    uint8_t key;

    /* Operate on the absolute address behind the real address in r2.  */
    addr = mmu_real2abs(env, addr);
    if (!mmu_absolute_addr_valid(addr, false)) {
        tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
    }

    if (unlikely(!ss)) {
        ss = s390_get_skeys_device();
        skeyclass = S390_SKEYS_GET_CLASS(ss);
        if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
            /* NOTE(review): flush appears tied to skeys becoming enabled
               for the first time -- confirm enable_skeys() semantics.  */
            tlb_flush_all_cpus_synced(env_cpu(env));
        }
    }

    /* The low bit of the key operand is ignored.  */
    key = r1 & 0xfe;
    s390_skeys_set(ss, addr / TARGET_PAGE_SIZE, 1, &key);
    /*
     * As we can only flush by virtual address and not all the entries
     * that point to a physical address we have to flush the whole TLB.
     */
    tlb_flush_all_cpus_synced(env_cpu(env));
}
2160 
2161 /* reset reference bit extended */
uint32_t HELPER(rrbe)(CPUS390XState *env, uint64_t r2)
{
    uint64_t addr = wrap_address(env, r2);
    /* Storage-key device is looked up once and cached across calls.  */
    static S390SKeysState *ss;
    static S390SKeysClass *skeyclass;
    uint8_t re, key;
    int rc;

    /* Operate on the absolute address behind the real address in r2.  */
    addr = mmu_real2abs(env, addr);
    if (!mmu_absolute_addr_valid(addr, false)) {
        tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
    }

    if (unlikely(!ss)) {
        ss = s390_get_skeys_device();
        skeyclass = S390_SKEYS_GET_CLASS(ss);
        if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
            /* NOTE(review): flush appears tied to skeys becoming enabled
               for the first time -- confirm enable_skeys() semantics.  */
            tlb_flush_all_cpus_synced(env_cpu(env));
        }
    }

    /* On failure to access the key, return cc 0 rather than faulting.  */
    rc = s390_skeys_get(ss, addr / TARGET_PAGE_SIZE, 1, &key);
    if (rc) {
        return 0;
    }

    /* Save the old reference and change bits, then clear reference.  */
    re = key & (SK_R | SK_C);
    key &= ~SK_R;

    rc = s390_skeys_set(ss, addr / TARGET_PAGE_SIZE, 1, &key);
    if (rc) {
        return 0;
    }
    /*
     * As we can only flush by virtual address and not all the entries
     * that point to a physical address we have to flush the whole TLB.
     */
    tlb_flush_all_cpus_synced(env_cpu(env));

    /*
     * cc
     *
     * 0  Reference bit zero; change bit zero
     * 1  Reference bit zero; change bit one
     * 2  Reference bit one; change bit zero
     * 3  Reference bit one; change bit one
     */

    return re >> 1;
}
2212 
2213 uint32_t HELPER(mvcs)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2,
2214                       uint64_t key)
2215 {
2216     const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2217     S390Access srca, desta;
2218     uintptr_t ra = GETPC();
2219     int cc = 0;
2220 
2221     HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
2222                __func__, l, a1, a2);
2223 
2224     if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
2225         psw_as == AS_HOME || psw_as == AS_ACCREG) {
2226         s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2227     }
2228 
2229     if (!psw_key_valid(env, (key >> 4) & 0xf)) {
2230         s390_program_interrupt(env, PGM_PRIVILEGED, ra);
2231     }
2232 
2233     l = wrap_length32(env, l);
2234     if (l > 256) {
2235         /* max 256 */
2236         l = 256;
2237         cc = 3;
2238     } else if (!l) {
2239         return cc;
2240     }
2241 
2242     access_prepare(&srca, env, a2, l, MMU_DATA_LOAD, MMU_PRIMARY_IDX, ra);
2243     access_prepare(&desta, env, a1, l, MMU_DATA_STORE, MMU_SECONDARY_IDX, ra);
2244     access_memmove(env, &desta, &srca, ra);
2245     return cc;
2246 }
2247 
2248 uint32_t HELPER(mvcp)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2,
2249                       uint64_t key)
2250 {
2251     const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2252     S390Access srca, desta;
2253     uintptr_t ra = GETPC();
2254     int cc = 0;
2255 
2256     HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
2257                __func__, l, a1, a2);
2258 
2259     if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
2260         psw_as == AS_HOME || psw_as == AS_ACCREG) {
2261         s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2262     }
2263 
2264     if (!psw_key_valid(env, (key >> 4) & 0xf)) {
2265         s390_program_interrupt(env, PGM_PRIVILEGED, ra);
2266     }
2267 
2268     l = wrap_length32(env, l);
2269     if (l > 256) {
2270         /* max 256 */
2271         l = 256;
2272         cc = 3;
2273     } else if (!l) {
2274         return cc;
2275     }
2276     access_prepare(&srca, env, a2, l, MMU_DATA_LOAD, MMU_SECONDARY_IDX, ra);
2277     access_prepare(&desta, env, a1, l, MMU_DATA_STORE, MMU_PRIMARY_IDX, ra);
2278     access_memmove(env, &desta, &srca, ra);
2279     return cc;
2280 }
2281 
/*
 * INVALIDATE DAT TABLE ENTRY: optionally mark a range of region/segment
 * table entries invalid, then flush the TLB (locally or on all CPUs,
 * depending on m4 bit 0).
 */
void HELPER(idte)(CPUS390XState *env, uint64_t r1, uint64_t r2, uint32_t m4)
{
    CPUState *cs = env_cpu(env);
    const uintptr_t ra = GETPC();
    uint64_t table, entry, raddr;
    uint16_t entries, i, index = 0;

    /* Reserved bits in r2 must be zero.  */
    if (r2 & 0xff000) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    if (!(r2 & 0x800)) {
        /* invalidation-and-clearing operation */
        table = r1 & ASCE_ORIGIN;
        entries = (r2 & 0x7ff) + 1;

        /* Extract the starting table index for the designated level.  */
        switch (r1 & ASCE_TYPE_MASK) {
        case ASCE_TYPE_REGION1:
            index = (r2 >> 53) & 0x7ff;
            break;
        case ASCE_TYPE_REGION2:
            index = (r2 >> 42) & 0x7ff;
            break;
        case ASCE_TYPE_REGION3:
            index = (r2 >> 31) & 0x7ff;
            break;
        case ASCE_TYPE_SEGMENT:
            index = (r2 >> 20) & 0x7ff;
            break;
        }
        for (i = 0; i < entries; i++) {
            /* addresses are not wrapped in 24/31bit mode but table index is */
            raddr = table + ((index + i) & 0x7ff) * sizeof(entry);
            entry = cpu_ldq_mmuidx_ra(env, raddr, MMU_REAL_IDX, ra);
            if (!(entry & REGION_ENTRY_I)) {
                /* we are allowed to not store if already invalid */
                entry |= REGION_ENTRY_I;
                cpu_stq_mmuidx_ra(env, raddr, entry, MMU_REAL_IDX, ra);
            }
        }
    }

    /* We simply flush the complete tlb, therefore we can ignore r3. */
    if (m4 & 1) {
        tlb_flush(cs);
    } else {
        tlb_flush_all_cpus_synced(cs);
    }
}
2331 
2332 /* invalidate pte */
/*
 * INVALIDATE PAGE TABLE ENTRY: set the invalid bit in the PTE designated
 * by pto/vaddr, then flush the affected page (or the whole TLB when no
 * usable virtual address is supplied), locally or on all CPUs per m4.
 */
void HELPER(ipte)(CPUS390XState *env, uint64_t pto, uint64_t vaddr,
                  uint32_t m4)
{
    CPUState *cs = env_cpu(env);
    const uintptr_t ra = GETPC();
    uint64_t page = vaddr & TARGET_PAGE_MASK;
    uint64_t pte_addr, pte;

    /* Compute the page table entry address */
    pte_addr = (pto & SEGMENT_ENTRY_ORIGIN);
    pte_addr += VADDR_PAGE_TX(vaddr) * 8;

    /* Mark the page table entry as invalid */
    pte = cpu_ldq_mmuidx_ra(env, pte_addr, MMU_REAL_IDX, ra);
    pte |= PAGE_ENTRY_I;
    cpu_stq_mmuidx_ra(env, pte_addr, pte, MMU_REAL_IDX, ra);

    /* XXX we exploit the fact that Linux passes the exact virtual
       address here - it's not obliged to! */
    if (m4 & 1) {
        /* local-TLB-clearing: flush only this CPU.  */
        if (vaddr & ~VADDR_PAGE_TX_MASK) {
            tlb_flush_page(cs, page);
            /* XXX 31-bit hack */
            tlb_flush_page(cs, page ^ 0x80000000);
        } else {
            /* looks like we don't have a valid virtual address */
            tlb_flush(cs);
        }
    } else {
        /* broadcast: flush on all CPUs, synchronized.  */
        if (vaddr & ~VADDR_PAGE_TX_MASK) {
            tlb_flush_page_all_cpus_synced(cs, page);
            /* XXX 31-bit hack */
            tlb_flush_page_all_cpus_synced(cs, page ^ 0x80000000);
        } else {
            /* looks like we don't have a valid virtual address */
            tlb_flush_all_cpus_synced(cs);
        }
    }
}
2372 
2373 /* flush local tlb */
void HELPER(ptlb)(CPUS390XState *env)
{
    /* PURGE TLB: flush this CPU's TLB only.  */
    tlb_flush(env_cpu(env));
}
2378 
2379 /* flush global tlb */
void HELPER(purge)(CPUS390XState *env)
{
    /* Flush the TLBs of all CPUs, synchronized.  */
    tlb_flush_all_cpus_synced(env_cpu(env));
}
2384 
2385 /* load real address */
/*
 * LOAD REAL ADDRESS: translate addr and return the real address, setting
 * env->cc_op as a side effect (cc 0 = translated, cc 3 = exception; the
 * exception code is then placed in the low half of the result).
 */
uint64_t HELPER(lra)(CPUS390XState *env, uint64_t r1, uint64_t addr)
{
    uint64_t asc = env->psw.mask & PSW_MASK_ASC;
    uint64_t ret, tec;
    int flags, exc, cc;

    /* XXX incomplete - has more corner cases */
    if (!(env->psw.mask & PSW_MASK_64) && (addr >> 32)) {
        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, GETPC());
    }

    exc = mmu_translate(env, addr, MMU_S390_LRA, asc, &ret, &flags, &tec);
    if (exc) {
        cc = 3;
        /* Keep the high half of r1; report the exception in the low half.  */
        ret = (r1 & 0xFFFFFFFF00000000ULL) | exc | 0x80000000;
    } else {
        cc = 0;
        /* Re-attach the in-page offset to the translated page address.  */
        ret |= addr & ~TARGET_PAGE_MASK;
    }

    env->cc_op = cc;
    return ret;
}
2409 #endif
2410 
2411 /* Execute instruction.  This instruction executes an insn modified with
2412    the contents of r1.  It does not change the executed instruction in memory;
2413    it does not change the program counter.
2414 
2415    Perform this by recording the modified instruction in env->ex_value.
2416    This will be noticed by cpu_get_tb_cpu_state and thus tb translation.
2417 */
void HELPER(ex)(CPUS390XState *env, uint32_t ilen, uint64_t r1, uint64_t addr)
{
    uint64_t insn;
    uint8_t opc;

    /* EXECUTE targets must be at even addresses.  */
    if (addr & 1) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC());
    }

    insn = cpu_lduw_code(env, addr);
    opc = insn >> 8;

    /* Or in the contents of R1[56:63].  */
    insn |= r1 & 0xff;

    /* Load the rest of the instruction.  */
    /* Left-justify the (modified) first halfword in the 64-bit value.  */
    insn <<= 48;
    switch (get_ilen(opc)) {
    case 2:
        break;
    case 4:
        insn |= (uint64_t)cpu_lduw_code(env, addr + 2) << 32;
        break;
    case 6:
        insn |= (uint64_t)(uint32_t)cpu_ldl_code(env, addr + 2) << 16;
        break;
    default:
        g_assert_not_reached();
    }

    /* The very most common cases can be sped up by avoiding a new TB.  */
    if ((opc & 0xf0) == 0xd0) {
        /* 0xd_ opcodes: dispatch directly to the matching SS-format
           helper instead of retranslating.  */
        typedef uint32_t (*dx_helper)(CPUS390XState *, uint32_t, uint64_t,
                                      uint64_t, uintptr_t);
        static const dx_helper dx[16] = {
            [0x0] = do_helper_trt_bkwd,
            [0x2] = do_helper_mvc,
            [0x4] = do_helper_nc,
            [0x5] = do_helper_clc,
            [0x6] = do_helper_oc,
            [0x7] = do_helper_xc,
            [0xc] = do_helper_tr,
            [0xd] = do_helper_trt_fwd,
        };
        dx_helper helper = dx[opc & 0xf];

        if (helper) {
            /* Decode the SS-format fields from the left-justified insn.  */
            uint32_t l = extract64(insn, 48, 8);
            uint32_t b1 = extract64(insn, 44, 4);
            uint32_t d1 = extract64(insn, 32, 12);
            uint32_t b2 = extract64(insn, 28, 4);
            uint32_t d2 = extract64(insn, 16, 12);
            uint64_t a1 = wrap_address(env, (b1 ? env->regs[b1] : 0) + d1);
            uint64_t a2 = wrap_address(env, (b2 ? env->regs[b2] : 0) + d2);

            env->cc_op = helper(env, l, a1, a2, 0);
            env->psw.addr += ilen;
            return;
        }
    } else if (opc == 0x0a) {
        /* SVC: raise the supervisor-call exception immediately.  */
        env->int_svc_code = extract64(insn, 48, 8);
        env->int_svc_ilen = ilen;
        helper_exception(env, EXCP_SVC);
        g_assert_not_reached();
    }

    /* Record the insn we want to execute as well as the ilen to use
       during the execution of the target insn.  This will also ensure
       that ex_value is non-zero, which flags that we are in a state
       that requires such execution.  */
    env->ex_value = insn | ilen;
    env->ex_target = addr;
}
2492 
/*
 * MOVE WITH OPTIONAL SPECIFICATIONS: copy up to 4096 bytes between two
 * address spaces/keys selected by the OACs in r0.  Returns cc 3 when the
 * requested length was truncated, cc 0 otherwise.
 */
uint32_t HELPER(mvcos)(CPUS390XState *env, uint64_t dest, uint64_t src,
                       uint64_t len)
{
    const uint8_t psw_key = (env->psw.mask & PSW_MASK_KEY) >> PSW_SHIFT_KEY;
    const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
    const uint64_t r0 = env->regs[0];
    const uintptr_t ra = GETPC();
    uint8_t dest_key, dest_as, dest_k, dest_a;
    uint8_t src_key, src_as, src_k, src_a;
    uint64_t val;
    int cc = 0;

    HELPER_LOG("%s dest %" PRIx64 ", src %" PRIx64 ", len %" PRIx64 "\n",
               __func__, dest, src, len);

    /* DAT must be enabled.  */
    if (!(env->psw.mask & PSW_MASK_DAT)) {
        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }

    /* OAC (operand access control) for the first operand -> dest */
    val = (r0 & 0xffff0000ULL) >> 16;
    dest_key = (val >> 12) & 0xf;
    dest_as = (val >> 6) & 0x3;
    dest_k = (val >> 1) & 0x1;   /* key-validity bit */
    dest_a = val & 0x1;          /* as-validity bit */

    /* OAC (operand access control) for the second operand -> src */
    val = (r0 & 0x0000ffffULL);
    src_key = (val >> 12) & 0xf;
    src_as = (val >> 6) & 0x3;
    src_k = (val >> 1) & 0x1;
    src_a = val & 0x1;

    /* Fields whose validity bit is clear fall back to the PSW values.  */
    if (!dest_k) {
        dest_key = psw_key;
    }
    if (!src_k) {
        src_key = psw_key;
    }
    if (!dest_a) {
        dest_as = psw_as;
    }
    if (!src_a) {
        src_as = psw_as;
    }

    /* Home-space destination is not allowed in problem state.  */
    if (dest_a && dest_as == AS_HOME && (env->psw.mask & PSW_MASK_PSTATE)) {
        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }
    if (!(env->cregs[0] & CR0_SECONDARY) &&
        (dest_as == AS_SECONDARY || src_as == AS_SECONDARY)) {
        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }
    if (!psw_key_valid(env, dest_key) || !psw_key_valid(env, src_key)) {
        tcg_s390_program_interrupt(env, PGM_PRIVILEGED, ra);
    }

    len = wrap_length32(env, len);
    if (len > 4096) {
        /* At most 4096 bytes are moved; cc 3 reports the truncation.  */
        cc = 3;
        len = 4096;
    }

    /* FIXME: AR-mode and proper problem state mode (using PSW keys) missing */
    if (src_as == AS_ACCREG || dest_as == AS_ACCREG ||
        (env->psw.mask & PSW_MASK_PSTATE)) {
        qemu_log_mask(LOG_UNIMP, "%s: AR-mode and PSTATE support missing\n",
                      __func__);
        tcg_s390_program_interrupt(env, PGM_ADDRESSING, ra);
    }

    /* FIXME: Access using correct keys and AR-mode */
    if (len) {
        S390Access srca, desta;

        access_prepare(&srca, env, src, len, MMU_DATA_LOAD,
                       mmu_idx_from_as(src_as), ra);
        access_prepare(&desta, env, dest, len, MMU_DATA_STORE,
                       mmu_idx_from_as(dest_as), ra);

        access_memmove(env, &desta, &srca, ra);
    }

    return cc;
}
2578 
2579 /* Decode a Unicode character.  A return value < 0 indicates success, storing
2580    the UTF-32 result into OCHAR and the input length into OLEN.  A return
2581    value >= 0 indicates failure, and the CC value to be returned.  */
2582 typedef int (*decode_unicode_fn)(CPUS390XState *env, uint64_t addr,
2583                                  uint64_t ilen, bool enh_check, uintptr_t ra,
2584                                  uint32_t *ochar, uint32_t *olen);
2585 
2586 /* Encode a Unicode character.  A return value < 0 indicates success, storing
2587    the bytes into ADDR and the output length into OLEN.  A return value >= 0
2588    indicates failure, and the CC value to be returned.  */
2589 typedef int (*encode_unicode_fn)(CPUS390XState *env, uint64_t addr,
2590                                  uint64_t ilen, uintptr_t ra, uint32_t c,
2591                                  uint32_t *olen);
2592 
2593 static int decode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2594                        bool enh_check, uintptr_t ra,
2595                        uint32_t *ochar, uint32_t *olen)
2596 {
2597     uint8_t s0, s1, s2, s3;
2598     uint32_t c, l;
2599 
2600     if (ilen < 1) {
2601         return 0;
2602     }
2603     s0 = cpu_ldub_data_ra(env, addr, ra);
2604     if (s0 <= 0x7f) {
2605         /* one byte character */
2606         l = 1;
2607         c = s0;
2608     } else if (s0 <= (enh_check ? 0xc1 : 0xbf)) {
2609         /* invalid character */
2610         return 2;
2611     } else if (s0 <= 0xdf) {
2612         /* two byte character */
2613         l = 2;
2614         if (ilen < 2) {
2615             return 0;
2616         }
2617         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2618         c = s0 & 0x1f;
2619         c = (c << 6) | (s1 & 0x3f);
2620         if (enh_check && (s1 & 0xc0) != 0x80) {
2621             return 2;
2622         }
2623     } else if (s0 <= 0xef) {
2624         /* three byte character */
2625         l = 3;
2626         if (ilen < 3) {
2627             return 0;
2628         }
2629         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2630         s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2631         c = s0 & 0x0f;
2632         c = (c << 6) | (s1 & 0x3f);
2633         c = (c << 6) | (s2 & 0x3f);
2634         /* Fold the byte-by-byte range descriptions in the PoO into
2635            tests against the complete value.  It disallows encodings
2636            that could be smaller, and the UTF-16 surrogates.  */
2637         if (enh_check
2638             && ((s1 & 0xc0) != 0x80
2639                 || (s2 & 0xc0) != 0x80
2640                 || c < 0x1000
2641                 || (c >= 0xd800 && c <= 0xdfff))) {
2642             return 2;
2643         }
2644     } else if (s0 <= (enh_check ? 0xf4 : 0xf7)) {
2645         /* four byte character */
2646         l = 4;
2647         if (ilen < 4) {
2648             return 0;
2649         }
2650         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2651         s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2652         s3 = cpu_ldub_data_ra(env, addr + 3, ra);
2653         c = s0 & 0x07;
2654         c = (c << 6) | (s1 & 0x3f);
2655         c = (c << 6) | (s2 & 0x3f);
2656         c = (c << 6) | (s3 & 0x3f);
2657         /* See above.  */
2658         if (enh_check
2659             && ((s1 & 0xc0) != 0x80
2660                 || (s2 & 0xc0) != 0x80
2661                 || (s3 & 0xc0) != 0x80
2662                 || c < 0x010000
2663                 || c > 0x10ffff)) {
2664             return 2;
2665         }
2666     } else {
2667         /* invalid character */
2668         return 2;
2669     }
2670 
2671     *ochar = c;
2672     *olen = l;
2673     return -1;
2674 }
2675 
2676 static int decode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2677                         bool enh_check, uintptr_t ra,
2678                         uint32_t *ochar, uint32_t *olen)
2679 {
2680     uint16_t s0, s1;
2681     uint32_t c, l;
2682 
2683     if (ilen < 2) {
2684         return 0;
2685     }
2686     s0 = cpu_lduw_data_ra(env, addr, ra);
2687     if ((s0 & 0xfc00) != 0xd800) {
2688         /* one word character */
2689         l = 2;
2690         c = s0;
2691     } else {
2692         /* two word character */
2693         l = 4;
2694         if (ilen < 4) {
2695             return 0;
2696         }
2697         s1 = cpu_lduw_data_ra(env, addr + 2, ra);
2698         c = extract32(s0, 6, 4) + 1;
2699         c = (c << 6) | (s0 & 0x3f);
2700         c = (c << 10) | (s1 & 0x3ff);
2701         if (enh_check && (s1 & 0xfc00) != 0xdc00) {
2702             /* invalid surrogate character */
2703             return 2;
2704         }
2705     }
2706 
2707     *ochar = c;
2708     *olen = l;
2709     return -1;
2710 }
2711 
2712 static int decode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2713                         bool enh_check, uintptr_t ra,
2714                         uint32_t *ochar, uint32_t *olen)
2715 {
2716     uint32_t c;
2717 
2718     if (ilen < 4) {
2719         return 0;
2720     }
2721     c = cpu_ldl_data_ra(env, addr, ra);
2722     if ((c >= 0xd800 && c <= 0xdbff) || c > 0x10ffff) {
2723         /* invalid unicode character */
2724         return 2;
2725     }
2726 
2727     *ochar = c;
2728     *olen = 4;
2729     return -1;
2730 }
2731 
2732 static int encode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2733                        uintptr_t ra, uint32_t c, uint32_t *olen)
2734 {
2735     uint8_t d[4];
2736     uint32_t l, i;
2737 
2738     if (c <= 0x7f) {
2739         /* one byte character */
2740         l = 1;
2741         d[0] = c;
2742     } else if (c <= 0x7ff) {
2743         /* two byte character */
2744         l = 2;
2745         d[1] = 0x80 | extract32(c, 0, 6);
2746         d[0] = 0xc0 | extract32(c, 6, 5);
2747     } else if (c <= 0xffff) {
2748         /* three byte character */
2749         l = 3;
2750         d[2] = 0x80 | extract32(c, 0, 6);
2751         d[1] = 0x80 | extract32(c, 6, 6);
2752         d[0] = 0xe0 | extract32(c, 12, 4);
2753     } else {
2754         /* four byte character */
2755         l = 4;
2756         d[3] = 0x80 | extract32(c, 0, 6);
2757         d[2] = 0x80 | extract32(c, 6, 6);
2758         d[1] = 0x80 | extract32(c, 12, 6);
2759         d[0] = 0xf0 | extract32(c, 18, 3);
2760     }
2761 
2762     if (ilen < l) {
2763         return 1;
2764     }
2765     for (i = 0; i < l; ++i) {
2766         cpu_stb_data_ra(env, addr + i, d[i], ra);
2767     }
2768 
2769     *olen = l;
2770     return -1;
2771 }
2772 
2773 static int encode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2774                         uintptr_t ra, uint32_t c, uint32_t *olen)
2775 {
2776     uint16_t d0, d1;
2777 
2778     if (c <= 0xffff) {
2779         /* one word character */
2780         if (ilen < 2) {
2781             return 1;
2782         }
2783         cpu_stw_data_ra(env, addr, c, ra);
2784         *olen = 2;
2785     } else {
2786         /* two word character */
2787         if (ilen < 4) {
2788             return 1;
2789         }
2790         d1 = 0xdc00 | extract32(c, 0, 10);
2791         d0 = 0xd800 | extract32(c, 10, 6);
2792         d0 = deposit32(d0, 6, 4, extract32(c, 16, 5) - 1);
2793         cpu_stw_data_ra(env, addr + 0, d0, ra);
2794         cpu_stw_data_ra(env, addr + 2, d1, ra);
2795         *olen = 4;
2796     }
2797 
2798     return -1;
2799 }
2800 
2801 static int encode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2802                         uintptr_t ra, uint32_t c, uint32_t *olen)
2803 {
2804     if (ilen < 4) {
2805         return 1;
2806     }
2807     cpu_stl_data_ra(env, addr, c, ra);
2808     *olen = 4;
2809     return -1;
2810 }
2811 
/*
 * Common driver for the CONVERT UTF helpers: repeatedly decode one
 * character from the r2 operand and encode it into the r1 operand,
 * advancing both register pairs.  Returns the condition code: whatever
 * the decoder (0 = source exhausted, 2 = invalid) or encoder
 * (1 = destination exhausted) reported, or 3 when the iteration cap was
 * reached with work remaining (CPU-determined completion).
 */
static inline uint32_t convert_unicode(CPUS390XState *env, uint32_t r1,
                                       uint32_t r2, uint32_t m3, uintptr_t ra,
                                       decode_unicode_fn decode,
                                       encode_unicode_fn encode)
{
    uint64_t dst = get_address(env, r1);
    uint64_t dlen = get_length(env, r1 + 1);
    uint64_t src = get_address(env, r2);
    uint64_t slen = get_length(env, r2 + 1);
    bool enh_check = m3 & 1;
    int cc, i;

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 256.  */
    for (i = 0; i < 256; ++i) {
        uint32_t c, ilen, olen;

        cc = decode(env, src, slen, enh_check, ra, &c, &ilen);
        if (unlikely(cc >= 0)) {
            break;
        }
        cc = encode(env, dst, dlen, ra, c, &olen);
        if (unlikely(cc >= 0)) {
            break;
        }

        /* Character converted: consume input, advance output.  */
        src += ilen;
        slen -= ilen;
        dst += olen;
        dlen -= olen;
        cc = 3;    /* stands if the loop exits via the iteration cap */
    }

    /* Write the updated addresses/lengths back to the register pairs.  */
    set_address(env, r1, dst);
    set_length(env, r1 + 1, dlen);
    set_address(env, r2, src);
    set_length(env, r2 + 1, slen);

    return cc;
}
2852 
/* CU12: convert UTF-8 (r2 operand) to UTF-16 (r1 operand).  GETPC() is
   taken here so guest memory faults unwind to this helper's call site.  */
uint32_t HELPER(cu12)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf8, encode_utf16);
}
2858 
/* CU14: convert UTF-8 (r2 operand) to UTF-32 (r1 operand).  */
uint32_t HELPER(cu14)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf8, encode_utf32);
}
2864 
/* CU21: convert UTF-16 (r2 operand) to UTF-8 (r1 operand).  */
uint32_t HELPER(cu21)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf16, encode_utf8);
}
2870 
/* CU24: convert UTF-16 (r2 operand) to UTF-32 (r1 operand).  */
uint32_t HELPER(cu24)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf16, encode_utf32);
}
2876 
/* CU41: convert UTF-32 (r2 operand) to UTF-8 (r1 operand).  */
uint32_t HELPER(cu41)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf32, encode_utf8);
}
2882 
/* CU42: convert UTF-32 (r2 operand) to UTF-16 (r1 operand).  */
uint32_t HELPER(cu42)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf32, encode_utf16);
}
2888 
2889 void probe_write_access(CPUS390XState *env, uint64_t addr, uint64_t len,
2890                         uintptr_t ra)
2891 {
2892     const int mmu_idx = s390x_env_mmu_index(env, false);
2893 
2894     /* test the actual access, not just any access to the page due to LAP */
2895     while (len) {
2896         const uint64_t pagelen = -(addr | TARGET_PAGE_MASK);
2897         const uint64_t curlen = MIN(pagelen, len);
2898 
2899         probe_write(env, addr, curlen, mmu_idx, ra);
2900         addr = wrap_address(env, addr + curlen);
2901         len -= curlen;
2902     }
2903 }
2904 
/* TCG helper entry point: GETPC() must be captured here, in the function
   called directly from generated code, before delegating.  */
void HELPER(probe_write_access)(CPUS390XState *env, uint64_t addr, uint64_t len)
{
    probe_write_access(env, addr, len, GETPC());
}
2909