xref: /qemu/target/s390x/tcg/mem_helper.c (revision 641f1c53862aec64810c0b93b5b1de49d55fda92)
1 /*
2  *  S/390 memory access helper routines
3  *
4  *  Copyright (c) 2009 Ulrich Hecht
5  *  Copyright (c) 2009 Alexander Graf
6  *
7  * This library is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * This library is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19  */
20 
21 #include "qemu/osdep.h"
22 #include "qemu/log.h"
23 #include "cpu.h"
24 #include "s390x-internal.h"
25 #include "tcg_s390x.h"
26 #include "exec/helper-proto.h"
27 #include "exec/cpu-common.h"
28 #include "exec/exec-all.h"
29 #include "exec/cputlb.h"
30 #include "exec/page-protection.h"
31 #include "accel/tcg/cpu-ldst.h"
32 #include "exec/target_page.h"
33 #include "exec/tlb-flags.h"
34 #include "accel/tcg/cpu-ops.h"
35 #include "qemu/int128.h"
36 #include "qemu/atomic128.h"
37 
38 #if defined(CONFIG_USER_ONLY)
39 #include "user/page-protection.h"
40 #else
41 #include "hw/s390x/storage-keys.h"
42 #include "hw/boards.h"
43 #endif
44 
45 #ifdef CONFIG_USER_ONLY
46 # define user_or_likely(X)    true
47 #else
48 # define user_or_likely(X)    likely(X)
49 #endif
50 
51 /*****************************************************************************/
52 /* Softmmu support */
53 
54 /* #define DEBUG_HELPER */
55 #ifdef DEBUG_HELPER
56 #define HELPER_LOG(x...) qemu_log(x)
57 #else
58 #define HELPER_LOG(x...)
59 #endif
60 
61 static inline bool psw_key_valid(CPUS390XState *env, uint8_t psw_key)
62 {
63     uint16_t pkm = env->cregs[3] >> 16;
64 
65     if (env->psw.mask & PSW_MASK_PSTATE) {
66         /* PSW key has range 0..15, it is valid if the bit is 1 in the PKM */
67         return pkm & (0x8000 >> psw_key);
68     }
69     return true;
70 }
71 
72 static bool is_destructive_overlap(CPUS390XState *env, uint64_t dest,
73                                    uint64_t src, uint32_t len)
74 {
75     if (!len || src == dest) {
76         return false;
77     }
78     /* Take care of wrapping at the end of address space. */
79     if (unlikely(wrap_address(env, src + len - 1) < src)) {
80         return dest > src || dest <= wrap_address(env, src + len - 1);
81     }
82     return dest > src && dest <= src + len - 1;
83 }
84 
85 /* Trigger a SPECIFICATION exception if an address or a length is not
86    naturally aligned.  */
static inline void check_alignment(CPUS390XState *env, uint64_t v,
                                   int wordsize, uintptr_t ra)
{
    /* Misaligned value: raise PGM_SPECIFICATION (does not return). */
    if (v % wordsize) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }
}
94 
95 /* Load a value from memory according to its size.  */
96 static inline uint64_t cpu_ldusize_data_ra(CPUS390XState *env, uint64_t addr,
97                                            int wordsize, uintptr_t ra)
98 {
99     switch (wordsize) {
100     case 1:
101         return cpu_ldub_data_ra(env, addr, ra);
102     case 2:
103         return cpu_lduw_data_ra(env, addr, ra);
104     default:
105         abort();
106     }
107 }
108 
109 /* Store a to memory according to its size.  */
110 static inline void cpu_stsize_data_ra(CPUS390XState *env, uint64_t addr,
111                                       uint64_t value, int wordsize,
112                                       uintptr_t ra)
113 {
114     switch (wordsize) {
115     case 1:
116         cpu_stb_data_ra(env, addr, value, ra);
117         break;
118     case 2:
119         cpu_stw_data_ra(env, addr, value, ra);
120         break;
121     default:
122         abort();
123     }
124 }
125 
126 /* An access covers at most 4096 bytes and therefore at most two pages. */
typedef struct S390Access {
    target_ulong vaddr1;   /* guest address of the first fragment */
    target_ulong vaddr2;   /* guest address of the second fragment, if any */
    void *haddr1;          /* host pointer for fragment 1; NULL -> slow path */
    void *haddr2;          /* host pointer for fragment 2; NULL -> slow path */
    uint16_t size1;        /* bytes in fragment 1 */
    uint16_t size2;        /* bytes in fragment 2; 0 for single-page access */
    /*
     * If we can't access the host page directly, we'll have to do I/O access
     * via ld/st helpers. These are internal details, so we store the
     * mmu idx to do the access here instead of passing it around in the
     * helpers.
     */
    int mmu_idx;
} S390Access;
142 
143 /*
144  * With nonfault=1, return the PGM_ exception that would have been injected
145  * into the guest; return 0 if no exception was detected.
146  *
147  * For !CONFIG_USER_ONLY, the TEC is stored to env->tlb_fill_tec.
148  * For CONFIG_USER_ONLY, the faulting address is stored to env->__excp_addr.
149  */
static inline int s390_probe_access(CPUArchState *env, target_ulong addr,
                                    int size, MMUAccessType access_type,
                                    int mmu_idx, bool nonfault,
                                    void **phost, uintptr_t ra)
{
    /* Probe the TLB; with nonfault=true a miss is reported, not raised. */
    int flags = probe_access_flags(env, addr, size, access_type, mmu_idx,
                                   nonfault, phost, ra);

    if (unlikely(flags & TLB_INVALID_MASK)) {
#ifdef CONFIG_USER_ONLY
        /* Address is in TEC in system mode; see s390_cpu_record_sigsegv. */
        env->__excp_addr = addr & TARGET_PAGE_MASK;
        return (page_get_flags(addr) & PAGE_VALID
                ? PGM_PROTECTION : PGM_ADDRESSING);
#else
        /* The exception code was recorded by tlb_fill. */
        return env->tlb_fill_exc;
#endif
    }

#ifndef CONFIG_USER_ONLY
    if (unlikely(flags & TLB_WATCHPOINT)) {
        /* S390 does not presently use transaction attributes. */
        cpu_check_watchpoint(env_cpu(env), addr, size,
                             MEMTXATTRS_UNSPECIFIED,
                             (access_type == MMU_DATA_STORE
                              ? BP_MEM_WRITE : BP_MEM_READ), ra);
    }
#endif

    return 0;
}
181 
182 static int access_prepare_nf(S390Access *access, CPUS390XState *env,
183                              bool nonfault, vaddr vaddr1, int size,
184                              MMUAccessType access_type,
185                              int mmu_idx, uintptr_t ra)
186 {
187     int size1, size2, exc;
188 
189     assert(size > 0 && size <= 4096);
190 
191     size1 = MIN(size, -(vaddr1 | TARGET_PAGE_MASK)),
192     size2 = size - size1;
193 
194     memset(access, 0, sizeof(*access));
195     access->vaddr1 = vaddr1;
196     access->size1 = size1;
197     access->size2 = size2;
198     access->mmu_idx = mmu_idx;
199 
200     exc = s390_probe_access(env, vaddr1, size1, access_type, mmu_idx, nonfault,
201                             &access->haddr1, ra);
202     if (unlikely(exc)) {
203         return exc;
204     }
205     if (unlikely(size2)) {
206         /* The access crosses page boundaries. */
207         vaddr vaddr2 = wrap_address(env, vaddr1 + size1);
208 
209         access->vaddr2 = vaddr2;
210         exc = s390_probe_access(env, vaddr2, size2, access_type, mmu_idx,
211                                 nonfault, &access->haddr2, ra);
212         if (unlikely(exc)) {
213             return exc;
214         }
215     }
216     return 0;
217 }
218 
static inline void access_prepare(S390Access *ret, CPUS390XState *env,
                                  vaddr vaddr, int size,
                                  MMUAccessType access_type, int mmu_idx,
                                  uintptr_t ra)
{
    /*
     * Faulting variant: with nonfault=false any access exception is
     * raised inside the probe, so a nonzero return is impossible here.
     */
    int exc = access_prepare_nf(ret, env, false, vaddr, size,
                                access_type, mmu_idx, ra);
    assert(!exc);
}
228 
229 /* Helper to handle memset on a single page. */
230 static void do_access_memset(CPUS390XState *env, vaddr vaddr, char *haddr,
231                              uint8_t byte, uint16_t size, int mmu_idx,
232                              uintptr_t ra)
233 {
234     if (user_or_likely(haddr)) {
235         memset(haddr, byte, size);
236     } else {
237         MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
238         for (int i = 0; i < size; i++) {
239             cpu_stb_mmu(env, vaddr + i, byte, oi, ra);
240         }
241     }
242 }
243 
static void access_memset(CPUS390XState *env, S390Access *desta,
                          uint8_t byte, uintptr_t ra)
{
    /* Fill the whole (possibly page-crossing) destination with BYTE. */
    set_helper_retaddr(ra);
    do_access_memset(env, desta->vaddr1, desta->haddr1, byte, desta->size1,
                     desta->mmu_idx, ra);
    if (unlikely(desta->size2)) {
        /* Second fragment only exists for page-crossing accesses. */
        do_access_memset(env, desta->vaddr2, desta->haddr2, byte,
                         desta->size2, desta->mmu_idx, ra);
    }
    clear_helper_retaddr();
}
256 
257 static uint8_t access_get_byte(CPUS390XState *env, S390Access *access,
258                                int offset, uintptr_t ra)
259 {
260     target_ulong vaddr = access->vaddr1;
261     void *haddr = access->haddr1;
262 
263     if (unlikely(offset >= access->size1)) {
264         offset -= access->size1;
265         vaddr = access->vaddr2;
266         haddr = access->haddr2;
267     }
268 
269     if (user_or_likely(haddr)) {
270         return ldub_p(haddr + offset);
271     } else {
272         MemOpIdx oi = make_memop_idx(MO_UB, access->mmu_idx);
273         return cpu_ldb_mmu(env, vaddr + offset, oi, ra);
274     }
275 }
276 
277 static void access_set_byte(CPUS390XState *env, S390Access *access,
278                             int offset, uint8_t byte, uintptr_t ra)
279 {
280     target_ulong vaddr = access->vaddr1;
281     void *haddr = access->haddr1;
282 
283     if (unlikely(offset >= access->size1)) {
284         offset -= access->size1;
285         vaddr = access->vaddr2;
286         haddr = access->haddr2;
287     }
288 
289     if (user_or_likely(haddr)) {
290         stb_p(haddr + offset, byte);
291     } else {
292         MemOpIdx oi = make_memop_idx(MO_UB, access->mmu_idx);
293         cpu_stb_mmu(env, vaddr + offset, byte, oi, ra);
294     }
295 }
296 
297 /*
298  * Move data with the same semantics as memmove() in case ranges don't overlap
299  * or src > dest. Undefined behavior on destructive overlaps.
300  */
static void access_memmove(CPUS390XState *env, S390Access *desta,
                           S390Access *srca, uintptr_t ra)
{
    int len = desta->size1 + desta->size2;

    assert(len == srca->size1 + srca->size2);

    /* Fallback to slow access in case we don't have access to all host pages */
    if (user_or_likely(desta->haddr1 &&
                       srca->haddr1 &&
                       (!desta->size2 || desta->haddr2) &&
                       (!srca->size2 || srca->haddr2))) {
        /* How many more bytes the destination keeps on its first page. */
        int diff = desta->size1 - srca->size1;

        if (likely(diff == 0)) {
            /* Both operands split at the same offset: copy fragment-wise. */
            memmove(desta->haddr1, srca->haddr1, srca->size1);
            if (unlikely(srca->size2)) {
                memmove(desta->haddr2, srca->haddr2, srca->size2);
            }
        } else if (diff > 0) {
            /* Dest fragment 1 consumes all of src fragment 1 plus the
               first diff bytes of src fragment 2. */
            memmove(desta->haddr1, srca->haddr1, srca->size1);
            memmove(desta->haddr1 + srca->size1, srca->haddr2, diff);
            if (likely(desta->size2)) {
                memmove(desta->haddr2, srca->haddr2 + diff, desta->size2);
            }
        } else {
            /* Src fragment 1 is longer: its tail lands on dest fragment 2. */
            diff = -diff;
            memmove(desta->haddr1, srca->haddr1, desta->size1);
            memmove(desta->haddr2, srca->haddr1 + desta->size1, diff);
            if (likely(srca->size2)) {
                memmove(desta->haddr2 + diff, srca->haddr2, srca->size2);
            }
        }
    } else {
        /* At least one fragment has no host mapping: go byte-wise. */
        for (int i = 0; i < len; i++) {
            uint8_t byte = access_get_byte(env, srca, i, ra);
            access_set_byte(env, desta, i, byte, ra);
        }
    }
}
341 
342 static int mmu_idx_from_as(uint8_t as)
343 {
344     switch (as) {
345     case AS_PRIMARY:
346         return MMU_PRIMARY_IDX;
347     case AS_SECONDARY:
348         return MMU_SECONDARY_IDX;
349     case AS_HOME:
350         return MMU_HOME_IDX;
351     default:
352         /* FIXME AS_ACCREG */
353         g_assert_not_reached();
354     }
355 }
356 
357 /* and on array */
static uint32_t do_helper_nc(CPUS390XState *env, uint32_t l, uint64_t dest,
                             uint64_t src, uintptr_t ra)
{
    const int mmu_idx = s390x_env_mmu_index(env, false);
    S390Access srca1, srca2, desta;
    uint32_t i;
    uint8_t c = 0;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* NC always processes one more byte than specified - maximum is 256 */
    l++;

    /* Prepare (and fault-check) all operands before modifying anything. */
    access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    set_helper_retaddr(ra);

    for (i = 0; i < l; i++) {
        const uint8_t x = access_get_byte(env, &srca1, i, ra) &
                          access_get_byte(env, &srca2, i, ra);

        c |= x;
        access_set_byte(env, &desta, i, x, ra);
    }

    clear_helper_retaddr();
    /* cc: 0 if the result is all zeros, 1 otherwise. */
    return c != 0;
}
388 
uint32_t HELPER(nc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    /* Capture the TCG return address in the outermost helper frame. */
    return do_helper_nc(env, l, dest, src, GETPC());
}
394 
395 /* xor on array */
static uint32_t do_helper_xc(CPUS390XState *env, uint32_t l, uint64_t dest,
                             uint64_t src, uintptr_t ra)
{
    const int mmu_idx = s390x_env_mmu_index(env, false);
    S390Access srca1, srca2, desta;
    uint32_t i;
    uint8_t c = 0;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* XC always processes one more byte than specified - maximum is 256 */
    l++;

    /* Prepare (and fault-check) all operands before modifying anything. */
    access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);

    /* xor with itself is the same as memset(0) */
    if (src == dest) {
        access_memset(env, &desta, 0, ra);
        return 0;
    }

    set_helper_retaddr(ra);
    for (i = 0; i < l; i++) {
        const uint8_t x = access_get_byte(env, &srca1, i, ra) ^
                          access_get_byte(env, &srca2, i, ra);

        c |= x;
        access_set_byte(env, &desta, i, x, ra);
    }
    clear_helper_retaddr();
    /* cc: 0 if the result is all zeros, 1 otherwise. */
    return c != 0;
}
431 
uint32_t HELPER(xc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    /* Capture the TCG return address in the outermost helper frame. */
    return do_helper_xc(env, l, dest, src, GETPC());
}
437 
438 /* or on array */
static uint32_t do_helper_oc(CPUS390XState *env, uint32_t l, uint64_t dest,
                             uint64_t src, uintptr_t ra)
{
    const int mmu_idx = s390x_env_mmu_index(env, false);
    S390Access srca1, srca2, desta;
    uint32_t i;
    uint8_t c = 0;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* OC always processes one more byte than specified - maximum is 256 */
    l++;

    /* Prepare (and fault-check) all operands before modifying anything. */
    access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    set_helper_retaddr(ra);

    for (i = 0; i < l; i++) {
        const uint8_t x = access_get_byte(env, &srca1, i, ra) |
                          access_get_byte(env, &srca2, i, ra);

        c |= x;
        access_set_byte(env, &desta, i, x, ra);
    }

    clear_helper_retaddr();
    /* cc: 0 if the result is all zeros, 1 otherwise. */
    return c != 0;
}
469 
uint32_t HELPER(oc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    /* Capture the TCG return address in the outermost helper frame. */
    return do_helper_oc(env, l, dest, src, GETPC());
}
475 
476 /* memmove */
static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest,
                              uint64_t src, uintptr_t ra)
{
    const int mmu_idx = s390x_env_mmu_index(env, false);
    S390Access srca, desta;
    uint32_t i;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* MVC always copies one more byte than specified - maximum is 256 */
    l++;

    access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);

    /*
     * "When the operands overlap, the result is obtained as if the operands
     * were processed one byte at a time". Only non-destructive overlaps
     * behave like memmove().
     */
    if (dest == src + 1) {
        /* Byte-propagation pattern: replicate the first source byte. */
        access_memset(env, &desta, access_get_byte(env, &srca, 0, ra), ra);
    } else if (!is_destructive_overlap(env, dest, src, l)) {
        access_memmove(env, &desta, &srca, ra);
    } else {
        /* Destructive overlap: copy strictly one byte at a time. */
        set_helper_retaddr(ra);
        for (i = 0; i < l; i++) {
            uint8_t byte = access_get_byte(env, &srca, i, ra);

            access_set_byte(env, &desta, i, byte, ra);
        }
        clear_helper_retaddr();
    }

    /* MVC does not change the condition code. */
    return env->cc_op;
}
514 
void HELPER(mvc)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    /* Capture the TCG return address in the outermost helper frame. */
    do_helper_mvc(env, l, dest, src, GETPC());
}
519 
520 /* move right to left */
void HELPER(mvcrl)(CPUS390XState *env, uint64_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = s390x_env_mmu_index(env, false);
    const uint64_t ra = GETPC();
    S390Access srca, desta;
    int32_t i;

    /* MVCRL always copies one more byte than specified - maximum is 256 */
    l &= 0xff;
    l++;

    access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);

    /* Copy right to left, i.e. from the highest offset downwards. */
    set_helper_retaddr(ra);
    for (i = l - 1; i >= 0; i--) {
        uint8_t byte = access_get_byte(env, &srca, i, ra);
        access_set_byte(env, &desta, i, byte, ra);
    }
    clear_helper_retaddr();
}
542 
543 /* move inverse  */
void HELPER(mvcin)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = s390x_env_mmu_index(env, false);
    S390Access srca, desta;
    uintptr_t ra = GETPC();
    int i;

    /* MVCIN always copies one more byte than specified - maximum is 256 */
    l++;

    /* SRC addresses the rightmost byte; rebase to the leftmost one. */
    src = wrap_address(env, src - l + 1);
    access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);

    /* Store the source bytes into the destination in reverse order. */
    set_helper_retaddr(ra);
    for (i = 0; i < l; i++) {
        const uint8_t x = access_get_byte(env, &srca, l - i - 1, ra);
        access_set_byte(env, &desta, i, x, ra);
    }
    clear_helper_retaddr();
}
565 
566 /* move numerics  */
void HELPER(mvn)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = s390x_env_mmu_index(env, false);
    S390Access srca1, srca2, desta;
    uintptr_t ra = GETPC();
    int i;

    /* MVN always copies one more byte than specified - maximum is 256 */
    l++;

    access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);

    set_helper_retaddr(ra);
    for (i = 0; i < l; i++) {
        /* Low nibble (numeric) from src, high nibble (zone) kept from dest. */
        const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0x0f) |
                          (access_get_byte(env, &srca2, i, ra) & 0xf0);

        access_set_byte(env, &desta, i, x, ra);
    }
    clear_helper_retaddr();
}
590 
591 /* move with offset  */
void HELPER(mvo)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = s390x_env_mmu_index(env, false);
    /* MVO always processes one more byte than specified - maximum is 16 */
    const int len_dest = (l >> 4) + 1;
    const int len_src = (l & 0xf) + 1;
    uintptr_t ra = GETPC();
    uint8_t byte_dest, byte_src;
    S390Access srca, desta;
    int i, j;

    access_prepare(&srca, env, src, len_src, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, dest, len_dest, MMU_DATA_STORE, mmu_idx, ra);

    /* Handle rightmost byte */
    byte_dest = cpu_ldub_data_ra(env, dest + len_dest - 1, ra);

    set_helper_retaddr(ra);
    /* Keep the destination's low nibble; take the source's low nibble
       shifted into the high position. */
    byte_src = access_get_byte(env, &srca, len_src - 1, ra);
    byte_dest = (byte_dest & 0x0f) | (byte_src << 4);
    access_set_byte(env, &desta, len_dest - 1, byte_dest, ra);

    /* Process remaining bytes from right to left */
    for (i = len_dest - 2, j = len_src - 2; i >= 0; i--, j--) {
        /* Carry the previous source byte's high nibble downwards. */
        byte_dest = byte_src >> 4;
        if (j >= 0) {
            byte_src = access_get_byte(env, &srca, j, ra);
        } else {
            /* Source exhausted: pad with zero nibbles. */
            byte_src = 0;
        }
        byte_dest |= byte_src << 4;
        access_set_byte(env, &desta, i, byte_dest, ra);
    }
    clear_helper_retaddr();
}
627 
628 /* move zones  */
void HELPER(mvz)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = s390x_env_mmu_index(env, false);
    S390Access srca1, srca2, desta;
    uintptr_t ra = GETPC();
    int i;

    /* MVZ always copies one more byte than specified - maximum is 256 */
    l++;

    access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);

    set_helper_retaddr(ra);
    for (i = 0; i < l; i++) {
        /* High nibble (zone) from src, low nibble (numeric) kept from dest. */
        const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0xf0) |
                          (access_get_byte(env, &srca2, i, ra) & 0x0f);

        access_set_byte(env, &desta, i, x, ra);
    }
    clear_helper_retaddr();
}
652 
653 /* compare unsigned byte arrays */
654 static uint32_t do_helper_clc(CPUS390XState *env, uint32_t l, uint64_t s1,
655                               uint64_t s2, uintptr_t ra)
656 {
657     uint32_t i;
658     uint32_t cc = 0;
659 
660     HELPER_LOG("%s l %d s1 %" PRIx64 " s2 %" PRIx64 "\n",
661                __func__, l, s1, s2);
662 
663     for (i = 0; i <= l; i++) {
664         uint8_t x = cpu_ldub_data_ra(env, s1 + i, ra);
665         uint8_t y = cpu_ldub_data_ra(env, s2 + i, ra);
666         HELPER_LOG("%02x (%c)/%02x (%c) ", x, x, y, y);
667         if (x < y) {
668             cc = 1;
669             break;
670         } else if (x > y) {
671             cc = 2;
672             break;
673         }
674     }
675 
676     HELPER_LOG("\n");
677     return cc;
678 }
679 
uint32_t HELPER(clc)(CPUS390XState *env, uint32_t l, uint64_t s1, uint64_t s2)
{
    /* Capture the TCG return address in the outermost helper frame. */
    return do_helper_clc(env, l, s1, s2, GETPC());
}
684 
685 /* compare logical under mask */
uint32_t HELPER(clm)(CPUS390XState *env, uint32_t r1, uint32_t mask,
                     uint64_t addr)
{
    uintptr_t ra = GETPC();
    uint32_t cc = 0;

    HELPER_LOG("%s: r1 0x%x mask 0x%x addr 0x%" PRIx64 "\n", __func__, r1,
               mask, addr);

    if (!mask) {
        /* Recognize access exceptions for the first byte */
        probe_read(env, addr, 1, s390x_env_mmu_index(env, false), ra);
    }

    /* Walk the mask from bit 0 to bit 3, one register byte per mask bit. */
    while (mask) {
        if (mask & 8) {
            uint8_t d = cpu_ldub_data_ra(env, addr, ra);
            uint8_t r = extract32(r1, 24, 8);
            HELPER_LOG("mask 0x%x %02x/%02x (0x%" PRIx64 ") ", mask, r, d,
                       addr);
            if (r < d) {
                cc = 1;
                break;
            } else if (r > d) {
                cc = 2;
                break;
            }
            /* Memory is only consumed for selected bytes. */
            addr++;
        }
        mask = (mask << 1) & 0xf;
        r1 <<= 8;
    }

    HELPER_LOG("\n");
    return cc;
}
722 
static inline uint64_t get_address(CPUS390XState *env, int reg)
{
    /* Read a GPR and wrap it according to the current addressing mode. */
    return wrap_address(env, env->regs[reg]);
}
727 
728 /*
729  * Store the address to the given register, zeroing out unused leftmost
730  * bits in bit positions 32-63 (24-bit and 31-bit mode only).
731  */
732 static inline void set_address_zero(CPUS390XState *env, int reg,
733                                     uint64_t address)
734 {
735     if (env->psw.mask & PSW_MASK_64) {
736         env->regs[reg] = address;
737     } else {
738         if (!(env->psw.mask & PSW_MASK_32)) {
739             address &= 0x00ffffff;
740         } else {
741             address &= 0x7fffffff;
742         }
743         env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
744     }
745 }
746 
/* Store an address into a register, honoring the addressing mode. */
static inline void set_address(CPUS390XState *env, int reg, uint64_t address)
{
    if (env->psw.mask & PSW_MASK_64) {
        /* 64-Bit mode */
        env->regs[reg] = address;
    } else {
        if (!(env->psw.mask & PSW_MASK_32)) {
            /* 24-Bit mode. According to the PoO it is implementation
            dependent if bits 32-39 remain unchanged or are set to
            zeros.  Choose the former so that the function can also be
            used for TRT.  */
            env->regs[reg] = deposit64(env->regs[reg], 0, 24, address);
        } else {
            /* 31-Bit mode. According to the PoO it is implementation
            dependent if bit 32 remains unchanged or is set to zero.
            Choose the latter so that the function can also be used for
            TRT.  */
            address &= 0x7fffffff;
            env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
        }
    }
}
769 
770 static inline uint64_t wrap_length32(CPUS390XState *env, uint64_t length)
771 {
772     if (!(env->psw.mask & PSW_MASK_64)) {
773         return (uint32_t)length;
774     }
775     return length;
776 }
777 
778 static inline uint64_t wrap_length31(CPUS390XState *env, uint64_t length)
779 {
780     if (!(env->psw.mask & PSW_MASK_64)) {
781         /* 24-Bit and 31-Bit mode */
782         length &= 0x7fffffff;
783     }
784     return length;
785 }
786 
static inline uint64_t get_length(CPUS390XState *env, int reg)
{
    /* Read a length operand from a GPR, limited to 31 bits outside 64-bit mode. */
    return wrap_length31(env, env->regs[reg]);
}
791 
792 static inline void set_length(CPUS390XState *env, int reg, uint64_t length)
793 {
794     if (env->psw.mask & PSW_MASK_64) {
795         /* 64-Bit mode */
796         env->regs[reg] = length;
797     } else {
798         /* 24-Bit and 31-Bit mode */
799         env->regs[reg] = deposit64(env->regs[reg], 0, 32, length);
800     }
801 }
802 
803 /* search string (c is byte to search, r2 is string, r1 end of string) */
void HELPER(srst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    uintptr_t ra = GETPC();
    uint64_t end, str;
    uint32_t len;
    /* The search character is the low byte of R0. */
    uint8_t v, c = env->regs[0];

    /* Bits 32-55 must contain all 0.  */
    if (env->regs[0] & 0xffffff00u) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    str = get_address(env, r2);
    end = get_address(env, r1);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; ++len) {
        if (str + len == end) {
            /* Character not found.  R1 & R2 are unmodified.  */
            env->cc_op = 2;
            return;
        }
        v = cpu_ldub_data_ra(env, str + len, ra);
        if (v == c) {
            /* Character found.  Set R1 to the location; R2 is unmodified.  */
            env->cc_op = 1;
            set_address(env, r1, str + len);
            return;
        }
    }

    /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
    env->cc_op = 3;
    set_address(env, r2, str + len);
}
840 
void HELPER(srstu)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    uintptr_t ra = GETPC();
    uint32_t len;
    /* The search character is the low halfword of R0. */
    uint16_t v, c = env->regs[0];
    uint64_t end, str, adj_end;

    /* Bits 32-47 of R0 must be zero.  */
    if (env->regs[0] & 0xffff0000u) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    str = get_address(env, r2);
    end = get_address(env, r1);

    /* If the LSB of the two addresses differ, use one extra byte.  */
    adj_end = end + ((str ^ end) & 1);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; len += 2) {
        if (str + len == adj_end) {
            /* End of input found.  */
            env->cc_op = 2;
            return;
        }
        v = cpu_lduw_data_ra(env, str + len, ra);
        if (v == c) {
            /* Character found.  Set R1 to the location; R2 is unmodified.  */
            env->cc_op = 1;
            set_address(env, r1, str + len);
            return;
        }
    }

    /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
    env->cc_op = 3;
    set_address(env, r2, str + len);
}
880 
881 /* unsigned string compare (c is string terminator) */
Int128 HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t s2)
{
    uintptr_t ra = GETPC();
    uint32_t len;

    /* Only the low byte of C is the terminator. */
    c = c & 0xff;
    s1 = wrap_address(env, s1);
    s2 = wrap_address(env, s2);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; ++len) {
        uint8_t v1 = cpu_ldub_data_ra(env, s1 + len, ra);
        uint8_t v2 = cpu_ldub_data_ra(env, s2 + len, ra);
        if (v1 == v2) {
            if (v1 == c) {
                /* Equal.  CC=0, and don't advance the registers.  */
                env->cc_op = 0;
                return int128_make128(s2, s1);
            }
        } else {
            /* Unequal.  CC={1,2}, and advance the registers.  Note that
               the terminator need not be zero, but the string that contains
               the terminator is by definition "low".  */
            env->cc_op = (v1 == c ? 1 : v2 == c ? 2 : v1 < v2 ? 1 : 2);
            return int128_make128(s2 + len, s1 + len);
        }
    }

    /* CPU-determined bytes equal; advance the registers.  */
    env->cc_op = 3;
    return int128_make128(s2 + len, s1 + len);
}
915 
/* move page */
uint32_t HELPER(mvpg)(CPUS390XState *env, uint64_t r0, uint32_t r1, uint32_t r2)
{
    /* Operand addresses are truncated to page boundaries.  */
    const uint64_t src = get_address(env, r2) & TARGET_PAGE_MASK;
    const uint64_t dst = get_address(env, r1) & TARGET_PAGE_MASK;
    const int mmu_idx = s390x_env_mmu_index(env, false);
    /* Flag bits taken from the R0 operand (see the MVPG definition).  */
    const bool f = extract64(r0, 11, 1);
    const bool s = extract64(r0, 10, 1);
    /* Condition-code option: report translation failures via CC 1/2.  */
    const bool cco = extract64(r0, 8, 1);
    uintptr_t ra = GETPC();
    S390Access srca, desta;
    int exc;

    /* F and S may not both be set, and bits 12-15 of R0 must be zero.  */
    if ((f && s) || extract64(r0, 12, 4)) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC());
    }

    /*
     * We always manually handle exceptions such that we can properly store
     * r1/r2 to the lowcore on page-translation exceptions.
     *
     * TODO: Access key handling
     */
    exc = access_prepare_nf(&srca, env, true, src, TARGET_PAGE_SIZE,
                            MMU_DATA_LOAD, mmu_idx, ra);
    if (exc) {
        if (cco) {
            return 2; /* CC 2: source page not available */
        }
        goto inject_exc;
    }
    exc = access_prepare_nf(&desta, env, true, dst, TARGET_PAGE_SIZE,
                            MMU_DATA_STORE, mmu_idx, ra);
    if (exc) {
        if (cco && exc != PGM_PROTECTION) {
            return 1; /* CC 1: destination page not available */
        }
        goto inject_exc;
    }
    access_memmove(env, &desta, &srca, ra);
    return 0; /* data moved */
inject_exc:
#if !defined(CONFIG_USER_ONLY)
    /* Expose the translation-exception details in the lowcore, as the
       hardware would, before delivering the program interrupt.  */
    if (exc != PGM_ADDRESSING) {
        stq_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, trans_exc_code),
                 env->tlb_fill_tec);
    }
    if (exc == PGM_PAGE_TRANS) {
        /* Operand-access ID: encodes which register pair faulted.  */
        stb_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, op_access_id),
                 r1 << 4 | r2);
    }
#endif
    tcg_s390_program_interrupt(env, exc, ra);
}
970 
/* string copy */
uint32_t HELPER(mvst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    const int mmu_idx = s390x_env_mmu_index(env, false);
    const uint64_t d = get_address(env, r1);
    const uint64_t s = get_address(env, r2);
    /* The terminator character is in the low byte of R0.  */
    const uint8_t c = env->regs[0];
    /* Process at most up to the nearer of the two next page boundaries.  */
    const int len = MIN(-(d | TARGET_PAGE_MASK), -(s | TARGET_PAGE_MASK));
    S390Access srca, desta;
    uintptr_t ra = GETPC();
    int i;

    /* Bits 8-31 (from the LSB) of R0 must be zero.  */
    if (env->regs[0] & 0xffffff00ull) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    /*
     * Our access should not exceed single pages, as we must not report access
     * exceptions exceeding the actually copied range (which we don't know at
     * this point). We might over-indicate watchpoints within the pages
     * (if we ever care, we have to limit processing to a single byte).
     */
    access_prepare(&srca, env, s, len, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, d, len, MMU_DATA_STORE, mmu_idx, ra);

    set_helper_retaddr(ra);
    for (i = 0; i < len; i++) {
        const uint8_t v = access_get_byte(env, &srca, i, ra);

        access_set_byte(env, &desta, i, v, ra);
        if (v == c) {
            /* Terminator copied: R1 points at it, R2 is unchanged.  */
            clear_helper_retaddr();
            set_address_zero(env, r1, d + i);
            return 1; /* CC 1: entire string moved */
        }
    }
    clear_helper_retaddr();
    /* Page boundary reached: advance both registers for re-execution.  */
    set_address_zero(env, r1, d + len);
    set_address_zero(env, r2, s + len);
    return 3; /* CC 3: CPU-determined amount moved */
}
1012 
1013 /* load access registers r1 to r3 from memory at a2 */
1014 void HELPER(lam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1015 {
1016     uintptr_t ra = GETPC();
1017     int i;
1018 
1019     if (a2 & 0x3) {
1020         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1021     }
1022 
1023     for (i = r1;; i = (i + 1) % 16) {
1024         env->aregs[i] = cpu_ldl_data_ra(env, a2, ra);
1025         a2 += 4;
1026 
1027         if (i == r3) {
1028             break;
1029         }
1030     }
1031 }
1032 
1033 /* store access registers r1 to r3 in memory at a2 */
1034 void HELPER(stam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1035 {
1036     uintptr_t ra = GETPC();
1037     int i;
1038 
1039     if (a2 & 0x3) {
1040         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1041     }
1042 
1043     for (i = r1;; i = (i + 1) % 16) {
1044         cpu_stl_data_ra(env, a2, env->aregs[i], ra);
1045         a2 += 4;
1046 
1047         if (i == r3) {
1048             break;
1049         }
1050     }
1051 }
1052 
/*
 * move long helper: perform one CPU-determined chunk of MVCLE/MVCLU.
 *
 * @dest/@destlen and @src/@srclen are in/out: updated to reflect the
 * bytes processed.  @pad is the padding character (one significant byte
 * for wordsize 1, two bytes for wordsize 2).  Returns the condition
 * code: 0/1/2 compare the original lengths once the destination is
 * exhausted, 3 if destination bytes remain (caller should re-execute).
 */
static inline uint32_t do_mvcl(CPUS390XState *env,
                               uint64_t *dest, uint64_t *destlen,
                               uint64_t *src, uint64_t *srclen,
                               uint16_t pad, int wordsize, uintptr_t ra)
{
    const int mmu_idx = s390x_env_mmu_index(env, false);
    /* Never cross the next destination page boundary in one step.  */
    int len = MIN(*destlen, -(*dest | TARGET_PAGE_MASK));
    S390Access srca, desta;
    int i, cc;

    /* The final CC compares the incoming lengths.  */
    if (*destlen == *srclen) {
        cc = 0;
    } else if (*destlen < *srclen) {
        cc = 1;
    } else {
        cc = 2;
    }

    if (!*destlen) {
        return cc;
    }

    /*
     * Only perform one type of operation (move/pad) at a time.
     * Stay within single pages.
     */
    if (*srclen) {
        /* Copy the src array */
        len = MIN(MIN(*srclen, -(*src | TARGET_PAGE_MASK)), len);
        *destlen -= len;
        *srclen -= len;
        access_prepare(&srca, env, *src, len, MMU_DATA_LOAD, mmu_idx, ra);
        access_prepare(&desta, env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
        access_memmove(env, &desta, &srca, ra);
        *src = wrap_address(env, *src + len);
        *dest = wrap_address(env, *dest + len);
    } else if (wordsize == 1) {
        /* Pad the remaining area */
        *destlen -= len;
        access_prepare(&desta, env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
        access_memset(env, &desta, pad, ra);
        *dest = wrap_address(env, *dest + len);
    } else {
        /* Double-byte padding: alternate the two pad bytes.  */
        access_prepare(&desta, env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
        set_helper_retaddr(ra);

        /* The remaining length selects the padding byte. */
        for (i = 0; i < len; (*destlen)--, i++) {
            if (*destlen & 1) {
                access_set_byte(env, &desta, i, pad, ra);
            } else {
                access_set_byte(env, &desta, i, pad >> 8, ra);
            }
        }
        clear_helper_retaddr();
        *dest = wrap_address(env, *dest + len);
    }

    return *destlen ? 3 : cc;
}
1114 
/* move long */
uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    const int mmu_idx = s390x_env_mmu_index(env, false);
    uintptr_t ra = GETPC();
    /* Lengths live in the low 24 bits of the odd registers.  */
    uint64_t destlen = env->regs[r1 + 1] & 0xffffff;
    uint64_t dest = get_address(env, r1);
    uint64_t srclen = env->regs[r2 + 1] & 0xffffff;
    uint64_t src = get_address(env, r2);
    /* The pad character sits above the source length in R2+1.  */
    uint8_t pad = env->regs[r2 + 1] >> 24;
    CPUState *cs = env_cpu(env);
    S390Access srca, desta;
    uint32_t cc, cur_len;

    if (is_destructive_overlap(env, dest, src, MIN(srclen, destlen))) {
        cc = 3; /* destructive overlap: nothing is moved */
    } else if (srclen == destlen) {
        cc = 0;
    } else if (destlen < srclen) {
        cc = 1;
    } else {
        cc = 2;
    }

    /* We might have to zero-out some bits even if there was no action. */
    if (unlikely(!destlen || cc == 3)) {
        set_address_zero(env, r2, src);
        set_address_zero(env, r1, dest);
        return cc;
    } else if (!srclen) {
        set_address_zero(env, r2, src);
    }

    /*
     * Only perform one type of operation (move/pad) in one step.
     * Stay within single pages.
     */
    while (destlen) {
        cur_len = MIN(destlen, -(dest | TARGET_PAGE_MASK));
        if (!srclen) {
            /* Source exhausted: pad the rest of the destination.  */
            access_prepare(&desta, env, dest, cur_len,
                           MMU_DATA_STORE, mmu_idx, ra);
            access_memset(env, &desta, pad, ra);
        } else {
            cur_len = MIN(MIN(srclen, -(src | TARGET_PAGE_MASK)), cur_len);

            access_prepare(&srca, env, src, cur_len,
                           MMU_DATA_LOAD, mmu_idx, ra);
            access_prepare(&desta, env, dest, cur_len,
                           MMU_DATA_STORE, mmu_idx, ra);
            access_memmove(env, &desta, &srca, ra);
            src = wrap_address(env, src + cur_len);
            srclen -= cur_len;
            /* Write back R2/R2+1 so an interrupted MVCL can resume.  */
            env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, srclen);
            set_address_zero(env, r2, src);
        }
        dest = wrap_address(env, dest + cur_len);
        destlen -= cur_len;
        env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, destlen);
        set_address_zero(env, r1, dest);

        /*
         * MVCL is interruptible. Return to the main loop if requested after
         * writing back all state to registers. If no interrupt will get
         * injected, we'll end up back in this handler and continue processing
         * the remaining parts.
         */
        if (destlen && unlikely(cpu_loop_exit_requested(cs))) {
            cpu_loop_exit_restore(cs, ra);
        }
    }
    return cc;
}
1188 
1189 /* move long extended */
1190 uint32_t HELPER(mvcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1191                        uint32_t r3)
1192 {
1193     uintptr_t ra = GETPC();
1194     uint64_t destlen = get_length(env, r1 + 1);
1195     uint64_t dest = get_address(env, r1);
1196     uint64_t srclen = get_length(env, r3 + 1);
1197     uint64_t src = get_address(env, r3);
1198     uint8_t pad = a2;
1199     uint32_t cc;
1200 
1201     cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 1, ra);
1202 
1203     set_length(env, r1 + 1, destlen);
1204     set_length(env, r3 + 1, srclen);
1205     set_address(env, r1, dest);
1206     set_address(env, r3, src);
1207 
1208     return cc;
1209 }
1210 
1211 /* move long unicode */
1212 uint32_t HELPER(mvclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1213                        uint32_t r3)
1214 {
1215     uintptr_t ra = GETPC();
1216     uint64_t destlen = get_length(env, r1 + 1);
1217     uint64_t dest = get_address(env, r1);
1218     uint64_t srclen = get_length(env, r3 + 1);
1219     uint64_t src = get_address(env, r3);
1220     uint16_t pad = a2;
1221     uint32_t cc;
1222 
1223     cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 2, ra);
1224 
1225     set_length(env, r1 + 1, destlen);
1226     set_length(env, r3 + 1, srclen);
1227     set_address(env, r1, dest);
1228     set_address(env, r3, src);
1229 
1230     return cc;
1231 }
1232 
/*
 * compare logical long helper: compare the two operands, padding the
 * shorter one with @pad.  @wordsize is 1 (CLCL/CLCLE) or 2 (CLCLU);
 * @limit caps the number of bytes compared per invocation.  Operand
 * address/length pairs are in/out.  Returns CC 0 (equal, both
 * exhausted), 1 (first operand low), 2 (first operand high), or
 * 3 (limit reached with all compared units equal so far).
 */
static inline uint32_t do_clcl(CPUS390XState *env,
                               uint64_t *src1, uint64_t *src1len,
                               uint64_t *src3, uint64_t *src3len,
                               uint16_t pad, uint64_t limit,
                               int wordsize, uintptr_t ra)
{
    uint64_t len = MAX(*src1len, *src3len);
    uint32_t cc = 0;

    /* Both lengths must be multiples of the unit size.  */
    check_alignment(env, *src1len | *src3len, wordsize, ra);

    if (!len) {
        return cc;
    }

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  */
    if (len > limit) {
        len = limit;
        cc = 3;
    }

    for (; len; len -= wordsize) {
        /* Exhausted operands read as the padding character.  */
        uint16_t v1 = pad;
        uint16_t v3 = pad;

        if (*src1len) {
            v1 = cpu_ldusize_data_ra(env, *src1, wordsize, ra);
        }
        if (*src3len) {
            v3 = cpu_ldusize_data_ra(env, *src3, wordsize, ra);
        }

        if (v1 != v3) {
            cc = (v1 < v3) ? 1 : 2;
            break;
        }

        /* Advance only the operands that still have bytes left.  */
        if (*src1len) {
            *src1 += wordsize;
            *src1len -= wordsize;
        }
        if (*src3len) {
            *src3 += wordsize;
            *src3len -= wordsize;
        }
    }

    return cc;
}
1285 
1286 /* compare logical long */
1287 uint32_t HELPER(clcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
1288 {
1289     uintptr_t ra = GETPC();
1290     uint64_t src1len = extract64(env->regs[r1 + 1], 0, 24);
1291     uint64_t src1 = get_address(env, r1);
1292     uint64_t src3len = extract64(env->regs[r2 + 1], 0, 24);
1293     uint64_t src3 = get_address(env, r2);
1294     uint8_t pad = env->regs[r2 + 1] >> 24;
1295     uint32_t cc;
1296 
1297     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, -1, 1, ra);
1298 
1299     env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, src1len);
1300     env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, src3len);
1301     set_address(env, r1, src1);
1302     set_address(env, r2, src3);
1303 
1304     return cc;
1305 }
1306 
1307 /* compare logical long extended memcompare insn with padding */
1308 uint32_t HELPER(clcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1309                        uint32_t r3)
1310 {
1311     uintptr_t ra = GETPC();
1312     uint64_t src1len = get_length(env, r1 + 1);
1313     uint64_t src1 = get_address(env, r1);
1314     uint64_t src3len = get_length(env, r3 + 1);
1315     uint64_t src3 = get_address(env, r3);
1316     uint8_t pad = a2;
1317     uint32_t cc;
1318 
1319     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x2000, 1, ra);
1320 
1321     set_length(env, r1 + 1, src1len);
1322     set_length(env, r3 + 1, src3len);
1323     set_address(env, r1, src1);
1324     set_address(env, r3, src3);
1325 
1326     return cc;
1327 }
1328 
1329 /* compare logical long unicode memcompare insn with padding */
1330 uint32_t HELPER(clclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1331                        uint32_t r3)
1332 {
1333     uintptr_t ra = GETPC();
1334     uint64_t src1len = get_length(env, r1 + 1);
1335     uint64_t src1 = get_address(env, r1);
1336     uint64_t src3len = get_length(env, r3 + 1);
1337     uint64_t src3 = get_address(env, r3);
1338     uint16_t pad = a2;
1339     uint32_t cc = 0;
1340 
1341     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x1000, 2, ra);
1342 
1343     set_length(env, r1 + 1, src1len);
1344     set_length(env, r3 + 1, src3len);
1345     set_address(env, r1, src1);
1346     set_address(env, r3, src3);
1347 
1348     return cc;
1349 }
1350 
/* checksum */
Int128 HELPER(cksm)(CPUS390XState *env, uint64_t r1,
                    uint64_t src, uint64_t src_len)
{
    uintptr_t ra = GETPC();
    uint64_t max_len, len;
    /* Accumulate into the low 32 bits of the R1 value; carries are
       collected in the upper half and folded back at the end.  */
    uint64_t cksm = (uint32_t)r1;

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    max_len = (src_len > 0x2000 ? 0x2000 : src_len);

    /* Process full words as available.  */
    for (len = 0; len + 4 <= max_len; len += 4, src += 4) {
        cksm += (uint32_t)cpu_ldl_data_ra(env, src, ra);
    }

    /* Handle the 1-3 remaining bytes, left-justified in a word.  */
    switch (max_len - len) {
    case 1:
        cksm += cpu_ldub_data_ra(env, src, ra) << 24;
        len += 1;
        break;
    case 2:
        cksm += cpu_lduw_data_ra(env, src, ra) << 16;
        len += 2;
        break;
    case 3:
        cksm += cpu_lduw_data_ra(env, src, ra) << 16;
        cksm += cpu_ldub_data_ra(env, src + 2, ra) << 8;
        len += 3;
        break;
    }

    /* Fold the carry from the checksum.  Note that we can see carry-out
       during folding more than once (but probably not more than twice).  */
    while (cksm > 0xffffffffull) {
        cksm = (uint32_t)cksm + (cksm >> 32);
    }

    /* Indicate whether or not we've processed everything.  */
    env->cc_op = (len == src_len ? 0 : 3);

    /* Return both cksm and processed length.  */
    return int128_make128(cksm, len);
}
1396 
/*
 * PACK: pack the digits of the source field into the destination,
 * processing both operands from right to left.  The single @len
 * operand encodes both operand lengths: destination length in the
 * high nibble, source length in the low nibble.
 */
void HELPER(pack)(CPUS390XState *env, uint32_t len, uint64_t dest, uint64_t src)
{
    uintptr_t ra = GETPC();
    int len_dest = len >> 4;
    int len_src = len & 0xf;
    uint8_t b;

    /* Point at the rightmost byte of each operand.  */
    dest += len_dest;
    src += len_src;

    /* last byte is special, it only flips the nibbles */
    b = cpu_ldub_data_ra(env, src, ra);
    cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
    src--;
    len_src--;

    /* now pack every value */
    while (len_dest > 0) {
        b = 0;

        /* Low nibble of the output from the next source byte...  */
        if (len_src >= 0) {
            b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
            src--;
            len_src--;
        }
        /* ...high nibble from the one after; zero once src runs out.  */
        if (len_src >= 0) {
            b |= cpu_ldub_data_ra(env, src, ra) << 4;
            src--;
            len_src--;
        }

        len_dest--;
        dest--;
        cpu_stb_data_ra(env, dest, b, ra);
    }
}
1433 
/*
 * Common implementation of PKA/PKU: pack a zoned (ssize == 1) or
 * Unicode (ssize == 2) source of @srclen bytes into a 16-byte packed
 * field at @dest, right to left, always generating a positive sign.
 */
static inline void do_pkau(CPUS390XState *env, uint64_t dest, uint64_t src,
                           uint32_t srclen, int ssize, uintptr_t ra)
{
    int i;
    /* The destination operand is always 16 bytes long.  */
    const int destlen = 16;

    /* The operands are processed from right to left.  */
    src += srclen - 1;
    dest += destlen - 1;

    for (i = 0; i < destlen; i++) {
        uint8_t b = 0;

        /* Start with a positive sign */
        if (i == 0) {
            b = 0xc;
        } else if (srclen > ssize) {
            /* NOTE(review): strict '>' here means a unit is consumed only
               while more than one remains — relies on the caller's length
               convention; confirm against the translator if touching.  */
            b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
            src -= ssize;
            srclen -= ssize;
        }

        if (srclen > ssize) {
            b |= cpu_ldub_data_ra(env, src, ra) << 4;
            src -= ssize;
            srclen -= ssize;
        }

        cpu_stb_data_ra(env, dest, b, ra);
        dest--;
    }
}
1467 
1468 
1469 void HELPER(pka)(CPUS390XState *env, uint64_t dest, uint64_t src,
1470                  uint32_t srclen)
1471 {
1472     do_pkau(env, dest, src, srclen, 1, GETPC());
1473 }
1474 
1475 void HELPER(pku)(CPUS390XState *env, uint64_t dest, uint64_t src,
1476                  uint32_t srclen)
1477 {
1478     do_pkau(env, dest, src, srclen, 2, GETPC());
1479 }
1480 
/*
 * UNPK: unpack the source digits into the destination, one zoned byte
 * per digit, right to left.  @len encodes the destination length in
 * its high nibble and the source length in its low nibble.
 */
void HELPER(unpk)(CPUS390XState *env, uint32_t len, uint64_t dest,
                  uint64_t src)
{
    uintptr_t ra = GETPC();
    int len_dest = len >> 4;
    int len_src = len & 0xf;
    uint8_t b;
    /* Tracks which nibble of the current source byte to emit next.  */
    int second_nibble = 0;

    /* Point at the rightmost byte of each operand.  */
    dest += len_dest;
    src += len_src;

    /* last byte is special, it only flips the nibbles */
    b = cpu_ldub_data_ra(env, src, ra);
    cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
    src--;
    len_src--;

    /* now pad every nibble with 0xf0 */

    while (len_dest > 0) {
        uint8_t cur_byte = 0;

        /* Digits beyond the source read as zero.  */
        if (len_src > 0) {
            cur_byte = cpu_ldub_data_ra(env, src, ra);
        }

        len_dest--;
        dest--;

        /* only advance one nibble at a time */
        if (second_nibble) {
            cur_byte >>= 4;
            len_src--;
            src--;
        }
        second_nibble = !second_nibble;

        /* digit */
        cur_byte = (cur_byte & 0xf);
        /* zone bits */
        cur_byte |= 0xf0;

        cpu_stb_data_ra(env, dest, cur_byte, ra);
    }
}
1527 
/*
 * Common implementation of UNPKA/UNPKU: unpack a 16-byte packed field
 * at @src into @destlen bytes at @dest, emitting @dsize-byte output
 * units (1 for ASCII, 2 for Unicode), right to left.  Returns the
 * condition code derived from the sign nibble: 0 plus, 1 minus,
 * 3 invalid sign.
 */
static inline uint32_t do_unpkau(CPUS390XState *env, uint64_t dest,
                                 uint32_t destlen, int dsize, uint64_t src,
                                 uintptr_t ra)
{
    int i;
    uint32_t cc;
    uint8_t b;
    /* The source operand is always 16 bytes long.  */
    const int srclen = 16;

    /* The operands are processed from right to left.  */
    src += srclen - 1;
    dest += destlen - dsize;

    /* Check for the sign.  */
    b = cpu_ldub_data_ra(env, src, ra);
    src--;
    switch (b & 0xf) {
    case 0xa:
    case 0xc:
    case 0xe ... 0xf:
        cc = 0;  /* plus */
        break;
    case 0xb:
    case 0xd:
        cc = 1;  /* minus */
        break;
    default:
    case 0x0 ... 0x9:
        cc = 3;  /* invalid */
        break;
    }

    /* Now pad every nibble with 0x30, advancing one nibble at a time. */
    for (i = 0; i < destlen; i += dsize) {
        if (i == (31 * dsize)) {
            /* If length is 32/64 bytes, the leftmost byte is 0. */
            b = 0;
        } else if (i % (2 * dsize)) {
            /* Odd output unit: fetch the next source byte.  */
            b = cpu_ldub_data_ra(env, src, ra);
            src--;
        } else {
            /* Even output unit: use the high nibble of the last byte
               (the first iteration consumes the sign byte's digit).  */
            b >>= 4;
        }
        cpu_stsize_data_ra(env, dest, 0x30 + (b & 0xf), dsize, ra);
        dest -= dsize;
    }

    return cc;
}
1578 
1579 uint32_t HELPER(unpka)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
1580                        uint64_t src)
1581 {
1582     return do_unpkau(env, dest, destlen, 1, src, GETPC());
1583 }
1584 
1585 uint32_t HELPER(unpku)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
1586                        uint64_t src)
1587 {
1588     return do_unpkau(env, dest, destlen, 2, src, GETPC());
1589 }
1590 
1591 uint32_t HELPER(tp)(CPUS390XState *env, uint64_t dest, uint32_t destlen)
1592 {
1593     uintptr_t ra = GETPC();
1594     uint32_t cc = 0;
1595     int i;
1596 
1597     for (i = 0; i < destlen; i++) {
1598         uint8_t b = cpu_ldub_data_ra(env, dest + i, ra);
1599         /* digit */
1600         cc |= (b & 0xf0) > 0x90 ? 2 : 0;
1601 
1602         if (i == (destlen - 1)) {
1603             /* sign */
1604             cc |= (b & 0xf) < 0xa ? 1 : 0;
1605         } else {
1606             /* digit */
1607             cc |= (b & 0xf) > 0x9 ? 2 : 0;
1608         }
1609     }
1610 
1611     return cc;
1612 }
1613 
1614 static uint32_t do_helper_tr(CPUS390XState *env, uint32_t len, uint64_t array,
1615                              uint64_t trans, uintptr_t ra)
1616 {
1617     uint32_t i;
1618 
1619     for (i = 0; i <= len; i++) {
1620         uint8_t byte = cpu_ldub_data_ra(env, array + i, ra);
1621         uint8_t new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
1622         cpu_stb_data_ra(env, array + i, new_byte, ra);
1623     }
1624 
1625     return env->cc_op;
1626 }
1627 
1628 void HELPER(tr)(CPUS390XState *env, uint32_t len, uint64_t array,
1629                 uint64_t trans)
1630 {
1631     do_helper_tr(env, len, array, trans, GETPC());
1632 }
1633 
/*
 * TRANSLATE EXTENDED: translate bytes of @array through the table at
 * @trans, stopping at the test byte (low byte of R0).  Sets CC 0 when
 * the whole operand was processed, CC 1 when the test byte was found,
 * CC 3 on CPU-determined early exit.  Returns the remaining length
 * and the updated first-operand address packed in an Int128.
 */
Int128 HELPER(tre)(CPUS390XState *env, uint64_t array,
                   uint64_t len, uint64_t trans)
{
    uintptr_t ra = GETPC();
    /* The test character is the low byte of R0.  */
    uint8_t end = env->regs[0] & 0xff;
    uint64_t l = len;
    uint64_t i;
    uint32_t cc = 0;

    if (!(env->psw.mask & PSW_MASK_64)) {
        /* Not in 64-bit mode: truncate address and length.  */
        array &= 0x7fffffff;
        l = (uint32_t)l;
    }

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    if (l > 0x2000) {
        l = 0x2000;
        cc = 3;
    }

    for (i = 0; i < l; i++) {
        uint8_t byte, new_byte;

        byte = cpu_ldub_data_ra(env, array + i, ra);

        /* The test byte stops translation before being stored.  */
        if (byte == end) {
            cc = 1;
            break;
        }

        new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
        cpu_stb_data_ra(env, array + i, new_byte, ra);
    }

    env->cc_op = cc;
    return int128_make128(len - i, array + i);
}
1672 
1673 static inline uint32_t do_helper_trt(CPUS390XState *env, int len,
1674                                      uint64_t array, uint64_t trans,
1675                                      int inc, uintptr_t ra)
1676 {
1677     int i;
1678 
1679     for (i = 0; i <= len; i++) {
1680         uint8_t byte = cpu_ldub_data_ra(env, array + i * inc, ra);
1681         uint8_t sbyte = cpu_ldub_data_ra(env, trans + byte, ra);
1682 
1683         if (sbyte != 0) {
1684             set_address(env, 1, array + i * inc);
1685             env->regs[2] = deposit64(env->regs[2], 0, 8, sbyte);
1686             return (i == len) ? 2 : 1;
1687         }
1688     }
1689 
1690     return 0;
1691 }
1692 
/* Forward (increment +1) variant of do_helper_trt, for indirect use.  */
static uint32_t do_helper_trt_fwd(CPUS390XState *env, uint32_t len,
                                  uint64_t array, uint64_t trans,
                                  uintptr_t ra)
{
    return do_helper_trt(env, len, array, trans, 1, ra);
}
1699 
1700 uint32_t HELPER(trt)(CPUS390XState *env, uint32_t len, uint64_t array,
1701                      uint64_t trans)
1702 {
1703     return do_helper_trt(env, len, array, trans, 1, GETPC());
1704 }
1705 
/* Backward (increment -1) variant of do_helper_trt, for indirect use.  */
static uint32_t do_helper_trt_bkwd(CPUS390XState *env, uint32_t len,
                                   uint64_t array, uint64_t trans,
                                   uintptr_t ra)
{
    return do_helper_trt(env, len, array, trans, -1, ra);
}
1712 
1713 uint32_t HELPER(trtr)(CPUS390XState *env, uint32_t len, uint64_t array,
1714                       uint64_t trans)
1715 {
1716     return do_helper_trt(env, len, array, trans, -1, GETPC());
1717 }
1718 
/* Translate one/two to one/two */
/*
 * Common helper for TROO/TROT/TRTO/TRTT.  @sizes selects the unit
 * widths: bit 0 set => 1-byte destination, bit 1 set => 1-byte source.
 * @tst is the test character that stops translation with CC 1.
 * GR1 holds the table address; R1/R1+1/R2 hold dest/len/src and are
 * written back on exit.  CC 3 indicates a CPU-determined early exit.
 */
uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2,
                      uint32_t tst, uint32_t sizes)
{
    uintptr_t ra = GETPC();
    int dsize = (sizes & 1) ? 1 : 2;
    int ssize = (sizes & 2) ? 1 : 2;
    uint64_t tbl = get_address(env, 1);
    uint64_t dst = get_address(env, r1);
    uint64_t len = get_length(env, r1 + 1);
    uint64_t src = get_address(env, r2);
    uint32_t cc = 3;
    int i;

    /* The lower address bits of TBL are ignored.  For TROO, TROT, it's
       the low 3 bits (double-word aligned).  For TRTO, TRTT, it's either
       the low 12 bits (4K, without ETF2-ENH) or 3 bits (with ETF2-ENH).  */
    if (ssize == 2 && !s390_has_feat(S390_FEAT_ETF2_ENH)) {
        tbl &= -4096;
    } else {
        tbl &= -8;
    }

    /* The operand length must be a multiple of the source unit size.  */
    check_alignment(env, len, ssize, ra);

    /* Lest we fail to service interrupts in a timely manner, */
    /* limit the amount of work we're willing to do.   */
    for (i = 0; i < 0x2000; i++) {
        uint16_t sval = cpu_ldusize_data_ra(env, src, ssize, ra);
        uint64_t tble = tbl + (sval * dsize);
        uint16_t dval = cpu_ldusize_data_ra(env, tble, dsize, ra);
        /* Stop (without storing) when the translated value matches
           the test character.  */
        if (dval == tst) {
            cc = 1;
            break;
        }
        cpu_stsize_data_ra(env, dst, dval, dsize, ra);

        len -= ssize;
        src += ssize;
        dst += dsize;

        if (len == 0) {
            cc = 0;
            break;
        }
    }

    /* Write the updated operands back to the registers.  */
    set_address(env, r1, dst);
    set_length(env, r1 + 1, len);
    set_address(env, r2, src);

    return cc;
}
1772 
1773 static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
1774                         uint64_t a2, bool parallel)
1775 {
1776     uint32_t mem_idx = s390x_env_mmu_index(env, false);
1777     MemOpIdx oi16 = make_memop_idx(MO_TE | MO_128, mem_idx);
1778     MemOpIdx oi8 = make_memop_idx(MO_TE | MO_64, mem_idx);
1779     MemOpIdx oi4 = make_memop_idx(MO_TE | MO_32, mem_idx);
1780     MemOpIdx oi2 = make_memop_idx(MO_TE | MO_16, mem_idx);
1781     MemOpIdx oi1 = make_memop_idx(MO_8, mem_idx);
1782     uintptr_t ra = GETPC();
1783     uint32_t fc = extract32(env->regs[0], 0, 8);
1784     uint32_t sc = extract32(env->regs[0], 8, 8);
1785     uint64_t pl = get_address(env, 1) & -16;
1786     uint64_t svh, svl;
1787     uint32_t cc;
1788 
1789     /* Sanity check the function code and storage characteristic.  */
1790     if (fc > 1 || sc > 3) {
1791         if (!s390_has_feat(S390_FEAT_COMPARE_AND_SWAP_AND_STORE_2)) {
1792             goto spec_exception;
1793         }
1794         if (fc > 2 || sc > 4 || (fc == 2 && (r3 & 1))) {
1795             goto spec_exception;
1796         }
1797     }
1798 
1799     /* Sanity check the alignments.  */
1800     if (extract32(a1, 0, fc + 2) || extract32(a2, 0, sc)) {
1801         goto spec_exception;
1802     }
1803 
1804     /* Sanity check writability of the store address.  */
1805     probe_write(env, a2, 1 << sc, mem_idx, ra);
1806 
1807     /*
1808      * Note that the compare-and-swap is atomic, and the store is atomic,
1809      * but the complete operation is not.  Therefore we do not need to
1810      * assert serial context in order to implement this.  That said,
1811      * restart early if we can't support either operation that is supposed
1812      * to be atomic.
1813      */
1814     if (parallel) {
1815         uint32_t max = 2;
1816 #ifdef CONFIG_ATOMIC64
1817         max = 3;
1818 #endif
1819         if ((HAVE_CMPXCHG128 ? 0 : fc + 2 > max) ||
1820             (HAVE_ATOMIC128_RW ? 0 : sc > max)) {
1821             cpu_loop_exit_atomic(env_cpu(env), ra);
1822         }
1823     }
1824 
1825     /*
1826      * All loads happen before all stores.  For simplicity, load the entire
1827      * store value area from the parameter list.
1828      */
1829     svh = cpu_ldq_mmu(env, pl + 16, oi8, ra);
1830     svl = cpu_ldq_mmu(env, pl + 24, oi8, ra);
1831 
1832     switch (fc) {
1833     case 0:
1834         {
1835             uint32_t nv = cpu_ldl_mmu(env, pl, oi4, ra);
1836             uint32_t cv = env->regs[r3];
1837             uint32_t ov;
1838 
1839             if (parallel) {
1840                 ov = cpu_atomic_cmpxchgl_be_mmu(env, a1, cv, nv, oi4, ra);
1841             } else {
1842                 ov = cpu_ldl_mmu(env, a1, oi4, ra);
1843                 cpu_stl_mmu(env, a1, (ov == cv ? nv : ov), oi4, ra);
1844             }
1845             cc = (ov != cv);
1846             env->regs[r3] = deposit64(env->regs[r3], 32, 32, ov);
1847         }
1848         break;
1849 
1850     case 1:
1851         {
1852             uint64_t nv = cpu_ldq_mmu(env, pl, oi8, ra);
1853             uint64_t cv = env->regs[r3];
1854             uint64_t ov;
1855 
1856             if (parallel) {
1857 #ifdef CONFIG_ATOMIC64
1858                 ov = cpu_atomic_cmpxchgq_be_mmu(env, a1, cv, nv, oi8, ra);
1859 #else
1860                 /* Note that we asserted !parallel above.  */
1861                 g_assert_not_reached();
1862 #endif
1863             } else {
1864                 ov = cpu_ldq_mmu(env, a1, oi8, ra);
1865                 cpu_stq_mmu(env, a1, (ov == cv ? nv : ov), oi8, ra);
1866             }
1867             cc = (ov != cv);
1868             env->regs[r3] = ov;
1869         }
1870         break;
1871 
1872     case 2:
1873         {
1874             Int128 nv = cpu_ld16_mmu(env, pl, oi16, ra);
1875             Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1876             Int128 ov;
1877 
1878             if (!parallel) {
1879                 ov = cpu_ld16_mmu(env, a1, oi16, ra);
1880                 cc = !int128_eq(ov, cv);
1881                 if (cc) {
1882                     nv = ov;
1883                 }
1884                 cpu_st16_mmu(env, a1, nv, oi16, ra);
1885             } else if (HAVE_CMPXCHG128) {
1886                 ov = cpu_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi16, ra);
1887                 cc = !int128_eq(ov, cv);
1888             } else {
1889                 /* Note that we asserted !parallel above.  */
1890                 g_assert_not_reached();
1891             }
1892 
1893             env->regs[r3 + 0] = int128_gethi(ov);
1894             env->regs[r3 + 1] = int128_getlo(ov);
1895         }
1896         break;
1897 
1898     default:
1899         g_assert_not_reached();
1900     }
1901 
1902     /* Store only if the comparison succeeded.  Note that above we use a pair
1903        of 64-bit big-endian loads, so for sc < 3 we must extract the value
1904        from the most-significant bits of svh.  */
1905     if (cc == 0) {
1906         switch (sc) {
1907         case 0:
1908             cpu_stb_mmu(env, a2, svh >> 56, oi1, ra);
1909             break;
1910         case 1:
1911             cpu_stw_mmu(env, a2, svh >> 48, oi2, ra);
1912             break;
1913         case 2:
1914             cpu_stl_mmu(env, a2, svh >> 32, oi4, ra);
1915             break;
1916         case 3:
1917             cpu_stq_mmu(env, a2, svh, oi8, ra);
1918             break;
1919         case 4:
1920             cpu_st16_mmu(env, a2, int128_make128(svl, svh), oi16, ra);
1921             break;
1922         default:
1923             g_assert_not_reached();
1924         }
1925     }
1926 
1927     return cc;
1928 
1929  spec_exception:
1930     tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1931 }
1932 
/* COMPARE AND SWAP AND STORE, invoked from a serial (non-parallel) context. */
uint32_t HELPER(csst)(CPUS390XState *env, uint32_t r3, uint64_t a1, uint64_t a2)
{
    return do_csst(env, r3, a1, a2, false);
}
1937 
/*
 * COMPARE AND SWAP AND STORE, invoked from a parallel context.  do_csst
 * will restart via cpu_loop_exit_atomic if the host cannot perform the
 * required compare-and-swap or store width atomically.
 */
uint32_t HELPER(csst_parallel)(CPUS390XState *env, uint32_t r3, uint64_t a1,
                               uint64_t a2)
{
    return do_csst(env, r3, a1, a2, true);
}
1943 
1944 #if !defined(CONFIG_USER_ONLY)
1945 void HELPER(lctlg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1946 {
1947     uintptr_t ra = GETPC();
1948     bool PERchanged = false;
1949     uint64_t src = a2;
1950     uint32_t i;
1951 
1952     if (src & 0x7) {
1953         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1954     }
1955 
1956     for (i = r1;; i = (i + 1) % 16) {
1957         uint64_t val = cpu_ldq_data_ra(env, src, ra);
1958         if (env->cregs[i] != val && i >= 9 && i <= 11) {
1959             PERchanged = true;
1960         }
1961         env->cregs[i] = val;
1962         HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%" PRIx64 "\n",
1963                    i, src, val);
1964         src += sizeof(uint64_t);
1965 
1966         if (i == r3) {
1967             break;
1968         }
1969     }
1970 
1971     if (PERchanged && env->psw.mask & PSW_MASK_PER) {
1972         s390_cpu_recompute_watchpoints(env_cpu(env));
1973     }
1974 
1975     tlb_flush(env_cpu(env));
1976 }
1977 
1978 void HELPER(lctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1979 {
1980     uintptr_t ra = GETPC();
1981     bool PERchanged = false;
1982     uint64_t src = a2;
1983     uint32_t i;
1984 
1985     if (src & 0x3) {
1986         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1987     }
1988 
1989     for (i = r1;; i = (i + 1) % 16) {
1990         uint32_t val = cpu_ldl_data_ra(env, src, ra);
1991         if ((uint32_t)env->cregs[i] != val && i >= 9 && i <= 11) {
1992             PERchanged = true;
1993         }
1994         env->cregs[i] = deposit64(env->cregs[i], 0, 32, val);
1995         HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%x\n", i, src, val);
1996         src += sizeof(uint32_t);
1997 
1998         if (i == r3) {
1999             break;
2000         }
2001     }
2002 
2003     if (PERchanged && env->psw.mask & PSW_MASK_PER) {
2004         s390_cpu_recompute_watchpoints(env_cpu(env));
2005     }
2006 
2007     tlb_flush(env_cpu(env));
2008 }
2009 
2010 void HELPER(stctg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2011 {
2012     uintptr_t ra = GETPC();
2013     uint64_t dest = a2;
2014     uint32_t i;
2015 
2016     if (dest & 0x7) {
2017         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2018     }
2019 
2020     for (i = r1;; i = (i + 1) % 16) {
2021         cpu_stq_data_ra(env, dest, env->cregs[i], ra);
2022         dest += sizeof(uint64_t);
2023 
2024         if (i == r3) {
2025             break;
2026         }
2027     }
2028 }
2029 
2030 void HELPER(stctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2031 {
2032     uintptr_t ra = GETPC();
2033     uint64_t dest = a2;
2034     uint32_t i;
2035 
2036     if (dest & 0x3) {
2037         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2038     }
2039 
2040     for (i = r1;; i = (i + 1) % 16) {
2041         cpu_stl_data_ra(env, dest, env->cregs[i], ra);
2042         dest += sizeof(uint32_t);
2043 
2044         if (i == r3) {
2045             break;
2046         }
2047     }
2048 }
2049 
2050 uint32_t HELPER(testblock)(CPUS390XState *env, uint64_t real_addr)
2051 {
2052     uintptr_t ra = GETPC();
2053     int i;
2054 
2055     real_addr = wrap_address(env, real_addr) & TARGET_PAGE_MASK;
2056 
2057     for (i = 0; i < TARGET_PAGE_SIZE; i += 8) {
2058         cpu_stq_mmuidx_ra(env, real_addr + i, 0, MMU_REAL_IDX, ra);
2059     }
2060 
2061     return 0;
2062 }
2063 
/*
 * TEST PROTECTION: probe whether a1 can be stored to and/or fetched from,
 * and return the condition code.  The s390_cpu_virt_mem_* probes leave a
 * pending program interruption (int_pgm_code / exception_index) behind on
 * failure; for the cases TPROT reports via cc, that pending state is
 * cleared again by resetting exception_index.
 */
uint32_t HELPER(tprot)(CPUS390XState *env, uint64_t a1, uint64_t a2)
{
    S390CPU *cpu = env_archcpu(env);
    CPUState *cs = env_cpu(env);

    /*
     * TODO: we currently don't handle all access protection types
     * (including access-list and key-controlled) as well as AR mode.
     */
    if (!s390_cpu_virt_mem_check_write(cpu, a1, 0, 1)) {
        /* Fetching permitted; storing permitted */
        return 0;
    }

    if (env->int_pgm_code == PGM_PROTECTION) {
        /* retry if reading is possible */
        cs->exception_index = -1;
        if (!s390_cpu_virt_mem_check_read(cpu, a1, 0, 1)) {
            /* Fetching permitted; storing not permitted */
            return 1;
        }
    }

    switch (env->int_pgm_code) {
    case PGM_PROTECTION:
        /* Fetching not permitted; storing not permitted */
        cs->exception_index = -1;
        return 2;
    case PGM_ADDRESSING:
    case PGM_TRANS_SPEC:
        /* exceptions forwarded to the guest */
        s390_cpu_virt_mem_handle_exc(cpu, GETPC());
        return 0;
    }

    /* Translation not available */
    cs->exception_index = -1;
    return 3;
}
2103 
2104 /* insert storage key extended */
2105 uint64_t HELPER(iske)(CPUS390XState *env, uint64_t r2)
2106 {
2107     static S390SKeysState *ss;
2108     static S390SKeysClass *skeyclass;
2109     uint64_t addr = wrap_address(env, r2);
2110     uint8_t key;
2111     int rc;
2112 
2113     addr = mmu_real2abs(env, addr);
2114     if (!mmu_absolute_addr_valid(addr, false)) {
2115         tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
2116     }
2117 
2118     if (unlikely(!ss)) {
2119         ss = s390_get_skeys_device();
2120         skeyclass = S390_SKEYS_GET_CLASS(ss);
2121         if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
2122             tlb_flush_all_cpus_synced(env_cpu(env));
2123         }
2124     }
2125 
2126     rc = s390_skeys_get(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2127     if (rc) {
2128         return 0;
2129     }
2130     return key;
2131 }
2132 
2133 /* set storage key extended */
2134 void HELPER(sske)(CPUS390XState *env, uint64_t r1, uint64_t r2)
2135 {
2136     static S390SKeysState *ss;
2137     static S390SKeysClass *skeyclass;
2138     uint64_t addr = wrap_address(env, r2);
2139     uint8_t key;
2140 
2141     addr = mmu_real2abs(env, addr);
2142     if (!mmu_absolute_addr_valid(addr, false)) {
2143         tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
2144     }
2145 
2146     if (unlikely(!ss)) {
2147         ss = s390_get_skeys_device();
2148         skeyclass = S390_SKEYS_GET_CLASS(ss);
2149         if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
2150             tlb_flush_all_cpus_synced(env_cpu(env));
2151         }
2152     }
2153 
2154     key = r1 & 0xfe;
2155     s390_skeys_set(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2156    /*
2157     * As we can only flush by virtual address and not all the entries
2158     * that point to a physical address we have to flush the whole TLB.
2159     */
2160     tlb_flush_all_cpus_synced(env_cpu(env));
2161 }
2162 
/*
 * RESET REFERENCE BIT EXTENDED: clear the reference (R) bit in the
 * storage key of the frame designated by the real address in r2, and
 * return a condition code derived from the previous R and C bits.
 * Skeys device failures are reported as cc 0.
 */
uint32_t HELPER(rrbe)(CPUS390XState *env, uint64_t r2)
{
    uint64_t addr = wrap_address(env, r2);
    static S390SKeysState *ss;
    static S390SKeysClass *skeyclass;
    uint8_t re, key;
    int rc;

    addr = mmu_real2abs(env, addr);
    if (!mmu_absolute_addr_valid(addr, false)) {
        tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
    }

    /* Lazily look up and cache the skeys device. */
    if (unlikely(!ss)) {
        ss = s390_get_skeys_device();
        skeyclass = S390_SKEYS_GET_CLASS(ss);
        if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
            tlb_flush_all_cpus_synced(env_cpu(env));
        }
    }

    rc = s390_skeys_get(ss, addr / TARGET_PAGE_SIZE, 1, &key);
    if (rc) {
        return 0;
    }

    /* Save the old R and C bits, then clear R in the key. */
    re = key & (SK_R | SK_C);
    key &= ~SK_R;

    rc = s390_skeys_set(ss, addr / TARGET_PAGE_SIZE, 1, &key);
    if (rc) {
        return 0;
    }
    /*
     * As we can only flush by virtual address and not all the entries
     * that point to a physical address we have to flush the whole TLB.
     */
    tlb_flush_all_cpus_synced(env_cpu(env));

    /*
     * cc
     *
     * 0  Reference bit zero; change bit zero
     * 1  Reference bit zero; change bit one
     * 2  Reference bit one; change bit zero
     * 3  Reference bit one; change bit one
     */

    /* Shift the saved R/C bits down to form the condition code. */
    return re >> 1;
}
2214 
/*
 * MOVE TO SECONDARY: copy up to 256 bytes from the primary address space
 * (a2) to the secondary address space (a1).  l holds the true length;
 * cc 3 is returned when it exceeds 256 and only 256 bytes are moved.
 * NOTE(review): key-controlled protection with the supplied access key
 * is not applied here beyond validating the key against the PSW.
 */
uint32_t HELPER(mvcs)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2,
                      uint64_t key)
{
    const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
    S390Access srca, desta;
    uintptr_t ra = GETPC();
    int cc = 0;

    HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
               __func__, l, a1, a2);

    /* Requires DAT on, secondary space control, and a non-home/AR mode. */
    if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
        psw_as == AS_HOME || psw_as == AS_ACCREG) {
        s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }

    if (!psw_key_valid(env, (key >> 4) & 0xf)) {
        s390_program_interrupt(env, PGM_PRIVILEGED, ra);
    }

    l = wrap_length32(env, l);
    if (l > 256) {
        /* max 256 */
        l = 256;
        cc = 3;
    } else if (!l) {
        return cc;
    }

    access_prepare(&srca, env, a2, l, MMU_DATA_LOAD, MMU_PRIMARY_IDX, ra);
    access_prepare(&desta, env, a1, l, MMU_DATA_STORE, MMU_SECONDARY_IDX, ra);
    access_memmove(env, &desta, &srca, ra);
    return cc;
}
2249 
/*
 * MOVE TO PRIMARY: copy up to 256 bytes from the secondary address space
 * (a2) to the primary address space (a1).  l holds the true length;
 * cc 3 is returned when it exceeds 256 and only 256 bytes are moved.
 * NOTE(review): key-controlled protection with the supplied access key
 * is not applied here beyond validating the key against the PSW.
 */
uint32_t HELPER(mvcp)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2,
                      uint64_t key)
{
    const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
    S390Access srca, desta;
    uintptr_t ra = GETPC();
    int cc = 0;

    HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
               __func__, l, a1, a2);

    /* Requires DAT on, secondary space control, and a non-home/AR mode. */
    if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
        psw_as == AS_HOME || psw_as == AS_ACCREG) {
        s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }

    if (!psw_key_valid(env, (key >> 4) & 0xf)) {
        s390_program_interrupt(env, PGM_PRIVILEGED, ra);
    }

    l = wrap_length32(env, l);
    if (l > 256) {
        /* max 256 */
        l = 256;
        cc = 3;
    } else if (!l) {
        return cc;
    }
    access_prepare(&srca, env, a2, l, MMU_DATA_LOAD, MMU_SECONDARY_IDX, ra);
    access_prepare(&desta, env, a1, l, MMU_DATA_STORE, MMU_PRIMARY_IDX, ra);
    access_memmove(env, &desta, &srca, ra);
    return cc;
}
2283 
/*
 * INVALIDATE DAT TABLE ENTRY: r1 holds the table origin and type, r2 the
 * effective address plus option bits and the additional-entry count, m4
 * selects local-only (bit 0 set) vs broadcast TLB clearing.
 */
void HELPER(idte)(CPUS390XState *env, uint64_t r1, uint64_t r2, uint32_t m4)
{
    CPUState *cs = env_cpu(env);
    const uintptr_t ra = GETPC();
    uint64_t table, entry, raddr;
    uint16_t entries, i, index = 0;

    /* Bits 44-51 of r2 must be zero. */
    if (r2 & 0xff000) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    if (!(r2 & 0x800)) {
        /* invalidation-and-clearing operation */
        table = r1 & ASCE_ORIGIN;
        entries = (r2 & 0x7ff) + 1;

        /* The table index comes from a different field of the effective
           address depending on the designated table level.  */
        switch (r1 & ASCE_TYPE_MASK) {
        case ASCE_TYPE_REGION1:
            index = (r2 >> 53) & 0x7ff;
            break;
        case ASCE_TYPE_REGION2:
            index = (r2 >> 42) & 0x7ff;
            break;
        case ASCE_TYPE_REGION3:
            index = (r2 >> 31) & 0x7ff;
            break;
        case ASCE_TYPE_SEGMENT:
            index = (r2 >> 20) & 0x7ff;
            break;
        }
        for (i = 0; i < entries; i++) {
            /* addresses are not wrapped in 24/31bit mode but table index is */
            raddr = table + ((index + i) & 0x7ff) * sizeof(entry);
            entry = cpu_ldq_mmuidx_ra(env, raddr, MMU_REAL_IDX, ra);
            if (!(entry & REGION_ENTRY_I)) {
                /* we are allowed to not store if already invalid */
                entry |= REGION_ENTRY_I;
                cpu_stq_mmuidx_ra(env, raddr, entry, MMU_REAL_IDX, ra);
            }
        }
    }

    /* We simply flush the complete tlb, therefore we can ignore r3. */
    if (m4 & 1) {
        tlb_flush(cs);
    } else {
        tlb_flush_all_cpus_synced(cs);
    }
}
2333 
/*
 * INVALIDATE PAGE TABLE ENTRY: mark the PTE for vaddr (within the page
 * table designated by pto) invalid, then flush the affected translations.
 * m4 bit 0 selects local-only flushing; otherwise the flush is broadcast
 * to all CPUs.
 */
void HELPER(ipte)(CPUS390XState *env, uint64_t pto, uint64_t vaddr,
                  uint32_t m4)
{
    CPUState *cs = env_cpu(env);
    const uintptr_t ra = GETPC();
    uint64_t page = vaddr & TARGET_PAGE_MASK;
    uint64_t pte_addr, pte;

    /* Compute the page table entry address */
    pte_addr = (pto & SEGMENT_ENTRY_ORIGIN);
    pte_addr += VADDR_PAGE_TX(vaddr) * 8;

    /* Mark the page table entry as invalid */
    pte = cpu_ldq_mmuidx_ra(env, pte_addr, MMU_REAL_IDX, ra);
    pte |= PAGE_ENTRY_I;
    cpu_stq_mmuidx_ra(env, pte_addr, pte, MMU_REAL_IDX, ra);

    /* XXX we exploit the fact that Linux passes the exact virtual
       address here - it's not obliged to! */
    if (m4 & 1) {
        if (vaddr & ~VADDR_PAGE_TX_MASK) {
            tlb_flush_page(cs, page);
            /* XXX 31-bit hack */
            tlb_flush_page(cs, page ^ 0x80000000);
        } else {
            /* looks like we don't have a valid virtual address */
            tlb_flush(cs);
        }
    } else {
        if (vaddr & ~VADDR_PAGE_TX_MASK) {
            tlb_flush_page_all_cpus_synced(cs, page);
            /* XXX 31-bit hack */
            tlb_flush_page_all_cpus_synced(cs, page ^ 0x80000000);
        } else {
            /* looks like we don't have a valid virtual address */
            tlb_flush_all_cpus_synced(cs);
        }
    }
}
2374 
/* PURGE TLB: flush the local (this-CPU) TLB only. */
void HELPER(ptlb)(CPUS390XState *env)
{
    tlb_flush(env_cpu(env));
}
2380 
/* Flush the TLB of all CPUs, synchronously. */
void HELPER(purge)(CPUS390XState *env)
{
    tlb_flush_all_cpus_synced(env_cpu(env));
}
2386 
/*
 * LOAD REAL ADDRESS: translate addr under the current ASC mode and return
 * the real address (cc 0), or on translation failure return r1 with its
 * low 32 bits replaced by the exception code with bit 32 set (cc 3).
 * The cc is stored directly in env->cc_op.
 */
uint64_t HELPER(lra)(CPUS390XState *env, uint64_t r1, uint64_t addr)
{
    uint64_t asc = env->psw.mask & PSW_MASK_ASC;
    uint64_t ret, tec;
    int flags, exc, cc;

    /* XXX incomplete - has more corner cases */
    if (!(env->psw.mask & PSW_MASK_64) && (addr >> 32)) {
        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, GETPC());
    }

    exc = mmu_translate(env, addr, MMU_S390_LRA, asc, &ret, &flags, &tec);
    if (exc) {
        /* Keep the high half of r1; report the exception in the low half. */
        cc = 3;
        ret = (r1 & 0xFFFFFFFF00000000ULL) | exc | 0x80000000;
    } else {
        /* mmu_translate returns the page frame; merge the byte offset. */
        cc = 0;
        ret |= addr & ~TARGET_PAGE_MASK;
    }

    env->cc_op = cc;
    return ret;
}
2411 #endif
2412 
/* Execute instruction.  This instruction executes an insn modified with
   the contents of r1.  It does not change the executed instruction in memory;
   it does not change the program counter.

   Perform this by recording the modified instruction in env->ex_value.
   This will be noticed by cpu_get_tb_cpu_state and thus tb translation.

   ilen is the length of the EXECUTE insn itself (used to advance the PSW
   on the fast paths); addr is the target instruction address. */
void HELPER(ex)(CPUS390XState *env, uint32_t ilen, uint64_t r1, uint64_t addr)
{
    uint64_t insn;
    uint8_t opc;

    /* EXECUTE targets must be at even addresses.  */
    if (addr & 1) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC());
    }

    insn = cpu_lduw_code(env, addr);
    opc = insn >> 8;

    /* Or in the contents of R1[56:63].  */
    insn |= r1 & 0xff;

    /* Load the rest of the instruction.  */
    /* The first halfword sits in bits 63:48; ilen will occupy the low bits. */
    insn <<= 48;
    switch (get_ilen(opc)) {
    case 2:
        break;
    case 4:
        insn |= (uint64_t)cpu_lduw_code(env, addr + 2) << 32;
        break;
    case 6:
        insn |= (uint64_t)(uint32_t)cpu_ldl_code(env, addr + 2) << 16;
        break;
    default:
        g_assert_not_reached();
    }

    /* The very most common cases can be sped up by avoiding a new TB.  */
    /* Opcodes 0xd0-0xdf are SS-format storage-to-storage insns handled
       by helpers we can call directly.  */
    if ((opc & 0xf0) == 0xd0) {
        typedef uint32_t (*dx_helper)(CPUS390XState *, uint32_t, uint64_t,
                                      uint64_t, uintptr_t);
        static const dx_helper dx[16] = {
            [0x0] = do_helper_trt_bkwd,
            [0x2] = do_helper_mvc,
            [0x4] = do_helper_nc,
            [0x5] = do_helper_clc,
            [0x6] = do_helper_oc,
            [0x7] = do_helper_xc,
            [0xc] = do_helper_tr,
            [0xd] = do_helper_trt_fwd,
        };
        dx_helper helper = dx[opc & 0xf];

        if (helper) {
            /* Decode the SS-format fields: length, base+displacement x2. */
            uint32_t l = extract64(insn, 48, 8);
            uint32_t b1 = extract64(insn, 44, 4);
            uint32_t d1 = extract64(insn, 32, 12);
            uint32_t b2 = extract64(insn, 28, 4);
            uint32_t d2 = extract64(insn, 16, 12);
            uint64_t a1 = wrap_address(env, (b1 ? env->regs[b1] : 0) + d1);
            uint64_t a2 = wrap_address(env, (b2 ? env->regs[b2] : 0) + d2);

            env->cc_op = helper(env, l, a1, a2, 0);
            env->psw.addr += ilen;
            return;
        }
    } else if (opc == 0x0a) {
        /* SUPERVISOR CALL: raise the SVC exception directly.  */
        env->int_svc_code = extract64(insn, 48, 8);
        env->int_svc_ilen = ilen;
        helper_exception(env, EXCP_SVC);
        g_assert_not_reached();
    }

    /* Record the insn we want to execute as well as the ilen to use
       during the execution of the target insn.  This will also ensure
       that ex_value is non-zero, which flags that we are in a state
       that requires such execution.  */
    env->ex_value = insn | ilen;
    env->ex_target = addr;
}
2494 
/*
 * MOVE WITH OPTIONAL SPECIFICATIONS: copy up to 4096 bytes from src to
 * dest, with per-operand access key and address-space controls taken
 * from the OAC fields in r0.  Returns cc 3 when len exceeded 4096 (only
 * 4096 bytes moved), else cc 0.
 */
uint32_t HELPER(mvcos)(CPUS390XState *env, uint64_t dest, uint64_t src,
                       uint64_t len)
{
    const uint8_t psw_key = (env->psw.mask & PSW_MASK_KEY) >> PSW_SHIFT_KEY;
    const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
    const uint64_t r0 = env->regs[0];
    const uintptr_t ra = GETPC();
    uint8_t dest_key, dest_as, dest_k, dest_a;
    uint8_t src_key, src_as, src_k, src_a;
    uint64_t val;
    int cc = 0;

    HELPER_LOG("%s dest %" PRIx64 ", src %" PRIx64 ", len %" PRIx64 "\n",
               __func__, dest, src, len);

    if (!(env->psw.mask & PSW_MASK_DAT)) {
        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }

    /* OAC (operand access control) for the first operand -> dest */
    val = (r0 & 0xffff0000ULL) >> 16;
    dest_key = (val >> 12) & 0xf;
    dest_as = (val >> 6) & 0x3;
    dest_k = (val >> 1) & 0x1;   /* K: use the OAC key instead of the PSW key */
    dest_a = val & 0x1;          /* A: use the OAC AS instead of the PSW AS */

    /* OAC (operand access control) for the second operand -> src */
    val = (r0 & 0x0000ffffULL);
    src_key = (val >> 12) & 0xf;
    src_as = (val >> 6) & 0x3;
    src_k = (val >> 1) & 0x1;
    src_a = val & 0x1;

    /* Fall back to the PSW key/AS where the override bits are clear. */
    if (!dest_k) {
        dest_key = psw_key;
    }
    if (!src_k) {
        src_key = psw_key;
    }
    if (!dest_a) {
        dest_as = psw_as;
    }
    if (!src_a) {
        src_as = psw_as;
    }

    if (dest_a && dest_as == AS_HOME && (env->psw.mask & PSW_MASK_PSTATE)) {
        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }
    if (!(env->cregs[0] & CR0_SECONDARY) &&
        (dest_as == AS_SECONDARY || src_as == AS_SECONDARY)) {
        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }
    if (!psw_key_valid(env, dest_key) || !psw_key_valid(env, src_key)) {
        tcg_s390_program_interrupt(env, PGM_PRIVILEGED, ra);
    }

    len = wrap_length32(env, len);
    if (len > 4096) {
        cc = 3;
        len = 4096;
    }

    /* FIXME: AR-mode and proper problem state mode (using PSW keys) missing */
    if (src_as == AS_ACCREG || dest_as == AS_ACCREG ||
        (env->psw.mask & PSW_MASK_PSTATE)) {
        qemu_log_mask(LOG_UNIMP, "%s: AR-mode and PSTATE support missing\n",
                      __func__);
        tcg_s390_program_interrupt(env, PGM_ADDRESSING, ra);
    }

    /* FIXME: Access using correct keys and AR-mode */
    if (len) {
        S390Access srca, desta;

        access_prepare(&srca, env, src, len, MMU_DATA_LOAD,
                       mmu_idx_from_as(src_as), ra);
        access_prepare(&desta, env, dest, len, MMU_DATA_STORE,
                       mmu_idx_from_as(dest_as), ra);

        access_memmove(env, &desta, &srca, ra);
    }

    return cc;
}
2580 
/* Decode a Unicode character.  A return value < 0 indicates success, storing
   the UTF-32 result into OCHAR and the input length into OLEN.  A return
   value >= 0 indicates failure, and the CC value to be returned: 0 when the
   source is exhausted mid-character, 2 when the encoding is invalid.  */
typedef int (*decode_unicode_fn)(CPUS390XState *env, uint64_t addr,
                                 uint64_t ilen, bool enh_check, uintptr_t ra,
                                 uint32_t *ochar, uint32_t *olen);

/* Encode a Unicode character.  A return value < 0 indicates success, storing
   the bytes into ADDR and the output length into OLEN.  A return value >= 0
   indicates failure, and the CC value to be returned: 1 when the destination
   is exhausted.  */
typedef int (*encode_unicode_fn)(CPUS390XState *env, uint64_t addr,
                                 uint64_t ilen, uintptr_t ra, uint32_t c,
                                 uint32_t *olen);
2594 
2595 static int decode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2596                        bool enh_check, uintptr_t ra,
2597                        uint32_t *ochar, uint32_t *olen)
2598 {
2599     uint8_t s0, s1, s2, s3;
2600     uint32_t c, l;
2601 
2602     if (ilen < 1) {
2603         return 0;
2604     }
2605     s0 = cpu_ldub_data_ra(env, addr, ra);
2606     if (s0 <= 0x7f) {
2607         /* one byte character */
2608         l = 1;
2609         c = s0;
2610     } else if (s0 <= (enh_check ? 0xc1 : 0xbf)) {
2611         /* invalid character */
2612         return 2;
2613     } else if (s0 <= 0xdf) {
2614         /* two byte character */
2615         l = 2;
2616         if (ilen < 2) {
2617             return 0;
2618         }
2619         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2620         c = s0 & 0x1f;
2621         c = (c << 6) | (s1 & 0x3f);
2622         if (enh_check && (s1 & 0xc0) != 0x80) {
2623             return 2;
2624         }
2625     } else if (s0 <= 0xef) {
2626         /* three byte character */
2627         l = 3;
2628         if (ilen < 3) {
2629             return 0;
2630         }
2631         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2632         s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2633         c = s0 & 0x0f;
2634         c = (c << 6) | (s1 & 0x3f);
2635         c = (c << 6) | (s2 & 0x3f);
2636         /* Fold the byte-by-byte range descriptions in the PoO into
2637            tests against the complete value.  It disallows encodings
2638            that could be smaller, and the UTF-16 surrogates.  */
2639         if (enh_check
2640             && ((s1 & 0xc0) != 0x80
2641                 || (s2 & 0xc0) != 0x80
2642                 || c < 0x1000
2643                 || (c >= 0xd800 && c <= 0xdfff))) {
2644             return 2;
2645         }
2646     } else if (s0 <= (enh_check ? 0xf4 : 0xf7)) {
2647         /* four byte character */
2648         l = 4;
2649         if (ilen < 4) {
2650             return 0;
2651         }
2652         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2653         s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2654         s3 = cpu_ldub_data_ra(env, addr + 3, ra);
2655         c = s0 & 0x07;
2656         c = (c << 6) | (s1 & 0x3f);
2657         c = (c << 6) | (s2 & 0x3f);
2658         c = (c << 6) | (s3 & 0x3f);
2659         /* See above.  */
2660         if (enh_check
2661             && ((s1 & 0xc0) != 0x80
2662                 || (s2 & 0xc0) != 0x80
2663                 || (s3 & 0xc0) != 0x80
2664                 || c < 0x010000
2665                 || c > 0x10ffff)) {
2666             return 2;
2667         }
2668     } else {
2669         /* invalid character */
2670         return 2;
2671     }
2672 
2673     *ochar = c;
2674     *olen = l;
2675     return -1;
2676 }
2677 
/*
 * Decode one UTF-16 character from ADDR with at most ILEN bytes available.
 * Returns -1 on success (UTF-32 value in *OCHAR, bytes consumed in *OLEN),
 * else the CC to report: 0 source exhausted, 2 invalid surrogate pair.
 */
static int decode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
                        bool enh_check, uintptr_t ra,
                        uint32_t *ochar, uint32_t *olen)
{
    uint16_t s0, s1;
    uint32_t c, l;

    if (ilen < 2) {
        return 0;
    }
    s0 = cpu_lduw_data_ra(env, addr, ra);
    /*
     * NOTE(review): this test only recognizes high surrogates
     * (0xd800-0xdbff) as pair starters; a lone low surrogate
     * (0xdc00-0xdfff) is accepted as a one-word character even with
     * enh_check -- verify against the PoO whether that is intended.
     */
    if ((s0 & 0xfc00) != 0xd800) {
        /* one word character */
        l = 2;
        c = s0;
    } else {
        /* two word character */
        l = 4;
        if (ilen < 4) {
            return 0;
        }
        s1 = cpu_lduw_data_ra(env, addr + 2, ra);
        /* uuuu field encodes plane - 1; adding 1 restores the 0x10000
           offset of the supplementary planes.  */
        c = extract32(s0, 6, 4) + 1;
        c = (c << 6) | (s0 & 0x3f);
        c = (c << 10) | (s1 & 0x3ff);
        if (enh_check && (s1 & 0xfc00) != 0xdc00) {
            /* invalid surrogate character */
            return 2;
        }
    }

    *ochar = c;
    *olen = l;
    return -1;
}
2713 
/*
 * Decode one UTF-32 character from ADDR with at most ILEN bytes available.
 * Returns -1 on success, else the CC to report.  Validity is checked
 * unconditionally; enh_check is unused here.
 */
static int decode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
                        bool enh_check, uintptr_t ra,
                        uint32_t *ochar, uint32_t *olen)
{
    uint32_t c;

    if (ilen < 4) {
        return 0;
    }
    c = cpu_ldl_data_ra(env, addr, ra);
    /*
     * NOTE(review): only high surrogates (0xd800-0xdbff) are rejected;
     * a lone low surrogate (0xdc00-0xdfff) passes.  Unicode deems the
     * entire surrogate range ill-formed in UTF-32 -- verify against the
     * PoO definition of the CU4x instructions whether this is intended.
     */
    if ((c >= 0xd800 && c <= 0xdbff) || c > 0x10ffff) {
        /* invalid unicode character */
        return 2;
    }

    *ochar = c;
    *olen = 4;
    return -1;
}
2733 
2734 static int encode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2735                        uintptr_t ra, uint32_t c, uint32_t *olen)
2736 {
2737     uint8_t d[4];
2738     uint32_t l, i;
2739 
2740     if (c <= 0x7f) {
2741         /* one byte character */
2742         l = 1;
2743         d[0] = c;
2744     } else if (c <= 0x7ff) {
2745         /* two byte character */
2746         l = 2;
2747         d[1] = 0x80 | extract32(c, 0, 6);
2748         d[0] = 0xc0 | extract32(c, 6, 5);
2749     } else if (c <= 0xffff) {
2750         /* three byte character */
2751         l = 3;
2752         d[2] = 0x80 | extract32(c, 0, 6);
2753         d[1] = 0x80 | extract32(c, 6, 6);
2754         d[0] = 0xe0 | extract32(c, 12, 4);
2755     } else {
2756         /* four byte character */
2757         l = 4;
2758         d[3] = 0x80 | extract32(c, 0, 6);
2759         d[2] = 0x80 | extract32(c, 6, 6);
2760         d[1] = 0x80 | extract32(c, 12, 6);
2761         d[0] = 0xf0 | extract32(c, 18, 3);
2762     }
2763 
2764     if (ilen < l) {
2765         return 1;
2766     }
2767     for (i = 0; i < l; ++i) {
2768         cpu_stb_data_ra(env, addr + i, d[i], ra);
2769     }
2770 
2771     *olen = l;
2772     return -1;
2773 }
2774 
/*
 * Encode C as UTF-16 at ADDR with at most ILEN bytes of room.  Returns -1
 * on success (bytes written recorded in *OLEN), or 1 when the destination
 * is exhausted.
 */
static int encode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
                        uintptr_t ra, uint32_t c, uint32_t *olen)
{
    uint16_t d0, d1;

    if (c <= 0xffff) {
        /* one word character */
        if (ilen < 2) {
            return 1;
        }
        cpu_stw_data_ra(env, addr, c, ra);
        *olen = 2;
    } else {
        /* two word character */
        if (ilen < 4) {
            return 1;
        }
        d1 = 0xdc00 | extract32(c, 0, 10);
        d0 = 0xd800 | extract32(c, 10, 6);
        /* The uuuu field of the high surrogate holds (plane - 1). */
        d0 = deposit32(d0, 6, 4, extract32(c, 16, 5) - 1);
        cpu_stw_data_ra(env, addr + 0, d0, ra);
        cpu_stw_data_ra(env, addr + 2, d1, ra);
        *olen = 4;
    }

    return -1;
}
2802 
2803 static int encode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2804                         uintptr_t ra, uint32_t c, uint32_t *olen)
2805 {
2806     if (ilen < 4) {
2807         return 1;
2808     }
2809     cpu_stl_data_ra(env, addr, c, ra);
2810     *olen = 4;
2811     return -1;
2812 }
2813 
2814 static inline uint32_t convert_unicode(CPUS390XState *env, uint32_t r1,
2815                                        uint32_t r2, uint32_t m3, uintptr_t ra,
2816                                        decode_unicode_fn decode,
2817                                        encode_unicode_fn encode)
2818 {
2819     uint64_t dst = get_address(env, r1);
2820     uint64_t dlen = get_length(env, r1 + 1);
2821     uint64_t src = get_address(env, r2);
2822     uint64_t slen = get_length(env, r2 + 1);
2823     bool enh_check = m3 & 1;
2824     int cc, i;
2825 
2826     /* Lest we fail to service interrupts in a timely manner, limit the
2827        amount of work we're willing to do.  For now, let's cap at 256.  */
2828     for (i = 0; i < 256; ++i) {
2829         uint32_t c, ilen, olen;
2830 
2831         cc = decode(env, src, slen, enh_check, ra, &c, &ilen);
2832         if (unlikely(cc >= 0)) {
2833             break;
2834         }
2835         cc = encode(env, dst, dlen, ra, c, &olen);
2836         if (unlikely(cc >= 0)) {
2837             break;
2838         }
2839 
2840         src += ilen;
2841         slen -= ilen;
2842         dst += olen;
2843         dlen -= olen;
2844         cc = 3;
2845     }
2846 
2847     set_address(env, r1, dst);
2848     set_length(env, r1 + 1, dlen);
2849     set_address(env, r2, src);
2850     set_length(env, r2 + 1, slen);
2851 
2852     return cc;
2853 }
2854 
/* CU12: convert the UTF-8 operand (r2) to UTF-16 (r1). */
uint32_t HELPER(cu12)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf8, encode_utf16);
}
2860 
/* CU14: convert the UTF-8 operand (r2) to UTF-32 (r1). */
uint32_t HELPER(cu14)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf8, encode_utf32);
}
2866 
/* CU21: convert the UTF-16 operand (r2) to UTF-8 (r1). */
uint32_t HELPER(cu21)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf16, encode_utf8);
}
2872 
/* CU24: convert the UTF-16 operand (r2) to UTF-32 (r1). */
uint32_t HELPER(cu24)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf16, encode_utf32);
}
2878 
/* CU41: convert the UTF-32 operand (r2) to UTF-8 (r1). */
uint32_t HELPER(cu41)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf32, encode_utf8);
}
2884 
/* CU42: convert the UTF-32 operand (r2) to UTF-16 (r1). */
uint32_t HELPER(cu42)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf32, encode_utf16);
}
2890 
2891 void probe_write_access(CPUS390XState *env, uint64_t addr, uint64_t len,
2892                         uintptr_t ra)
2893 {
2894     const int mmu_idx = s390x_env_mmu_index(env, false);
2895 
2896     /* test the actual access, not just any access to the page due to LAP */
2897     while (len) {
2898         const uint64_t pagelen = -(addr | TARGET_PAGE_MASK);
2899         const uint64_t curlen = MIN(pagelen, len);
2900 
2901         probe_write(env, addr, curlen, mmu_idx, ra);
2902         addr = wrap_address(env, addr + curlen);
2903         len -= curlen;
2904     }
2905 }
2906 
/* TCG helper entry point: probe writability using the guest return address. */
void HELPER(probe_write_access)(CPUS390XState *env, uint64_t addr, uint64_t len)
{
    probe_write_access(env, addr, len, GETPC());
}
2911