xref: /qemu/target/s390x/tcg/mem_helper.c (revision b103cc6e74ac92f070a0e004bd84334e845c20b5)
1 /*
2  *  S/390 memory access helper routines
3  *
4  *  Copyright (c) 2009 Ulrich Hecht
5  *  Copyright (c) 2009 Alexander Graf
6  *
7  * This library is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * This library is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19  */
20 
21 #include "qemu/osdep.h"
22 #include "qemu/log.h"
23 #include "cpu.h"
24 #include "s390x-internal.h"
25 #include "tcg_s390x.h"
26 #include "exec/helper-proto.h"
27 #include "exec/cpu-common.h"
28 #include "exec/exec-all.h"
29 #include "exec/cputlb.h"
30 #include "exec/page-protection.h"
31 #include "exec/cpu_ldst.h"
32 #include "exec/tlb-flags.h"
33 #include "accel/tcg/cpu-ops.h"
34 #include "qemu/int128.h"
35 #include "qemu/atomic128.h"
36 
37 #if defined(CONFIG_USER_ONLY)
38 #include "user/page-protection.h"
39 #else
40 #include "hw/s390x/storage-keys.h"
41 #include "hw/boards.h"
42 #endif
43 
44 #ifdef CONFIG_USER_ONLY
45 # define user_or_likely(X)    true
46 #else
47 # define user_or_likely(X)    likely(X)
48 #endif
49 
50 /*****************************************************************************/
51 /* Softmmu support */
52 
53 /* #define DEBUG_HELPER */
54 #ifdef DEBUG_HELPER
55 #define HELPER_LOG(x...) qemu_log(x)
56 #else
57 #define HELPER_LOG(x...)
58 #endif
59 
60 static inline bool psw_key_valid(CPUS390XState *env, uint8_t psw_key)
61 {
62     uint16_t pkm = env->cregs[3] >> 16;
63 
64     if (env->psw.mask & PSW_MASK_PSTATE) {
65         /* PSW key has range 0..15, it is valid if the bit is 1 in the PKM */
66         return pkm & (0x8000 >> psw_key);
67     }
68     return true;
69 }
70 
71 static bool is_destructive_overlap(CPUS390XState *env, uint64_t dest,
72                                    uint64_t src, uint32_t len)
73 {
74     if (!len || src == dest) {
75         return false;
76     }
77     /* Take care of wrapping at the end of address space. */
78     if (unlikely(wrap_address(env, src + len - 1) < src)) {
79         return dest > src || dest <= wrap_address(env, src + len - 1);
80     }
81     return dest > src && dest <= src + len - 1;
82 }
83 
84 /* Trigger a SPECIFICATION exception if an address or a length is not
85    naturally aligned.  */
86 static inline void check_alignment(CPUS390XState *env, uint64_t v,
87                                    int wordsize, uintptr_t ra)
88 {
89     if (v % wordsize) {
90         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
91     }
92 }
93 
94 /* Load a value from memory according to its size.  */
95 static inline uint64_t cpu_ldusize_data_ra(CPUS390XState *env, uint64_t addr,
96                                            int wordsize, uintptr_t ra)
97 {
98     switch (wordsize) {
99     case 1:
100         return cpu_ldub_data_ra(env, addr, ra);
101     case 2:
102         return cpu_lduw_data_ra(env, addr, ra);
103     default:
104         abort();
105     }
106 }
107 
108 /* Store a to memory according to its size.  */
109 static inline void cpu_stsize_data_ra(CPUS390XState *env, uint64_t addr,
110                                       uint64_t value, int wordsize,
111                                       uintptr_t ra)
112 {
113     switch (wordsize) {
114     case 1:
115         cpu_stb_data_ra(env, addr, value, ra);
116         break;
117     case 2:
118         cpu_stw_data_ra(env, addr, value, ra);
119         break;
120     default:
121         abort();
122     }
123 }
124 
125 /* An access covers at most 4096 bytes and therefore at most two pages. */
typedef struct S390Access {
    target_ulong vaddr1;   /* guest address of the part in the first page */
    target_ulong vaddr2;   /* guest address of the part in the second page */
    void *haddr1;          /* host pointer for part 1; NULL forces slow path */
    void *haddr2;          /* host pointer for part 2; NULL forces slow path */
    uint16_t size1;        /* bytes within the first page */
    uint16_t size2;        /* bytes within the second page (0: no page split) */
    /*
     * If we can't access the host page directly, we'll have to do I/O access
     * via ld/st helpers. These are internal details, so we store the
     * mmu idx to do the access here instead of passing it around in the
     * helpers.
     */
    int mmu_idx;
} S390Access;
141 
142 /*
143  * With nonfault=1, return the PGM_ exception that would have been injected
144  * into the guest; return 0 if no exception was detected.
145  *
 * For !CONFIG_USER_ONLY, the TEC is stored to env->tlb_fill_tec.
147  * For CONFIG_USER_ONLY, the faulting address is stored to env->__excp_addr.
148  */
static inline int s390_probe_access(CPUArchState *env, target_ulong addr,
                                    int size, MMUAccessType access_type,
                                    int mmu_idx, bool nonfault,
                                    void **phost, uintptr_t ra)
{
    /* With nonfault set, a failing page lookup reports instead of raising. */
    int flags = probe_access_flags(env, addr, size, access_type, mmu_idx,
                                   nonfault, phost, ra);

    if (unlikely(flags & TLB_INVALID_MASK)) {
#ifdef CONFIG_USER_ONLY
        /* Address is in TEC in system mode; see s390_cpu_record_sigsegv. */
        env->__excp_addr = addr & TARGET_PAGE_MASK;
        /* Mapped-but-forbidden -> protection; unmapped -> addressing. */
        return (page_get_flags(addr) & PAGE_VALID
                ? PGM_PROTECTION : PGM_ADDRESSING);
#else
        /* Exception code recorded by the softmmu TLB fill path. */
        return env->tlb_fill_exc;
#endif
    }

#ifndef CONFIG_USER_ONLY
    if (unlikely(flags & TLB_WATCHPOINT)) {
        /* S390 does not presently use transaction attributes. */
        cpu_check_watchpoint(env_cpu(env), addr, size,
                             MEMTXATTRS_UNSPECIFIED,
                             (access_type == MMU_DATA_STORE
                              ? BP_MEM_WRITE : BP_MEM_READ), ra);
    }
#endif

    return 0;
}
180 
181 static int access_prepare_nf(S390Access *access, CPUS390XState *env,
182                              bool nonfault, vaddr vaddr1, int size,
183                              MMUAccessType access_type,
184                              int mmu_idx, uintptr_t ra)
185 {
186     int size1, size2, exc;
187 
188     assert(size > 0 && size <= 4096);
189 
190     size1 = MIN(size, -(vaddr1 | TARGET_PAGE_MASK)),
191     size2 = size - size1;
192 
193     memset(access, 0, sizeof(*access));
194     access->vaddr1 = vaddr1;
195     access->size1 = size1;
196     access->size2 = size2;
197     access->mmu_idx = mmu_idx;
198 
199     exc = s390_probe_access(env, vaddr1, size1, access_type, mmu_idx, nonfault,
200                             &access->haddr1, ra);
201     if (unlikely(exc)) {
202         return exc;
203     }
204     if (unlikely(size2)) {
205         /* The access crosses page boundaries. */
206         vaddr vaddr2 = wrap_address(env, vaddr1 + size1);
207 
208         access->vaddr2 = vaddr2;
209         exc = s390_probe_access(env, vaddr2, size2, access_type, mmu_idx,
210                                 nonfault, &access->haddr2, ra);
211         if (unlikely(exc)) {
212             return exc;
213         }
214     }
215     return 0;
216 }
217 
218 static inline void access_prepare(S390Access *ret, CPUS390XState *env,
219                                   vaddr vaddr, int size,
220                                   MMUAccessType access_type, int mmu_idx,
221                                   uintptr_t ra)
222 {
223     int exc = access_prepare_nf(ret, env, false, vaddr, size,
224                                 access_type, mmu_idx, ra);
225     assert(!exc);
226 }
227 
228 /* Helper to handle memset on a single page. */
229 static void do_access_memset(CPUS390XState *env, vaddr vaddr, char *haddr,
230                              uint8_t byte, uint16_t size, int mmu_idx,
231                              uintptr_t ra)
232 {
233     if (user_or_likely(haddr)) {
234         memset(haddr, byte, size);
235     } else {
236         MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
237         for (int i = 0; i < size; i++) {
238             cpu_stb_mmu(env, vaddr + i, byte, oi, ra);
239         }
240     }
241 }
242 
243 static void access_memset(CPUS390XState *env, S390Access *desta,
244                           uint8_t byte, uintptr_t ra)
245 {
246     set_helper_retaddr(ra);
247     do_access_memset(env, desta->vaddr1, desta->haddr1, byte, desta->size1,
248                      desta->mmu_idx, ra);
249     if (unlikely(desta->size2)) {
250         do_access_memset(env, desta->vaddr2, desta->haddr2, byte,
251                          desta->size2, desta->mmu_idx, ra);
252     }
253     clear_helper_retaddr();
254 }
255 
256 static uint8_t access_get_byte(CPUS390XState *env, S390Access *access,
257                                int offset, uintptr_t ra)
258 {
259     target_ulong vaddr = access->vaddr1;
260     void *haddr = access->haddr1;
261 
262     if (unlikely(offset >= access->size1)) {
263         offset -= access->size1;
264         vaddr = access->vaddr2;
265         haddr = access->haddr2;
266     }
267 
268     if (user_or_likely(haddr)) {
269         return ldub_p(haddr + offset);
270     } else {
271         MemOpIdx oi = make_memop_idx(MO_UB, access->mmu_idx);
272         return cpu_ldb_mmu(env, vaddr + offset, oi, ra);
273     }
274 }
275 
276 static void access_set_byte(CPUS390XState *env, S390Access *access,
277                             int offset, uint8_t byte, uintptr_t ra)
278 {
279     target_ulong vaddr = access->vaddr1;
280     void *haddr = access->haddr1;
281 
282     if (unlikely(offset >= access->size1)) {
283         offset -= access->size1;
284         vaddr = access->vaddr2;
285         haddr = access->haddr2;
286     }
287 
288     if (user_or_likely(haddr)) {
289         stb_p(haddr + offset, byte);
290     } else {
291         MemOpIdx oi = make_memop_idx(MO_UB, access->mmu_idx);
292         cpu_stb_mmu(env, vaddr + offset, byte, oi, ra);
293     }
294 }
295 
296 /*
297  * Move data with the same semantics as memmove() in case ranges don't overlap
298  * or src > dest. Undefined behavior on destructive overlaps.
299  */
static void access_memmove(CPUS390XState *env, S390Access *desta,
                           S390Access *srca, uintptr_t ra)
{
    int len = desta->size1 + desta->size2;

    assert(len == srca->size1 + srca->size2);

    /* Fallback to slow access in case we don't have access to all host pages */
    if (user_or_likely(desta->haddr1 &&
                       srca->haddr1 &&
                       (!desta->size2 || desta->haddr2) &&
                       (!srca->size2 || srca->haddr2))) {
        /* How far the destination page split lies beyond the source's. */
        int diff = desta->size1 - srca->size1;

        if (likely(diff == 0)) {
            /* Page splits line up: copy each part directly. */
            memmove(desta->haddr1, srca->haddr1, srca->size1);
            if (unlikely(srca->size2)) {
                memmove(desta->haddr2, srca->haddr2, srca->size2);
            }
        } else if (diff > 0) {
            /*
             * Destination splits later: part 1 takes all of src part 1
             * plus the first diff bytes of src part 2.
             */
            memmove(desta->haddr1, srca->haddr1, srca->size1);
            memmove(desta->haddr1 + srca->size1, srca->haddr2, diff);
            if (likely(desta->size2)) {
                memmove(desta->haddr2, srca->haddr2 + diff, desta->size2);
            }
        } else {
            /*
             * Destination splits earlier: src part 1 feeds dest part 1
             * and the first diff bytes of dest part 2.
             */
            diff = -diff;
            memmove(desta->haddr1, srca->haddr1, desta->size1);
            memmove(desta->haddr2, srca->haddr1 + desta->size1, diff);
            if (likely(srca->size2)) {
                memmove(desta->haddr2 + diff, srca->haddr2, srca->size2);
            }
        }
    } else {
        /* Slow path: byte-wise through the access helpers. */
        for (int i = 0; i < len; i++) {
            uint8_t byte = access_get_byte(env, srca, i, ra);
            access_set_byte(env, desta, i, byte, ra);
        }
    }
}
340 
341 static int mmu_idx_from_as(uint8_t as)
342 {
343     switch (as) {
344     case AS_PRIMARY:
345         return MMU_PRIMARY_IDX;
346     case AS_SECONDARY:
347         return MMU_SECONDARY_IDX;
348     case AS_HOME:
349         return MMU_HOME_IDX;
350     default:
351         /* FIXME AS_ACCREG */
352         g_assert_not_reached();
353     }
354 }
355 
356 /* and on array */
357 static uint32_t do_helper_nc(CPUS390XState *env, uint32_t l, uint64_t dest,
358                              uint64_t src, uintptr_t ra)
359 {
360     const int mmu_idx = s390x_env_mmu_index(env, false);
361     S390Access srca1, srca2, desta;
362     uint32_t i;
363     uint8_t c = 0;
364 
365     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
366                __func__, l, dest, src);
367 
368     /* NC always processes one more byte than specified - maximum is 256 */
369     l++;
370 
371     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
372     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
373     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
374     set_helper_retaddr(ra);
375 
376     for (i = 0; i < l; i++) {
377         const uint8_t x = access_get_byte(env, &srca1, i, ra) &
378                           access_get_byte(env, &srca2, i, ra);
379 
380         c |= x;
381         access_set_byte(env, &desta, i, x, ra);
382     }
383 
384     clear_helper_retaddr();
385     return c != 0;
386 }
387 
/* NC entry point; GETPC() must be taken here, in the outermost helper. */
uint32_t HELPER(nc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    return do_helper_nc(env, l, dest, src, GETPC());
}
393 
394 /* xor on array */
395 static uint32_t do_helper_xc(CPUS390XState *env, uint32_t l, uint64_t dest,
396                              uint64_t src, uintptr_t ra)
397 {
398     const int mmu_idx = s390x_env_mmu_index(env, false);
399     S390Access srca1, srca2, desta;
400     uint32_t i;
401     uint8_t c = 0;
402 
403     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
404                __func__, l, dest, src);
405 
406     /* XC always processes one more byte than specified - maximum is 256 */
407     l++;
408 
409     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
410     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
411     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
412 
413     /* xor with itself is the same as memset(0) */
414     if (src == dest) {
415         access_memset(env, &desta, 0, ra);
416         return 0;
417     }
418 
419     set_helper_retaddr(ra);
420     for (i = 0; i < l; i++) {
421         const uint8_t x = access_get_byte(env, &srca1, i, ra) ^
422                           access_get_byte(env, &srca2, i, ra);
423 
424         c |= x;
425         access_set_byte(env, &desta, i, x, ra);
426     }
427     clear_helper_retaddr();
428     return c != 0;
429 }
430 
/* XC entry point; GETPC() must be taken here, in the outermost helper. */
uint32_t HELPER(xc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    return do_helper_xc(env, l, dest, src, GETPC());
}
436 
437 /* or on array */
438 static uint32_t do_helper_oc(CPUS390XState *env, uint32_t l, uint64_t dest,
439                              uint64_t src, uintptr_t ra)
440 {
441     const int mmu_idx = s390x_env_mmu_index(env, false);
442     S390Access srca1, srca2, desta;
443     uint32_t i;
444     uint8_t c = 0;
445 
446     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
447                __func__, l, dest, src);
448 
449     /* OC always processes one more byte than specified - maximum is 256 */
450     l++;
451 
452     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
453     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
454     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
455     set_helper_retaddr(ra);
456 
457     for (i = 0; i < l; i++) {
458         const uint8_t x = access_get_byte(env, &srca1, i, ra) |
459                           access_get_byte(env, &srca2, i, ra);
460 
461         c |= x;
462         access_set_byte(env, &desta, i, x, ra);
463     }
464 
465     clear_helper_retaddr();
466     return c != 0;
467 }
468 
/* OC entry point; GETPC() must be taken here, in the outermost helper. */
uint32_t HELPER(oc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    return do_helper_oc(env, l, dest, src, GETPC());
}
474 
475 /* memmove */
static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest,
                              uint64_t src, uintptr_t ra)
{
    const int mmu_idx = s390x_env_mmu_index(env, false);
    S390Access srca, desta;
    uint32_t i;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* MVC always copies one more byte than specified - maximum is 256 */
    l++;

    access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);

    /*
     * "When the operands overlap, the result is obtained as if the operands
     * were processed one byte at a time". Only non-destructive overlaps
     * behave like memmove().
     */
    if (dest == src + 1) {
        /* dest == src + 1 replicates the first source byte (memset idiom). */
        access_memset(env, &desta, access_get_byte(env, &srca, 0, ra), ra);
    } else if (!is_destructive_overlap(env, dest, src, l)) {
        /* Fast path: bulk copy via host memory where possible. */
        access_memmove(env, &desta, &srca, ra);
    } else {
        /* Destructive overlap: byte-at-a-time in ascending order. */
        set_helper_retaddr(ra);
        for (i = 0; i < l; i++) {
            uint8_t byte = access_get_byte(env, &srca, i, ra);

            access_set_byte(env, &desta, i, byte, ra);
        }
        clear_helper_retaddr();
    }

    /* cc is not modified by MVC; return the current value for reuse. */
    return env->cc_op;
}
513 
/* MVC entry point; GETPC() must be taken here, in the outermost helper. */
void HELPER(mvc)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    do_helper_mvc(env, l, dest, src, GETPC());
}
518 
519 /* move right to left */
520 void HELPER(mvcrl)(CPUS390XState *env, uint64_t l, uint64_t dest, uint64_t src)
521 {
522     const int mmu_idx = s390x_env_mmu_index(env, false);
523     const uint64_t ra = GETPC();
524     S390Access srca, desta;
525     int32_t i;
526 
527     /* MVCRL always copies one more byte than specified - maximum is 256 */
528     l &= 0xff;
529     l++;
530 
531     access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
532     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
533 
534     set_helper_retaddr(ra);
535     for (i = l - 1; i >= 0; i--) {
536         uint8_t byte = access_get_byte(env, &srca, i, ra);
537         access_set_byte(env, &desta, i, byte, ra);
538     }
539     clear_helper_retaddr();
540 }
541 
542 /* move inverse  */
543 void HELPER(mvcin)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
544 {
545     const int mmu_idx = s390x_env_mmu_index(env, false);
546     S390Access srca, desta;
547     uintptr_t ra = GETPC();
548     int i;
549 
550     /* MVCIN always copies one more byte than specified - maximum is 256 */
551     l++;
552 
553     src = wrap_address(env, src - l + 1);
554     access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
555     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
556 
557     set_helper_retaddr(ra);
558     for (i = 0; i < l; i++) {
559         const uint8_t x = access_get_byte(env, &srca, l - i - 1, ra);
560         access_set_byte(env, &desta, i, x, ra);
561     }
562     clear_helper_retaddr();
563 }
564 
565 /* move numerics  */
566 void HELPER(mvn)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
567 {
568     const int mmu_idx = s390x_env_mmu_index(env, false);
569     S390Access srca1, srca2, desta;
570     uintptr_t ra = GETPC();
571     int i;
572 
573     /* MVN always copies one more byte than specified - maximum is 256 */
574     l++;
575 
576     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
577     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
578     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
579 
580     set_helper_retaddr(ra);
581     for (i = 0; i < l; i++) {
582         const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0x0f) |
583                           (access_get_byte(env, &srca2, i, ra) & 0xf0);
584 
585         access_set_byte(env, &desta, i, x, ra);
586     }
587     clear_helper_retaddr();
588 }
589 
590 /* move with offset  */
void HELPER(mvo)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = s390x_env_mmu_index(env, false);
    /* MVO always processes one more byte than specified - maximum is 16 */
    const int len_dest = (l >> 4) + 1;
    const int len_src = (l & 0xf) + 1;
    uintptr_t ra = GETPC();
    uint8_t byte_dest, byte_src;
    S390Access srca, desta;
    int i, j;

    access_prepare(&srca, env, src, len_src, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, dest, len_dest, MMU_DATA_STORE, mmu_idx, ra);

    /* Handle rightmost byte */
    byte_dest = cpu_ldub_data_ra(env, dest + len_dest - 1, ra);

    set_helper_retaddr(ra);
    /* The low nibble of the rightmost destination byte is preserved. */
    byte_src = access_get_byte(env, &srca, len_src - 1, ra);
    byte_dest = (byte_dest & 0x0f) | (byte_src << 4);
    access_set_byte(env, &desta, len_dest - 1, byte_dest, ra);

    /* Process remaining bytes from right to left */
    for (i = len_dest - 2, j = len_src - 2; i >= 0; i--, j--) {
        /* Each destination byte joins nibbles of two adjacent source bytes. */
        byte_dest = byte_src >> 4;
        if (j >= 0) {
            byte_src = access_get_byte(env, &srca, j, ra);
        } else {
            /* Source exhausted: pad with zero nibbles. */
            byte_src = 0;
        }
        byte_dest |= byte_src << 4;
        access_set_byte(env, &desta, i, byte_dest, ra);
    }
    clear_helper_retaddr();
}
626 
627 /* move zones  */
628 void HELPER(mvz)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
629 {
630     const int mmu_idx = s390x_env_mmu_index(env, false);
631     S390Access srca1, srca2, desta;
632     uintptr_t ra = GETPC();
633     int i;
634 
635     /* MVZ always copies one more byte than specified - maximum is 256 */
636     l++;
637 
638     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
639     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
640     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
641 
642     set_helper_retaddr(ra);
643     for (i = 0; i < l; i++) {
644         const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0xf0) |
645                           (access_get_byte(env, &srca2, i, ra) & 0x0f);
646 
647         access_set_byte(env, &desta, i, x, ra);
648     }
649     clear_helper_retaddr();
650 }
651 
652 /* compare unsigned byte arrays */
653 static uint32_t do_helper_clc(CPUS390XState *env, uint32_t l, uint64_t s1,
654                               uint64_t s2, uintptr_t ra)
655 {
656     uint32_t i;
657     uint32_t cc = 0;
658 
659     HELPER_LOG("%s l %d s1 %" PRIx64 " s2 %" PRIx64 "\n",
660                __func__, l, s1, s2);
661 
662     for (i = 0; i <= l; i++) {
663         uint8_t x = cpu_ldub_data_ra(env, s1 + i, ra);
664         uint8_t y = cpu_ldub_data_ra(env, s2 + i, ra);
665         HELPER_LOG("%02x (%c)/%02x (%c) ", x, x, y, y);
666         if (x < y) {
667             cc = 1;
668             break;
669         } else if (x > y) {
670             cc = 2;
671             break;
672         }
673     }
674 
675     HELPER_LOG("\n");
676     return cc;
677 }
678 
/* CLC entry point; GETPC() must be taken here, in the outermost helper. */
uint32_t HELPER(clc)(CPUS390XState *env, uint32_t l, uint64_t s1, uint64_t s2)
{
    return do_helper_clc(env, l, s1, s2, GETPC());
}
683 
684 /* compare logical under mask */
uint32_t HELPER(clm)(CPUS390XState *env, uint32_t r1, uint32_t mask,
                     uint64_t addr)
{
    uintptr_t ra = GETPC();
    uint32_t cc = 0;

    HELPER_LOG("%s: r1 0x%x mask 0x%x addr 0x%" PRIx64 "\n", __func__, r1,
               mask, addr);

    if (!mask) {
        /* Recognize access exceptions for the first byte */
        probe_read(env, addr, 1, s390x_env_mmu_index(env, false), ra);
    }

    /* Walk the 4-bit mask MSB-first; each set bit selects one r1 byte. */
    while (mask) {
        if (mask & 8) {
            uint8_t d = cpu_ldub_data_ra(env, addr, ra);
            uint8_t r = extract32(r1, 24, 8);
            HELPER_LOG("mask 0x%x %02x/%02x (0x%" PRIx64 ") ", mask, r, d,
                       addr);
            if (r < d) {
                cc = 1;
                break;
            } else if (r > d) {
                cc = 2;
                break;
            }
            /* Memory is only consumed for selected bytes. */
            addr++;
        }
        /* Advance mask and register in lock-step. */
        mask = (mask << 1) & 0xf;
        r1 <<= 8;
    }

    HELPER_LOG("\n");
    return cc;
}
721 
722 static inline uint64_t get_address(CPUS390XState *env, int reg)
723 {
724     return wrap_address(env, env->regs[reg]);
725 }
726 
727 /*
728  * Store the address to the given register, zeroing out unused leftmost
729  * bits in bit positions 32-63 (24-bit and 31-bit mode only).
730  */
731 static inline void set_address_zero(CPUS390XState *env, int reg,
732                                     uint64_t address)
733 {
734     if (env->psw.mask & PSW_MASK_64) {
735         env->regs[reg] = address;
736     } else {
737         if (!(env->psw.mask & PSW_MASK_32)) {
738             address &= 0x00ffffff;
739         } else {
740             address &= 0x7fffffff;
741         }
742         env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
743     }
744 }
745 
746 static inline void set_address(CPUS390XState *env, int reg, uint64_t address)
747 {
748     if (env->psw.mask & PSW_MASK_64) {
749         /* 64-Bit mode */
750         env->regs[reg] = address;
751     } else {
752         if (!(env->psw.mask & PSW_MASK_32)) {
753             /* 24-Bit mode. According to the PoO it is implementation
754             dependent if bits 32-39 remain unchanged or are set to
755             zeros.  Choose the former so that the function can also be
756             used for TRT.  */
757             env->regs[reg] = deposit64(env->regs[reg], 0, 24, address);
758         } else {
759             /* 31-Bit mode. According to the PoO it is implementation
760             dependent if bit 32 remains unchanged or is set to zero.
761             Choose the latter so that the function can also be used for
762             TRT.  */
763             address &= 0x7fffffff;
764             env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
765         }
766     }
767 }
768 
769 static inline uint64_t wrap_length32(CPUS390XState *env, uint64_t length)
770 {
771     if (!(env->psw.mask & PSW_MASK_64)) {
772         return (uint32_t)length;
773     }
774     return length;
775 }
776 
777 static inline uint64_t wrap_length31(CPUS390XState *env, uint64_t length)
778 {
779     if (!(env->psw.mask & PSW_MASK_64)) {
780         /* 24-Bit and 31-Bit mode */
781         length &= 0x7fffffff;
782     }
783     return length;
784 }
785 
/* Read a length from a register, clamped to 31 bits outside 64-bit mode. */
static inline uint64_t get_length(CPUS390XState *env, int reg)
{
    return wrap_length31(env, env->regs[reg]);
}
790 
791 static inline void set_length(CPUS390XState *env, int reg, uint64_t length)
792 {
793     if (env->psw.mask & PSW_MASK_64) {
794         /* 64-Bit mode */
795         env->regs[reg] = length;
796     } else {
797         /* 24-Bit and 31-Bit mode */
798         env->regs[reg] = deposit64(env->regs[reg], 0, 32, length);
799     }
800 }
801 
802 /* search string (c is byte to search, r2 is string, r1 end of string) */
803 void HELPER(srst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
804 {
805     uintptr_t ra = GETPC();
806     uint64_t end, str;
807     uint32_t len;
808     uint8_t v, c = env->regs[0];
809 
810     /* Bits 32-55 must contain all 0.  */
811     if (env->regs[0] & 0xffffff00u) {
812         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
813     }
814 
815     str = get_address(env, r2);
816     end = get_address(env, r1);
817 
818     /* Lest we fail to service interrupts in a timely manner, limit the
819        amount of work we're willing to do.  For now, let's cap at 8k.  */
820     for (len = 0; len < 0x2000; ++len) {
821         if (str + len == end) {
822             /* Character not found.  R1 & R2 are unmodified.  */
823             env->cc_op = 2;
824             return;
825         }
826         v = cpu_ldub_data_ra(env, str + len, ra);
827         if (v == c) {
828             /* Character found.  Set R1 to the location; R2 is unmodified.  */
829             env->cc_op = 1;
830             set_address(env, r1, str + len);
831             return;
832         }
833     }
834 
835     /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
836     env->cc_op = 3;
837     set_address(env, r2, str + len);
838 }
839 
/* 16-bit-character variant of SRST (cf. the cpu_lduw loads below). */
void HELPER(srstu)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    uintptr_t ra = GETPC();
    uint32_t len;
    uint16_t v, c = env->regs[0];
    uint64_t end, str, adj_end;

    /* Bits 32-47 of R0 must be zero.  */
    if (env->regs[0] & 0xffff0000u) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    str = get_address(env, r2);
    end = get_address(env, r1);

    /* If the LSB of the two addresses differ, use one extra byte.  */
    adj_end = end + ((str ^ end) & 1);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; len += 2) {
        if (str + len == adj_end) {
            /* End of input found.  */
            env->cc_op = 2;
            return;
        }
        v = cpu_lduw_data_ra(env, str + len, ra);
        if (v == c) {
            /* Character found.  Set R1 to the location; R2 is unmodified.  */
            env->cc_op = 1;
            set_address(env, r1, str + len);
            return;
        }
    }

    /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
    env->cc_op = 3;
    set_address(env, r2, str + len);
}
879 
880 /* unsigned string compare (c is string terminator) */
Int128 HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t s2)
{
    uintptr_t ra = GETPC();
    uint32_t len;

    /* Only the low byte of c is used as the terminator. */
    c = c & 0xff;
    s1 = wrap_address(env, s1);
    s2 = wrap_address(env, s2);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; ++len) {
        uint8_t v1 = cpu_ldub_data_ra(env, s1 + len, ra);
        uint8_t v2 = cpu_ldub_data_ra(env, s2 + len, ra);
        if (v1 == v2) {
            if (v1 == c) {
                /* Equal.  CC=0, and don't advance the registers.  */
                env->cc_op = 0;
                return int128_make128(s2, s1);
            }
        } else {
            /* Unequal.  CC={1,2}, and advance the registers.  Note that
               the terminator need not be zero, but the string that contains
               the terminator is by definition "low".  */
            env->cc_op = (v1 == c ? 1 : v2 == c ? 2 : v1 < v2 ? 1 : 2);
            return int128_make128(s2 + len, s1 + len);
        }
    }

    /* CPU-determined bytes equal; advance the registers.  */
    env->cc_op = 3;
    /* The pair is (new s2, new s1); cc is left in env->cc_op. */
    return int128_make128(s2 + len, s1 + len);
}
914 
915 /* move page */
916 uint32_t HELPER(mvpg)(CPUS390XState *env, uint64_t r0, uint32_t r1, uint32_t r2)
917 {
918     const uint64_t src = get_address(env, r2) & TARGET_PAGE_MASK;
919     const uint64_t dst = get_address(env, r1) & TARGET_PAGE_MASK;
920     const int mmu_idx = s390x_env_mmu_index(env, false);
921     const bool f = extract64(r0, 11, 1);
922     const bool s = extract64(r0, 10, 1);
923     const bool cco = extract64(r0, 8, 1);
924     uintptr_t ra = GETPC();
925     S390Access srca, desta;
926     int exc;
927 
928     if ((f && s) || extract64(r0, 12, 4)) {
929         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC());
930     }
931 
932     /*
933      * We always manually handle exceptions such that we can properly store
934      * r1/r2 to the lowcore on page-translation exceptions.
935      *
936      * TODO: Access key handling
937      */
938     exc = access_prepare_nf(&srca, env, true, src, TARGET_PAGE_SIZE,
939                             MMU_DATA_LOAD, mmu_idx, ra);
940     if (exc) {
941         if (cco) {
942             return 2;
943         }
944         goto inject_exc;
945     }
946     exc = access_prepare_nf(&desta, env, true, dst, TARGET_PAGE_SIZE,
947                             MMU_DATA_STORE, mmu_idx, ra);
948     if (exc) {
949         if (cco && exc != PGM_PROTECTION) {
950             return 1;
951         }
952         goto inject_exc;
953     }
954     access_memmove(env, &desta, &srca, ra);
955     return 0; /* data moved */
956 inject_exc:
957 #if !defined(CONFIG_USER_ONLY)
958     if (exc != PGM_ADDRESSING) {
959         stq_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, trans_exc_code),
960                  env->tlb_fill_tec);
961     }
962     if (exc == PGM_PAGE_TRANS) {
963         stb_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, op_access_id),
964                  r1 << 4 | r2);
965     }
966 #endif
967     tcg_s390_program_interrupt(env, exc, ra);
968 }
969 
/* string copy */
uint32_t HELPER(mvst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    const int mmu_idx = s390x_env_mmu_index(env, false);
    const uint64_t d = get_address(env, r1);
    const uint64_t s = get_address(env, r2);
    /* The terminator character comes from the low byte of R0.  */
    const uint8_t c = env->regs[0];
    /* Process at most up to whichever operand's page ends first.  */
    const int len = MIN(-(d | TARGET_PAGE_MASK), -(s | TARGET_PAGE_MASK));
    S390Access srca, desta;
    uintptr_t ra = GETPC();
    int i;

    /* Bits 8-31 of R0 (per the mask below) must be zero.  */
    if (env->regs[0] & 0xffffff00ull) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    /*
     * Our access should not exceed single pages, as we must not report access
     * exceptions exceeding the actually copied range (which we don't know at
     * this point). We might over-indicate watchpoints within the pages
     * (if we ever care, we have to limit processing to a single byte).
     */
    access_prepare(&srca, env, s, len, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, d, len, MMU_DATA_STORE, mmu_idx, ra);

    /* Byte-wise copy; faults inside the window unwind via the retaddr.  */
    set_helper_retaddr(ra);
    for (i = 0; i < len; i++) {
        const uint8_t v = access_get_byte(env, &srca, i, ra);

        access_set_byte(env, &desta, i, v, ra);
        if (v == c) {
            /* Terminator copied: CC 1, R1 points at the terminator.  */
            clear_helper_retaddr();
            set_address_zero(env, r1, d + i);
            return 1;
        }
    }
    /* Page boundary reached: CC 3, advance both operand addresses.  */
    clear_helper_retaddr();
    set_address_zero(env, r1, d + len);
    set_address_zero(env, r2, s + len);
    return 3;
}
1011 
1012 /* load access registers r1 to r3 from memory at a2 */
1013 void HELPER(lam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1014 {
1015     uintptr_t ra = GETPC();
1016     int i;
1017 
1018     if (a2 & 0x3) {
1019         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1020     }
1021 
1022     for (i = r1;; i = (i + 1) % 16) {
1023         env->aregs[i] = cpu_ldl_data_ra(env, a2, ra);
1024         a2 += 4;
1025 
1026         if (i == r3) {
1027             break;
1028         }
1029     }
1030 }
1031 
1032 /* store access registers r1 to r3 in memory at a2 */
1033 void HELPER(stam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1034 {
1035     uintptr_t ra = GETPC();
1036     int i;
1037 
1038     if (a2 & 0x3) {
1039         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1040     }
1041 
1042     for (i = r1;; i = (i + 1) % 16) {
1043         cpu_stl_data_ra(env, a2, env->aregs[i], ra);
1044         a2 += 4;
1045 
1046         if (i == r3) {
1047             break;
1048         }
1049     }
1050 }
1051 
/*
 * Move long helper shared by MVCLE (wordsize == 1) and MVCLU (wordsize == 2):
 * perform one CPU-determined step of either a copy or a pad, updating the
 * operand addresses/lengths in place.  Returns the condition code
 * (0/1/2 comparing the original lengths, 3 if the destination is not yet
 * exhausted and the caller should be re-entered).
 */
static inline uint32_t do_mvcl(CPUS390XState *env,
                               uint64_t *dest, uint64_t *destlen,
                               uint64_t *src, uint64_t *srclen,
                               uint16_t pad, int wordsize, uintptr_t ra)
{
    const int mmu_idx = s390x_env_mmu_index(env, false);
    /* Limit this step to the end of the current destination page.  */
    int len = MIN(*destlen, -(*dest | TARGET_PAGE_MASK));
    S390Access srca, desta;
    int i, cc;

    /* The condition code reflects the full original lengths.  */
    if (*destlen == *srclen) {
        cc = 0;
    } else if (*destlen < *srclen) {
        cc = 1;
    } else {
        cc = 2;
    }

    /* Nothing left to copy or pad.  */
    if (!*destlen) {
        return cc;
    }

    /*
     * Only perform one type of operation (move/pad) at a time.
     * Stay within single pages.
     */
    if (*srclen) {
        /* Copy the src array */
        len = MIN(MIN(*srclen, -(*src | TARGET_PAGE_MASK)), len);
        *destlen -= len;
        *srclen -= len;
        access_prepare(&srca, env, *src, len, MMU_DATA_LOAD, mmu_idx, ra);
        access_prepare(&desta, env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
        access_memmove(env, &desta, &srca, ra);
        *src = wrap_address(env, *src + len);
        *dest = wrap_address(env, *dest + len);
    } else if (wordsize == 1) {
        /* Pad the remaining area */
        *destlen -= len;
        access_prepare(&desta, env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
        access_memset(env, &desta, pad, ra);
        *dest = wrap_address(env, *dest + len);
    } else {
        /* Two-byte pad: store the pad halfword byte by byte.  */
        access_prepare(&desta, env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
        set_helper_retaddr(ra);

        /* The remaining length selects the padding byte. */
        for (i = 0; i < len; (*destlen)--, i++) {
            if (*destlen & 1) {
                access_set_byte(env, &desta, i, pad, ra);
            } else {
                access_set_byte(env, &desta, i, pad >> 8, ra);
            }
        }
        clear_helper_retaddr();
        *dest = wrap_address(env, *dest + len);
    }

    /* CC 3 while this CPU-determined step hasn't exhausted the dest.  */
    return *destlen ? 3 : cc;
}
1113 
/* move long */
uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    const int mmu_idx = s390x_env_mmu_index(env, false);
    uintptr_t ra = GETPC();
    uint64_t destlen = env->regs[r1 + 1] & 0xffffff;   /* 24-bit length */
    uint64_t dest = get_address(env, r1);
    uint64_t srclen = env->regs[r2 + 1] & 0xffffff;    /* 24-bit length */
    uint64_t src = get_address(env, r2);
    /* The padding byte comes from bits 24-31 of R2+1.  */
    uint8_t pad = env->regs[r2 + 1] >> 24;
    CPUState *cs = env_cpu(env);
    S390Access srca, desta;
    uint32_t cc, cur_len;

    /* Destructive overlap yields CC 3 without moving any data.  */
    if (is_destructive_overlap(env, dest, src, MIN(srclen, destlen))) {
        cc = 3;
    } else if (srclen == destlen) {
        cc = 0;
    } else if (destlen < srclen) {
        cc = 1;
    } else {
        cc = 2;
    }

    /* We might have to zero-out some bits even if there was no action. */
    if (unlikely(!destlen || cc == 3)) {
        set_address_zero(env, r2, src);
        set_address_zero(env, r1, dest);
        return cc;
    } else if (!srclen) {
        set_address_zero(env, r2, src);
    }

    /*
     * Only perform one type of operation (move/pad) in one step.
     * Stay within single pages.
     */
    while (destlen) {
        cur_len = MIN(destlen, -(dest | TARGET_PAGE_MASK));
        if (!srclen) {
            /* Source exhausted: pad the remaining destination.  */
            access_prepare(&desta, env, dest, cur_len,
                           MMU_DATA_STORE, mmu_idx, ra);
            access_memset(env, &desta, pad, ra);
        } else {
            /* Also limit the chunk to the end of the source page.  */
            cur_len = MIN(MIN(srclen, -(src | TARGET_PAGE_MASK)), cur_len);

            access_prepare(&srca, env, src, cur_len,
                           MMU_DATA_LOAD, mmu_idx, ra);
            access_prepare(&desta, env, dest, cur_len,
                           MMU_DATA_STORE, mmu_idx, ra);
            access_memmove(env, &desta, &srca, ra);
            src = wrap_address(env, src + cur_len);
            srclen -= cur_len;
            env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, srclen);
            set_address_zero(env, r2, src);
        }
        dest = wrap_address(env, dest + cur_len);
        destlen -= cur_len;
        env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, destlen);
        set_address_zero(env, r1, dest);

        /*
         * MVCL is interruptible. Return to the main loop if requested after
         * writing back all state to registers. If no interrupt will get
         * injected, we'll end up back in this handler and continue processing
         * the remaining parts.
         */
        if (destlen && unlikely(cpu_loop_exit_requested(cs))) {
            cpu_loop_exit_restore(cs, ra);
        }
    }
    return cc;
}
1187 
1188 /* move long extended */
1189 uint32_t HELPER(mvcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1190                        uint32_t r3)
1191 {
1192     uintptr_t ra = GETPC();
1193     uint64_t destlen = get_length(env, r1 + 1);
1194     uint64_t dest = get_address(env, r1);
1195     uint64_t srclen = get_length(env, r3 + 1);
1196     uint64_t src = get_address(env, r3);
1197     uint8_t pad = a2;
1198     uint32_t cc;
1199 
1200     cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 1, ra);
1201 
1202     set_length(env, r1 + 1, destlen);
1203     set_length(env, r3 + 1, srclen);
1204     set_address(env, r1, dest);
1205     set_address(env, r3, src);
1206 
1207     return cc;
1208 }
1209 
1210 /* move long unicode */
1211 uint32_t HELPER(mvclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1212                        uint32_t r3)
1213 {
1214     uintptr_t ra = GETPC();
1215     uint64_t destlen = get_length(env, r1 + 1);
1216     uint64_t dest = get_address(env, r1);
1217     uint64_t srclen = get_length(env, r3 + 1);
1218     uint64_t src = get_address(env, r3);
1219     uint16_t pad = a2;
1220     uint32_t cc;
1221 
1222     cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 2, ra);
1223 
1224     set_length(env, r1 + 1, destlen);
1225     set_length(env, r3 + 1, srclen);
1226     set_address(env, r1, dest);
1227     set_address(env, r3, src);
1228 
1229     return cc;
1230 }
1231 
/*
 * Compare logical long helper shared by CLCL/CLCLE (wordsize == 1) and
 * CLCLU (wordsize == 2).  Compares up to `limit` characters per call,
 * padding the shorter operand with `pad`, and advances the addresses and
 * lengths in place.  Returns CC 0 (equal), 1 (op1 low), 2 (op1 high), or
 * 3 (limit reached without a difference).
 */
static inline uint32_t do_clcl(CPUS390XState *env,
                               uint64_t *src1, uint64_t *src1len,
                               uint64_t *src3, uint64_t *src3len,
                               uint16_t pad, uint64_t limit,
                               int wordsize, uintptr_t ra)
{
    /* Compare out to the longer operand; the shorter one is padded.  */
    uint64_t len = MAX(*src1len, *src3len);
    uint32_t cc = 0;

    /* Both lengths must be multiples of the character size.  */
    check_alignment(env, *src1len | *src3len, wordsize, ra);

    if (!len) {
        return cc;
    }

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  */
    if (len > limit) {
        len = limit;
        cc = 3;
    }

    for (; len; len -= wordsize) {
        /* An exhausted operand reads as the padding character.  */
        uint16_t v1 = pad;
        uint16_t v3 = pad;

        if (*src1len) {
            v1 = cpu_ldusize_data_ra(env, *src1, wordsize, ra);
        }
        if (*src3len) {
            v3 = cpu_ldusize_data_ra(env, *src3, wordsize, ra);
        }

        if (v1 != v3) {
            /* First inequality decides the condition code.  */
            cc = (v1 < v3) ? 1 : 2;
            break;
        }

        /* Only advance operands that still have data left.  */
        if (*src1len) {
            *src1 += wordsize;
            *src1len -= wordsize;
        }
        if (*src3len) {
            *src3 += wordsize;
            *src3len -= wordsize;
        }
    }

    return cc;
}
1283 
1284 
1285 /* compare logical long */
1286 uint32_t HELPER(clcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
1287 {
1288     uintptr_t ra = GETPC();
1289     uint64_t src1len = extract64(env->regs[r1 + 1], 0, 24);
1290     uint64_t src1 = get_address(env, r1);
1291     uint64_t src3len = extract64(env->regs[r2 + 1], 0, 24);
1292     uint64_t src3 = get_address(env, r2);
1293     uint8_t pad = env->regs[r2 + 1] >> 24;
1294     uint32_t cc;
1295 
1296     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, -1, 1, ra);
1297 
1298     env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, src1len);
1299     env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, src3len);
1300     set_address(env, r1, src1);
1301     set_address(env, r2, src3);
1302 
1303     return cc;
1304 }
1305 
1306 /* compare logical long extended memcompare insn with padding */
1307 uint32_t HELPER(clcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1308                        uint32_t r3)
1309 {
1310     uintptr_t ra = GETPC();
1311     uint64_t src1len = get_length(env, r1 + 1);
1312     uint64_t src1 = get_address(env, r1);
1313     uint64_t src3len = get_length(env, r3 + 1);
1314     uint64_t src3 = get_address(env, r3);
1315     uint8_t pad = a2;
1316     uint32_t cc;
1317 
1318     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x2000, 1, ra);
1319 
1320     set_length(env, r1 + 1, src1len);
1321     set_length(env, r3 + 1, src3len);
1322     set_address(env, r1, src1);
1323     set_address(env, r3, src3);
1324 
1325     return cc;
1326 }
1327 
1328 /* compare logical long unicode memcompare insn with padding */
1329 uint32_t HELPER(clclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1330                        uint32_t r3)
1331 {
1332     uintptr_t ra = GETPC();
1333     uint64_t src1len = get_length(env, r1 + 1);
1334     uint64_t src1 = get_address(env, r1);
1335     uint64_t src3len = get_length(env, r3 + 1);
1336     uint64_t src3 = get_address(env, r3);
1337     uint16_t pad = a2;
1338     uint32_t cc = 0;
1339 
1340     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x1000, 2, ra);
1341 
1342     set_length(env, r1 + 1, src1len);
1343     set_length(env, r3 + 1, src3len);
1344     set_address(env, r1, src1);
1345     set_address(env, r3, src3);
1346 
1347     return cc;
1348 }
1349 
/* checksum */
Int128 HELPER(cksm)(CPUS390XState *env, uint64_t r1,
                    uint64_t src, uint64_t src_len)
{
    uintptr_t ra = GETPC();
    uint64_t max_len, len;
    /* The running sum starts from the low 32 bits of R1.  */
    uint64_t cksm = (uint32_t)r1;

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    max_len = (src_len > 0x2000 ? 0x2000 : src_len);

    /* Process full words as available.  */
    for (len = 0; len + 4 <= max_len; len += 4, src += 4) {
        cksm += (uint32_t)cpu_ldl_data_ra(env, src, ra);
    }

    /* Add the 1-3 trailing bytes, left-aligned within a word.  */
    switch (max_len - len) {
    case 1:
        cksm += cpu_ldub_data_ra(env, src, ra) << 24;
        len += 1;
        break;
    case 2:
        cksm += cpu_lduw_data_ra(env, src, ra) << 16;
        len += 2;
        break;
    case 3:
        cksm += cpu_lduw_data_ra(env, src, ra) << 16;
        cksm += cpu_ldub_data_ra(env, src + 2, ra) << 8;
        len += 3;
        break;
    }

    /* Fold the carry from the checksum.  Note that we can see carry-out
       during folding more than once (but probably not more than twice).  */
    while (cksm > 0xffffffffull) {
        cksm = (uint32_t)cksm + (cksm >> 32);
    }

    /* Indicate whether or not we've processed everything.  */
    env->cc_op = (len == src_len ? 0 : 3);

    /* Return both cksm and processed length.  */
    return int128_make128(cksm, len);
}
1395 
/*
 * PACK: convert the zoned-decimal source into packed decimal at dest.
 * The two nibbles of `len` encode the operand length codes; operands are
 * processed right to left.
 */
void HELPER(pack)(CPUS390XState *env, uint32_t len, uint64_t dest, uint64_t src)
{
    uintptr_t ra = GETPC();
    int len_dest = len >> 4;    /* high nibble: destination length code */
    int len_src = len & 0xf;    /* low nibble: source length code */
    uint8_t b;

    /* Point at the rightmost byte of each operand.  */
    dest += len_dest;
    src += len_src;

    /* last byte is special, it only flips the nibbles */
    b = cpu_ldub_data_ra(env, src, ra);
    cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
    src--;
    len_src--;

    /* now pack every value */
    while (len_dest > 0) {
        b = 0;

        /* Low result nibble from the next source byte, if any.  */
        if (len_src >= 0) {
            b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
            src--;
            len_src--;
        }
        /* High result nibble from the byte after that; else zero.  */
        if (len_src >= 0) {
            b |= cpu_ldub_data_ra(env, src, ra) << 4;
            src--;
            len_src--;
        }

        len_dest--;
        dest--;
        cpu_stb_data_ra(env, dest, b, ra);
    }
}
1432 
/*
 * Common implementation of PKA (ssize == 1) and PKU (ssize == 2): pack the
 * low nibble of each source character into the 16-byte packed-decimal
 * destination, right to left, forcing a positive sign (0xc).
 * NOTE(review): the `srclen > ssize` (not >=) guards presumably reflect how
 * the translator encodes the operand length - confirm against the caller.
 */
static inline void do_pkau(CPUS390XState *env, uint64_t dest, uint64_t src,
                           uint32_t srclen, int ssize, uintptr_t ra)
{
    int i;
    /* The destination operand is always 16 bytes long.  */
    const int destlen = 16;

    /* The operands are processed from right to left.  */
    src += srclen - 1;
    dest += destlen - 1;

    for (i = 0; i < destlen; i++) {
        uint8_t b = 0;

        /* Start with a positive sign */
        if (i == 0) {
            b = 0xc;
        } else if (srclen > ssize) {
            b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
            src -= ssize;
            srclen -= ssize;
        }

        /* High nibble from the next source character, if any remain.  */
        if (srclen > ssize) {
            b |= cpu_ldub_data_ra(env, src, ra) << 4;
            src -= ssize;
            srclen -= ssize;
        }

        cpu_stb_data_ra(env, dest, b, ra);
        dest--;
    }
}
1466 
1467 
/* PACK ASCII: one-byte source characters (ssize == 1).  */
void HELPER(pka)(CPUS390XState *env, uint64_t dest, uint64_t src,
                 uint32_t srclen)
{
    do_pkau(env, dest, src, srclen, 1, GETPC());
}
1473 
/* PACK UNICODE: two-byte source characters (ssize == 2).  */
void HELPER(pku)(CPUS390XState *env, uint64_t dest, uint64_t src,
                 uint32_t srclen)
{
    do_pkau(env, dest, src, srclen, 2, GETPC());
}
1479 
/*
 * UNPACK: convert the packed-decimal source into zoned decimal at dest,
 * right to left, one nibble per destination byte with zone bits 0xf0.
 * The two nibbles of `len` encode the operand length codes.
 */
void HELPER(unpk)(CPUS390XState *env, uint32_t len, uint64_t dest,
                  uint64_t src)
{
    uintptr_t ra = GETPC();
    int len_dest = len >> 4;    /* high nibble: destination length code */
    int len_src = len & 0xf;    /* low nibble: source length code */
    uint8_t b;
    int second_nibble = 0;      /* toggles between the two source nibbles */

    /* Point at the rightmost byte of each operand.  */
    dest += len_dest;
    src += len_src;

    /* last byte is special, it only flips the nibbles */
    b = cpu_ldub_data_ra(env, src, ra);
    cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
    src--;
    len_src--;

    /* now pad every nibble with 0xf0 */

    while (len_dest > 0) {
        uint8_t cur_byte = 0;

        if (len_src > 0) {
            cur_byte = cpu_ldub_data_ra(env, src, ra);
        }

        len_dest--;
        dest--;

        /* only advance one nibble at a time */
        if (second_nibble) {
            cur_byte >>= 4;
            len_src--;
            src--;
        }
        second_nibble = !second_nibble;

        /* digit */
        cur_byte = (cur_byte & 0xf);
        /* zone bits */
        cur_byte |= 0xf0;

        cpu_stb_data_ra(env, dest, cur_byte, ra);
    }
}
1526 
/*
 * Common implementation of UNPKA (dsize == 1) and UNPKU (dsize == 2):
 * unpack the 16-byte packed-decimal source at src into destlen bytes of
 * dsize-wide characters at dest, zoning each digit with 0x30.
 * Returns the condition code from the sign nibble:
 *   0 - plus, 1 - minus, 3 - invalid sign.
 */
static inline uint32_t do_unpkau(CPUS390XState *env, uint64_t dest,
                                 uint32_t destlen, int dsize, uint64_t src,
                                 uintptr_t ra)
{
    int i;
    uint32_t cc;
    uint8_t b;
    /* The source operand is always 16 bytes long.  */
    const int srclen = 16;

    /* The operands are processed from right to left.  */
    src += srclen - 1;
    dest += destlen - dsize;

    /* Check for the sign.  */
    b = cpu_ldub_data_ra(env, src, ra);
    src--;
    switch (b & 0xf) {
    case 0xa:
    case 0xc:
    case 0xe ... 0xf:
        cc = 0;  /* plus */
        break;
    case 0xb:
    case 0xd:
        cc = 1;  /* minus */
        break;
    default:
    case 0x0 ... 0x9:
        cc = 3;  /* invalid */
        break;
    }

    /* Now pad every nibble with 0x30, advancing one nibble at a time. */
    for (i = 0; i < destlen; i += dsize) {
        if (i == (31 * dsize)) {
            /* If length is 32/64 bytes, the leftmost byte is 0. */
            b = 0;
        } else if (i % (2 * dsize)) {
            /* Odd position: fetch the next source byte (low nibble).  */
            b = cpu_ldub_data_ra(env, src, ra);
            src--;
        } else {
            /* Even position: use the high nibble of the last byte read
               (for i == 0, that is the digit above the sign nibble).  */
            b >>= 4;
        }
        cpu_stsize_data_ra(env, dest, 0x30 + (b & 0xf), dsize, ra);
        dest -= dsize;
    }

    return cc;
}
1577 
/* UNPACK ASCII: one-byte destination characters (dsize == 1).  */
uint32_t HELPER(unpka)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
                       uint64_t src)
{
    return do_unpkau(env, dest, destlen, 1, src, GETPC());
}
1583 
/* UNPACK UNICODE: two-byte destination characters (dsize == 2).  */
uint32_t HELPER(unpku)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
                       uint64_t src)
{
    return do_unpkau(env, dest, destlen, 2, src, GETPC());
}
1589 
1590 uint32_t HELPER(tp)(CPUS390XState *env, uint64_t dest, uint32_t destlen)
1591 {
1592     uintptr_t ra = GETPC();
1593     uint32_t cc = 0;
1594     int i;
1595 
1596     for (i = 0; i < destlen; i++) {
1597         uint8_t b = cpu_ldub_data_ra(env, dest + i, ra);
1598         /* digit */
1599         cc |= (b & 0xf0) > 0x90 ? 2 : 0;
1600 
1601         if (i == (destlen - 1)) {
1602             /* sign */
1603             cc |= (b & 0xf) < 0xa ? 1 : 0;
1604         } else {
1605             /* digit */
1606             cc |= (b & 0xf) > 0x9 ? 2 : 0;
1607         }
1608     }
1609 
1610     return cc;
1611 }
1612 
1613 static uint32_t do_helper_tr(CPUS390XState *env, uint32_t len, uint64_t array,
1614                              uint64_t trans, uintptr_t ra)
1615 {
1616     uint32_t i;
1617 
1618     for (i = 0; i <= len; i++) {
1619         uint8_t byte = cpu_ldub_data_ra(env, array + i, ra);
1620         uint8_t new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
1621         cpu_stb_data_ra(env, array + i, new_byte, ra);
1622     }
1623 
1624     return env->cc_op;
1625 }
1626 
/* TRANSLATE: translate len + 1 bytes at array via the table at trans.  */
void HELPER(tr)(CPUS390XState *env, uint32_t len, uint64_t array,
                uint64_t trans)
{
    do_helper_tr(env, len, array, trans, GETPC());
}
1632 
/*
 * TRANSLATE EXTENDED: translate bytes at array in place via the table at
 * trans, stopping at the test byte from R0.  Sets env->cc_op (0 - length
 * exhausted, 1 - test byte found, 3 - work cap hit) and returns the
 * remaining length and updated array address packed as an Int128.
 */
Int128 HELPER(tre)(CPUS390XState *env, uint64_t array,
                   uint64_t len, uint64_t trans)
{
    uintptr_t ra = GETPC();
    uint8_t end = env->regs[0] & 0xff;   /* test byte from R0 */
    uint64_t l = len;
    uint64_t i;
    uint32_t cc = 0;

    /* Outside 64-bit mode, only 31 address bits and a 32-bit length.  */
    if (!(env->psw.mask & PSW_MASK_64)) {
        array &= 0x7fffffff;
        l = (uint32_t)l;
    }

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    if (l > 0x2000) {
        l = 0x2000;
        cc = 3;
    }

    for (i = 0; i < l; i++) {
        uint8_t byte, new_byte;

        byte = cpu_ldub_data_ra(env, array + i, ra);

        /* The test byte stops processing before being translated.  */
        if (byte == end) {
            cc = 1;
            break;
        }

        new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
        cpu_stb_data_ra(env, array + i, new_byte, ra);
    }

    env->cc_op = cc;
    return int128_make128(len - i, array + i);
}
1671 
1672 static inline uint32_t do_helper_trt(CPUS390XState *env, int len,
1673                                      uint64_t array, uint64_t trans,
1674                                      int inc, uintptr_t ra)
1675 {
1676     int i;
1677 
1678     for (i = 0; i <= len; i++) {
1679         uint8_t byte = cpu_ldub_data_ra(env, array + i * inc, ra);
1680         uint8_t sbyte = cpu_ldub_data_ra(env, trans + byte, ra);
1681 
1682         if (sbyte != 0) {
1683             set_address(env, 1, array + i * inc);
1684             env->regs[2] = deposit64(env->regs[2], 0, 8, sbyte);
1685             return (i == len) ? 2 : 1;
1686         }
1687     }
1688 
1689     return 0;
1690 }
1691 
/* Forward (inc == +1) wrapper for do_helper_trt; matches the helper
   signature used elsewhere in this file (e.g. for EXECUTE dispatch) -
   NOTE(review): caller not visible in this chunk, confirm.  */
static uint32_t do_helper_trt_fwd(CPUS390XState *env, uint32_t len,
                                  uint64_t array, uint64_t trans,
                                  uintptr_t ra)
{
    return do_helper_trt(env, len, array, trans, 1, ra);
}
1698 
/* TRANSLATE AND TEST (forward scan).  */
uint32_t HELPER(trt)(CPUS390XState *env, uint32_t len, uint64_t array,
                     uint64_t trans)
{
    return do_helper_trt(env, len, array, trans, 1, GETPC());
}
1704 
/* Backward (inc == -1) wrapper for do_helper_trt; matches the helper
   signature used elsewhere in this file (e.g. for EXECUTE dispatch) -
   NOTE(review): caller not visible in this chunk, confirm.  */
static uint32_t do_helper_trt_bkwd(CPUS390XState *env, uint32_t len,
                                   uint64_t array, uint64_t trans,
                                   uintptr_t ra)
{
    return do_helper_trt(env, len, array, trans, -1, ra);
}
1711 
/* TRANSLATE AND TEST REVERSE (backward scan).  */
uint32_t HELPER(trtr)(CPUS390XState *env, uint32_t len, uint64_t array,
                      uint64_t trans)
{
    return do_helper_trt(env, len, array, trans, -1, GETPC());
}
1717 
/*
 * Translate one/two to one/two (TROO/TROT/TRTO/TRTT): translate up to
 * 8k characters from src (character size ssize) through the table at GR1
 * into dst (character size dsize), stopping early when the translated
 * value equals the test character tst.  Returns CC 0 (length exhausted),
 * 1 (test character hit) or 3 (work cap reached).
 */
uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2,
                      uint32_t tst, uint32_t sizes)
{
    uintptr_t ra = GETPC();
    int dsize = (sizes & 1) ? 1 : 2;   /* destination character size */
    int ssize = (sizes & 2) ? 1 : 2;   /* source character size */
    uint64_t tbl = get_address(env, 1);   /* translation table in GR1 */
    uint64_t dst = get_address(env, r1);
    uint64_t len = get_length(env, r1 + 1);
    uint64_t src = get_address(env, r2);
    uint32_t cc = 3;
    int i;

    /* The lower address bits of TBL are ignored.  For TROO, TROT, it's
       the low 3 bits (double-word aligned).  For TRTO, TRTT, it's either
       the low 12 bits (4K, without ETF2-ENH) or 3 bits (with ETF2-ENH).  */
    if (ssize == 2 && !s390_has_feat(S390_FEAT_ETF2_ENH)) {
        tbl &= -4096;
    } else {
        tbl &= -8;
    }

    /* The length must be a multiple of the source character size.  */
    check_alignment(env, len, ssize, ra);

    /* Lest we fail to service interrupts in a timely manner, */
    /* limit the amount of work we're willing to do.   */
    for (i = 0; i < 0x2000; i++) {
        uint16_t sval = cpu_ldusize_data_ra(env, src, ssize, ra);
        uint64_t tble = tbl + (sval * dsize);
        uint16_t dval = cpu_ldusize_data_ra(env, tble, dsize, ra);
        /* Stop before storing when the translated char is the test char. */
        if (dval == tst) {
            cc = 1;
            break;
        }
        cpu_stsize_data_ra(env, dst, dval, dsize, ra);

        len -= ssize;
        src += ssize;
        dst += dsize;

        if (len == 0) {
            cc = 0;
            break;
        }
    }

    /* Write the advanced addresses and remaining length back.  */
    set_address(env, r1, dst);
    set_length(env, r1 + 1, len);
    set_address(env, r2, src);

    return cc;
}
1771 
1772 static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
1773                         uint64_t a2, bool parallel)
1774 {
1775     uint32_t mem_idx = s390x_env_mmu_index(env, false);
1776     MemOpIdx oi16 = make_memop_idx(MO_TE | MO_128, mem_idx);
1777     MemOpIdx oi8 = make_memop_idx(MO_TE | MO_64, mem_idx);
1778     MemOpIdx oi4 = make_memop_idx(MO_TE | MO_32, mem_idx);
1779     MemOpIdx oi2 = make_memop_idx(MO_TE | MO_16, mem_idx);
1780     MemOpIdx oi1 = make_memop_idx(MO_8, mem_idx);
1781     uintptr_t ra = GETPC();
1782     uint32_t fc = extract32(env->regs[0], 0, 8);
1783     uint32_t sc = extract32(env->regs[0], 8, 8);
1784     uint64_t pl = get_address(env, 1) & -16;
1785     uint64_t svh, svl;
1786     uint32_t cc;
1787 
1788     /* Sanity check the function code and storage characteristic.  */
1789     if (fc > 1 || sc > 3) {
1790         if (!s390_has_feat(S390_FEAT_COMPARE_AND_SWAP_AND_STORE_2)) {
1791             goto spec_exception;
1792         }
1793         if (fc > 2 || sc > 4 || (fc == 2 && (r3 & 1))) {
1794             goto spec_exception;
1795         }
1796     }
1797 
1798     /* Sanity check the alignments.  */
1799     if (extract32(a1, 0, fc + 2) || extract32(a2, 0, sc)) {
1800         goto spec_exception;
1801     }
1802 
1803     /* Sanity check writability of the store address.  */
1804     probe_write(env, a2, 1 << sc, mem_idx, ra);
1805 
1806     /*
1807      * Note that the compare-and-swap is atomic, and the store is atomic,
1808      * but the complete operation is not.  Therefore we do not need to
1809      * assert serial context in order to implement this.  That said,
1810      * restart early if we can't support either operation that is supposed
1811      * to be atomic.
1812      */
1813     if (parallel) {
1814         uint32_t max = 2;
1815 #ifdef CONFIG_ATOMIC64
1816         max = 3;
1817 #endif
1818         if ((HAVE_CMPXCHG128 ? 0 : fc + 2 > max) ||
1819             (HAVE_ATOMIC128_RW ? 0 : sc > max)) {
1820             cpu_loop_exit_atomic(env_cpu(env), ra);
1821         }
1822     }
1823 
1824     /*
1825      * All loads happen before all stores.  For simplicity, load the entire
1826      * store value area from the parameter list.
1827      */
1828     svh = cpu_ldq_mmu(env, pl + 16, oi8, ra);
1829     svl = cpu_ldq_mmu(env, pl + 24, oi8, ra);
1830 
1831     switch (fc) {
1832     case 0:
1833         {
1834             uint32_t nv = cpu_ldl_mmu(env, pl, oi4, ra);
1835             uint32_t cv = env->regs[r3];
1836             uint32_t ov;
1837 
1838             if (parallel) {
1839                 ov = cpu_atomic_cmpxchgl_be_mmu(env, a1, cv, nv, oi4, ra);
1840             } else {
1841                 ov = cpu_ldl_mmu(env, a1, oi4, ra);
1842                 cpu_stl_mmu(env, a1, (ov == cv ? nv : ov), oi4, ra);
1843             }
1844             cc = (ov != cv);
1845             env->regs[r3] = deposit64(env->regs[r3], 32, 32, ov);
1846         }
1847         break;
1848 
1849     case 1:
1850         {
1851             uint64_t nv = cpu_ldq_mmu(env, pl, oi8, ra);
1852             uint64_t cv = env->regs[r3];
1853             uint64_t ov;
1854 
1855             if (parallel) {
1856 #ifdef CONFIG_ATOMIC64
1857                 ov = cpu_atomic_cmpxchgq_be_mmu(env, a1, cv, nv, oi8, ra);
1858 #else
1859                 /* Note that we asserted !parallel above.  */
1860                 g_assert_not_reached();
1861 #endif
1862             } else {
1863                 ov = cpu_ldq_mmu(env, a1, oi8, ra);
1864                 cpu_stq_mmu(env, a1, (ov == cv ? nv : ov), oi8, ra);
1865             }
1866             cc = (ov != cv);
1867             env->regs[r3] = ov;
1868         }
1869         break;
1870 
1871     case 2:
1872         {
1873             Int128 nv = cpu_ld16_mmu(env, pl, oi16, ra);
1874             Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1875             Int128 ov;
1876 
1877             if (!parallel) {
1878                 ov = cpu_ld16_mmu(env, a1, oi16, ra);
1879                 cc = !int128_eq(ov, cv);
1880                 if (cc) {
1881                     nv = ov;
1882                 }
1883                 cpu_st16_mmu(env, a1, nv, oi16, ra);
1884             } else if (HAVE_CMPXCHG128) {
1885                 ov = cpu_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi16, ra);
1886                 cc = !int128_eq(ov, cv);
1887             } else {
1888                 /* Note that we asserted !parallel above.  */
1889                 g_assert_not_reached();
1890             }
1891 
1892             env->regs[r3 + 0] = int128_gethi(ov);
1893             env->regs[r3 + 1] = int128_getlo(ov);
1894         }
1895         break;
1896 
1897     default:
1898         g_assert_not_reached();
1899     }
1900 
1901     /* Store only if the comparison succeeded.  Note that above we use a pair
1902        of 64-bit big-endian loads, so for sc < 3 we must extract the value
1903        from the most-significant bits of svh.  */
1904     if (cc == 0) {
1905         switch (sc) {
1906         case 0:
1907             cpu_stb_mmu(env, a2, svh >> 56, oi1, ra);
1908             break;
1909         case 1:
1910             cpu_stw_mmu(env, a2, svh >> 48, oi2, ra);
1911             break;
1912         case 2:
1913             cpu_stl_mmu(env, a2, svh >> 32, oi4, ra);
1914             break;
1915         case 3:
1916             cpu_stq_mmu(env, a2, svh, oi8, ra);
1917             break;
1918         case 4:
1919             cpu_st16_mmu(env, a2, int128_make128(svl, svh), oi16, ra);
1920             break;
1921         default:
1922             g_assert_not_reached();
1923         }
1924     }
1925 
1926     return cc;
1927 
1928  spec_exception:
1929     tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1930 }
1931 
/* COMPARE AND SWAP AND STORE, serial-context variant: do_csst may use
   plain (non-atomic) loads and stores for the compare-and-swap.  */
uint32_t HELPER(csst)(CPUS390XState *env, uint32_t r3, uint64_t a1, uint64_t a2)
{
    return do_csst(env, r3, a1, a2, false);
}
1936 
/* COMPARE AND SWAP AND STORE, parallel variant: do_csst must use host
   atomics, or restart serially via cpu_loop_exit_atomic when the host
   cannot provide an atomic operation of the required width.  */
uint32_t HELPER(csst_parallel)(CPUS390XState *env, uint32_t r3, uint64_t a1,
                               uint64_t a2)
{
    return do_csst(env, r3, a1, a2, true);
}
1942 
1943 #if !defined(CONFIG_USER_ONLY)
1944 void HELPER(lctlg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1945 {
1946     uintptr_t ra = GETPC();
1947     bool PERchanged = false;
1948     uint64_t src = a2;
1949     uint32_t i;
1950 
1951     if (src & 0x7) {
1952         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1953     }
1954 
1955     for (i = r1;; i = (i + 1) % 16) {
1956         uint64_t val = cpu_ldq_data_ra(env, src, ra);
1957         if (env->cregs[i] != val && i >= 9 && i <= 11) {
1958             PERchanged = true;
1959         }
1960         env->cregs[i] = val;
1961         HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%" PRIx64 "\n",
1962                    i, src, val);
1963         src += sizeof(uint64_t);
1964 
1965         if (i == r3) {
1966             break;
1967         }
1968     }
1969 
1970     if (PERchanged && env->psw.mask & PSW_MASK_PER) {
1971         s390_cpu_recompute_watchpoints(env_cpu(env));
1972     }
1973 
1974     tlb_flush(env_cpu(env));
1975 }
1976 
1977 void HELPER(lctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1978 {
1979     uintptr_t ra = GETPC();
1980     bool PERchanged = false;
1981     uint64_t src = a2;
1982     uint32_t i;
1983 
1984     if (src & 0x3) {
1985         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1986     }
1987 
1988     for (i = r1;; i = (i + 1) % 16) {
1989         uint32_t val = cpu_ldl_data_ra(env, src, ra);
1990         if ((uint32_t)env->cregs[i] != val && i >= 9 && i <= 11) {
1991             PERchanged = true;
1992         }
1993         env->cregs[i] = deposit64(env->cregs[i], 0, 32, val);
1994         HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%x\n", i, src, val);
1995         src += sizeof(uint32_t);
1996 
1997         if (i == r3) {
1998             break;
1999         }
2000     }
2001 
2002     if (PERchanged && env->psw.mask & PSW_MASK_PER) {
2003         s390_cpu_recompute_watchpoints(env_cpu(env));
2004     }
2005 
2006     tlb_flush(env_cpu(env));
2007 }
2008 
2009 void HELPER(stctg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2010 {
2011     uintptr_t ra = GETPC();
2012     uint64_t dest = a2;
2013     uint32_t i;
2014 
2015     if (dest & 0x7) {
2016         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2017     }
2018 
2019     for (i = r1;; i = (i + 1) % 16) {
2020         cpu_stq_data_ra(env, dest, env->cregs[i], ra);
2021         dest += sizeof(uint64_t);
2022 
2023         if (i == r3) {
2024             break;
2025         }
2026     }
2027 }
2028 
2029 void HELPER(stctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2030 {
2031     uintptr_t ra = GETPC();
2032     uint64_t dest = a2;
2033     uint32_t i;
2034 
2035     if (dest & 0x3) {
2036         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2037     }
2038 
2039     for (i = r1;; i = (i + 1) % 16) {
2040         cpu_stl_data_ra(env, dest, env->cregs[i], ra);
2041         dest += sizeof(uint32_t);
2042 
2043         if (i == r3) {
2044             break;
2045         }
2046     }
2047 }
2048 
2049 uint32_t HELPER(testblock)(CPUS390XState *env, uint64_t real_addr)
2050 {
2051     uintptr_t ra = GETPC();
2052     int i;
2053 
2054     real_addr = wrap_address(env, real_addr) & TARGET_PAGE_MASK;
2055 
2056     for (i = 0; i < TARGET_PAGE_SIZE; i += 8) {
2057         cpu_stq_mmuidx_ra(env, real_addr + i, 0, MMU_REAL_IDX, ra);
2058     }
2059 
2060     return 0;
2061 }
2062 
/*
 * TEST PROTECTION: probe whether the location designated by a1 may be
 * stored into and/or fetched from, reporting the answer as a condition
 * code instead of taking a protection exception.
 * NOTE(review): a2 is unused here; presumably it carries the access key
 * for key-controlled protection, which is not implemented (see TODO).
 */
uint32_t HELPER(tprot)(CPUS390XState *env, uint64_t a1, uint64_t a2)
{
    S390CPU *cpu = env_archcpu(env);
    CPUState *cs = env_cpu(env);

    /*
     * TODO: we currently don't handle all access protection types
     * (including access-list and key-controlled) as well as AR mode.
     */
    if (!s390_cpu_virt_mem_check_write(cpu, a1, 0, 1)) {
        /* Fetching permitted; storing permitted */
        return 0;
    }

    /*
     * The write probe failed; env->int_pgm_code holds the resulting
     * program-interruption code.  A protection failure on write may
     * still permit reads, so probe again for fetch access.
     */
    if (env->int_pgm_code == PGM_PROTECTION) {
        /* retry if reading is possible */
        cs->exception_index = -1;
        if (!s390_cpu_virt_mem_check_read(cpu, a1, 0, 1)) {
            /* Fetching permitted; storing not permitted */
            return 1;
        }
    }

    switch (env->int_pgm_code) {
    case PGM_PROTECTION:
        /* Fetching not permitted; storing not permitted */
        cs->exception_index = -1;
        return 2;
    case PGM_ADDRESSING:
    case PGM_TRANS_SPEC:
        /* exceptions forwarded to the guest */
        s390_cpu_virt_mem_handle_exc(cpu, GETPC());
        return 0;
    }

    /* Translation not available */
    cs->exception_index = -1;
    return 3;
}
2102 
2103 /* insert storage key extended */
2104 uint64_t HELPER(iske)(CPUS390XState *env, uint64_t r2)
2105 {
2106     static S390SKeysState *ss;
2107     static S390SKeysClass *skeyclass;
2108     uint64_t addr = wrap_address(env, r2);
2109     uint8_t key;
2110     int rc;
2111 
2112     addr = mmu_real2abs(env, addr);
2113     if (!mmu_absolute_addr_valid(addr, false)) {
2114         tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
2115     }
2116 
2117     if (unlikely(!ss)) {
2118         ss = s390_get_skeys_device();
2119         skeyclass = S390_SKEYS_GET_CLASS(ss);
2120         if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
2121             tlb_flush_all_cpus_synced(env_cpu(env));
2122         }
2123     }
2124 
2125     rc = s390_skeys_get(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2126     if (rc) {
2127         return 0;
2128     }
2129     return key;
2130 }
2131 
2132 /* set storage key extended */
2133 void HELPER(sske)(CPUS390XState *env, uint64_t r1, uint64_t r2)
2134 {
2135     static S390SKeysState *ss;
2136     static S390SKeysClass *skeyclass;
2137     uint64_t addr = wrap_address(env, r2);
2138     uint8_t key;
2139 
2140     addr = mmu_real2abs(env, addr);
2141     if (!mmu_absolute_addr_valid(addr, false)) {
2142         tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
2143     }
2144 
2145     if (unlikely(!ss)) {
2146         ss = s390_get_skeys_device();
2147         skeyclass = S390_SKEYS_GET_CLASS(ss);
2148         if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
2149             tlb_flush_all_cpus_synced(env_cpu(env));
2150         }
2151     }
2152 
2153     key = r1 & 0xfe;
2154     s390_skeys_set(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2155    /*
2156     * As we can only flush by virtual address and not all the entries
2157     * that point to a physical address we have to flush the whole TLB.
2158     */
2159     tlb_flush_all_cpus_synced(env_cpu(env));
2160 }
2161 
/* reset reference bit extended */
uint32_t HELPER(rrbe)(CPUS390XState *env, uint64_t r2)
{
    uint64_t addr = wrap_address(env, r2);
    static S390SKeysState *ss;
    static S390SKeysClass *skeyclass;
    uint8_t re, key;
    int rc;

    /* Convert the wrapped real address to an absolute address.  */
    addr = mmu_real2abs(env, addr);
    if (!mmu_absolute_addr_valid(addr, false)) {
        tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
    }

    /* Look up and cache the storage-key device on first use; flush all
       TLBs when enable_skeys() returns false.  */
    if (unlikely(!ss)) {
        ss = s390_get_skeys_device();
        skeyclass = S390_SKEYS_GET_CLASS(ss);
        if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
            tlb_flush_all_cpus_synced(env_cpu(env));
        }
    }

    rc = s390_skeys_get(ss, addr / TARGET_PAGE_SIZE, 1, &key);
    if (rc) {
        /* Unreadable key: report cc 0 (R and C both zero).  */
        return 0;
    }

    /* Remember the old R and C bits, then clear R in the stored key.  */
    re = key & (SK_R | SK_C);
    key &= ~SK_R;

    rc = s390_skeys_set(ss, addr / TARGET_PAGE_SIZE, 1, &key);
    if (rc) {
        return 0;
    }
   /*
    * As we can only flush by virtual address and not all the entries
    * that point to a physical address we have to flush the whole TLB.
    */
    tlb_flush_all_cpus_synced(env_cpu(env));

    /*
     * cc
     *
     * 0  Reference bit zero; change bit zero
     * 1  Reference bit zero; change bit one
     * 2  Reference bit one; change bit zero
     * 3  Reference bit one; change bit one
     */

    /* re holds only SK_R|SK_C; the shift maps them onto the cc table
       above (assumes SK_R == 2 * SK_C — see storage-keys.h).  */
    return re >> 1;
}
2213 
2214 uint32_t HELPER(mvcs)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2,
2215                       uint64_t key)
2216 {
2217     const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2218     S390Access srca, desta;
2219     uintptr_t ra = GETPC();
2220     int cc = 0;
2221 
2222     HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
2223                __func__, l, a1, a2);
2224 
2225     if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
2226         psw_as == AS_HOME || psw_as == AS_ACCREG) {
2227         s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2228     }
2229 
2230     if (!psw_key_valid(env, (key >> 4) & 0xf)) {
2231         s390_program_interrupt(env, PGM_PRIVILEGED, ra);
2232     }
2233 
2234     l = wrap_length32(env, l);
2235     if (l > 256) {
2236         /* max 256 */
2237         l = 256;
2238         cc = 3;
2239     } else if (!l) {
2240         return cc;
2241     }
2242 
2243     access_prepare(&srca, env, a2, l, MMU_DATA_LOAD, MMU_PRIMARY_IDX, ra);
2244     access_prepare(&desta, env, a1, l, MMU_DATA_STORE, MMU_SECONDARY_IDX, ra);
2245     access_memmove(env, &desta, &srca, ra);
2246     return cc;
2247 }
2248 
2249 uint32_t HELPER(mvcp)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2,
2250                       uint64_t key)
2251 {
2252     const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2253     S390Access srca, desta;
2254     uintptr_t ra = GETPC();
2255     int cc = 0;
2256 
2257     HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
2258                __func__, l, a1, a2);
2259 
2260     if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
2261         psw_as == AS_HOME || psw_as == AS_ACCREG) {
2262         s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2263     }
2264 
2265     if (!psw_key_valid(env, (key >> 4) & 0xf)) {
2266         s390_program_interrupt(env, PGM_PRIVILEGED, ra);
2267     }
2268 
2269     l = wrap_length32(env, l);
2270     if (l > 256) {
2271         /* max 256 */
2272         l = 256;
2273         cc = 3;
2274     } else if (!l) {
2275         return cc;
2276     }
2277     access_prepare(&srca, env, a2, l, MMU_DATA_LOAD, MMU_SECONDARY_IDX, ra);
2278     access_prepare(&desta, env, a1, l, MMU_DATA_STORE, MMU_PRIMARY_IDX, ra);
2279     access_memmove(env, &desta, &srca, ra);
2280     return cc;
2281 }
2282 
/* INVALIDATE DAT TABLE ENTRY: mark region/segment table entries invalid
   and flush the TLB (always completely — we cannot flush by ASCE).  */
void HELPER(idte)(CPUS390XState *env, uint64_t r1, uint64_t r2, uint32_t m4)
{
    CPUState *cs = env_cpu(env);
    const uintptr_t ra = GETPC();
    uint64_t table, entry, raddr;
    uint16_t entries, i, index = 0;

    /* These r2 bits are reserved and must be zero.  */
    if (r2 & 0xff000) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    if (!(r2 & 0x800)) {
        /* invalidation-and-clearing operation */
        table = r1 & ASCE_ORIGIN;
        /* Number of consecutive entries to invalidate: 1..2048.  */
        entries = (r2 & 0x7ff) + 1;

        /* Select the table-index field of r2 matching the table level
           designated by r1.  */
        switch (r1 & ASCE_TYPE_MASK) {
        case ASCE_TYPE_REGION1:
            index = (r2 >> 53) & 0x7ff;
            break;
        case ASCE_TYPE_REGION2:
            index = (r2 >> 42) & 0x7ff;
            break;
        case ASCE_TYPE_REGION3:
            index = (r2 >> 31) & 0x7ff;
            break;
        case ASCE_TYPE_SEGMENT:
            index = (r2 >> 20) & 0x7ff;
            break;
        }
        for (i = 0; i < entries; i++) {
            /* addresses are not wrapped in 24/31bit mode but table index is */
            raddr = table + ((index + i) & 0x7ff) * sizeof(entry);
            entry = cpu_ldq_mmuidx_ra(env, raddr, MMU_REAL_IDX, ra);
            if (!(entry & REGION_ENTRY_I)) {
                /* we are allowed to not store if already invalid */
                entry |= REGION_ENTRY_I;
                cpu_stq_mmuidx_ra(env, raddr, entry, MMU_REAL_IDX, ra);
            }
        }
    }

    /* We simply flush the complete tlb, therefore we can ignore r3. */
    if (m4 & 1) {
        /* m4 bit set: only this CPU's TLB is flushed.  */
        tlb_flush(cs);
    } else {
        tlb_flush_all_cpus_synced(cs);
    }
}
2332 
/* invalidate pte */
void HELPER(ipte)(CPUS390XState *env, uint64_t pto, uint64_t vaddr,
                  uint32_t m4)
{
    CPUState *cs = env_cpu(env);
    const uintptr_t ra = GETPC();
    uint64_t page = vaddr & TARGET_PAGE_MASK;
    uint64_t pte_addr, pte;

    /* Compute the page table entry address */
    pte_addr = (pto & SEGMENT_ENTRY_ORIGIN);
    pte_addr += VADDR_PAGE_TX(vaddr) * 8;

    /* Mark the page table entry as invalid */
    pte = cpu_ldq_mmuidx_ra(env, pte_addr, MMU_REAL_IDX, ra);
    pte |= PAGE_ENTRY_I;
    cpu_stq_mmuidx_ra(env, pte_addr, pte, MMU_REAL_IDX, ra);

    /* XXX we exploit the fact that Linux passes the exact virtual
       address here - it's not obliged to! */
    if (m4 & 1) {
        /* m4 bit set: only the local TLB needs invalidating.  */
        if (vaddr & ~VADDR_PAGE_TX_MASK) {
            tlb_flush_page(cs, page);
            /* XXX 31-bit hack */
            tlb_flush_page(cs, page ^ 0x80000000);
        } else {
            /* looks like we don't have a valid virtual address */
            tlb_flush(cs);
        }
    } else {
        /* Otherwise invalidate on all CPUs, synchronized.  */
        if (vaddr & ~VADDR_PAGE_TX_MASK) {
            tlb_flush_page_all_cpus_synced(cs, page);
            /* XXX 31-bit hack */
            tlb_flush_page_all_cpus_synced(cs, page ^ 0x80000000);
        } else {
            /* looks like we don't have a valid virtual address */
            tlb_flush_all_cpus_synced(cs);
        }
    }
}
2373 
/* flush local tlb */
void HELPER(ptlb)(CPUS390XState *env)
{
    /* PURGE TLB: discard all cached translations on this CPU only.  */
    tlb_flush(env_cpu(env));
}
2379 
/* flush global tlb */
void HELPER(purge)(CPUS390XState *env)
{
    /* Synchronously drop cached translations on every CPU.  */
    tlb_flush_all_cpus_synced(env_cpu(env));
}
2385 
/* load real address */
uint64_t HELPER(lra)(CPUS390XState *env, uint64_t r1, uint64_t addr)
{
    uint64_t asc = env->psw.mask & PSW_MASK_ASC;
    uint64_t ret, tec;
    int flags, exc, cc;

    /* XXX incomplete - has more corner cases */
    /* Outside 64-bit mode, an operand address above 4G is rejected.  */
    if (!(env->psw.mask & PSW_MASK_64) && (addr >> 32)) {
        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, GETPC());
    }

    exc = mmu_translate(env, addr, MMU_S390_LRA, asc, &ret, &flags, &tec);
    if (exc) {
        /* Translation failed: cc 3; keep the high half of r1 and return
           the exception code with bit 32 (0x80000000) set.  */
        cc = 3;
        ret = (r1 & 0xFFFFFFFF00000000ULL) | exc | 0x80000000;
    } else {
        cc = 0;
        /* Re-attach the byte offset within the page.  */
        ret |= addr & ~TARGET_PAGE_MASK;
    }

    env->cc_op = cc;
    return ret;
}
2410 #endif
2411 
2412 /* Execute instruction.  This instruction executes an insn modified with
2413    the contents of r1.  It does not change the executed instruction in memory;
2414    it does not change the program counter.
2415 
2416    Perform this by recording the modified instruction in env->ex_value.
2417    This will be noticed by cpu_get_tb_cpu_state and thus tb translation.
2418 */
void HELPER(ex)(CPUS390XState *env, uint32_t ilen, uint64_t r1, uint64_t addr)
{
    uint64_t insn;
    uint8_t opc;

    /* EXECUTE targets must be at even addresses.  */
    if (addr & 1) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC());
    }

    /* Fetch the first halfword of the target instruction.  */
    insn = cpu_lduw_code(env, addr);
    opc = insn >> 8;

    /* Or in the contents of R1[56:63].  */
    insn |= r1 & 0xff;

    /* Load the rest of the instruction.  */
    insn <<= 48;
    switch (get_ilen(opc)) {
    case 2:
        break;
    case 4:
        insn |= (uint64_t)cpu_lduw_code(env, addr + 2) << 32;
        break;
    case 6:
        insn |= (uint64_t)(uint32_t)cpu_ldl_code(env, addr + 2) << 16;
        break;
    default:
        g_assert_not_reached();
    }

    /* The very most common cases can be sped up by avoiding a new TB.  */
    if ((opc & 0xf0) == 0xd0) {
        /* SS-format storage-to-storage insns: dispatch on the low
           opcode nibble directly to the corresponding helper.  */
        typedef uint32_t (*dx_helper)(CPUS390XState *, uint32_t, uint64_t,
                                      uint64_t, uintptr_t);
        static const dx_helper dx[16] = {
            [0x0] = do_helper_trt_bkwd,
            [0x2] = do_helper_mvc,
            [0x4] = do_helper_nc,
            [0x5] = do_helper_clc,
            [0x6] = do_helper_oc,
            [0x7] = do_helper_xc,
            [0xc] = do_helper_tr,
            [0xd] = do_helper_trt_fwd,
        };
        dx_helper helper = dx[opc & 0xf];

        if (helper) {
            /* Decode the SS operands: length byte and two base+disp.  */
            uint32_t l = extract64(insn, 48, 8);
            uint32_t b1 = extract64(insn, 44, 4);
            uint32_t d1 = extract64(insn, 32, 12);
            uint32_t b2 = extract64(insn, 28, 4);
            uint32_t d2 = extract64(insn, 16, 12);
            uint64_t a1 = wrap_address(env, (b1 ? env->regs[b1] : 0) + d1);
            uint64_t a2 = wrap_address(env, (b2 ? env->regs[b2] : 0) + d2);

            env->cc_op = helper(env, l, a1, a2, 0);
            env->psw.addr += ilen;
            return;
        }
    } else if (opc == 0x0a) {
        /* SUPERVISOR CALL: raise the SVC exception directly.  */
        env->int_svc_code = extract64(insn, 48, 8);
        env->int_svc_ilen = ilen;
        helper_exception(env, EXCP_SVC);
        g_assert_not_reached();
    }

    /* Record the insn we want to execute as well as the ilen to use
       during the execution of the target insn.  This will also ensure
       that ex_value is non-zero, which flags that we are in a state
       that requires such execution.  */
    /* The instruction bytes occupy bits 16-63, so the low bits are free
       to carry the ilen (2, 4 or 6).  */
    env->ex_value = insn | ilen;
    env->ex_target = addr;
}
2493 
/* MOVE WITH OPTIONAL SPECIFICATIONS: copy up to 4k bytes between the
   address spaces/keys selected by the OAC fields in register 0.  */
uint32_t HELPER(mvcos)(CPUS390XState *env, uint64_t dest, uint64_t src,
                       uint64_t len)
{
    const uint8_t psw_key = (env->psw.mask & PSW_MASK_KEY) >> PSW_SHIFT_KEY;
    const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
    const uint64_t r0 = env->regs[0];
    const uintptr_t ra = GETPC();
    uint8_t dest_key, dest_as, dest_k, dest_a;
    uint8_t src_key, src_as, src_k, src_a;
    uint64_t val;
    int cc = 0;

    HELPER_LOG("%s dest %" PRIx64 ", src %" PRIx64 ", len %" PRIx64 "\n",
               __func__, dest, src, len);

    /* MVCOS requires DAT to be enabled.  */
    if (!(env->psw.mask & PSW_MASK_DAT)) {
        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }

    /* OAC (operand access control) for the first operand -> dest */
    val = (r0 & 0xffff0000ULL) >> 16;
    dest_key = (val >> 12) & 0xf;   /* access key */
    dest_as = (val >> 6) & 0x3;     /* address-space control */
    dest_k = (val >> 1) & 0x1;      /* key-validity bit */
    dest_a = val & 0x1;             /* AS-validity bit */

    /* OAC (operand access control) for the second operand -> src */
    val = (r0 & 0x0000ffffULL);
    src_key = (val >> 12) & 0xf;
    src_as = (val >> 6) & 0x3;
    src_k = (val >> 1) & 0x1;
    src_a = val & 0x1;

    /* Fields whose validity bit is clear default to the PSW values.  */
    if (!dest_k) {
        dest_key = psw_key;
    }
    if (!src_k) {
        src_key = psw_key;
    }
    if (!dest_a) {
        dest_as = psw_as;
    }
    if (!src_a) {
        src_as = psw_as;
    }

    /* Explicitly selecting the home space is privileged.  */
    if (dest_a && dest_as == AS_HOME && (env->psw.mask & PSW_MASK_PSTATE)) {
        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }
    if (!(env->cregs[0] & CR0_SECONDARY) &&
        (dest_as == AS_SECONDARY || src_as == AS_SECONDARY)) {
        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }
    if (!psw_key_valid(env, dest_key) || !psw_key_valid(env, src_key)) {
        tcg_s390_program_interrupt(env, PGM_PRIVILEGED, ra);
    }

    /* Only the first 4k bytes are moved; cc 3 reports the truncation.  */
    len = wrap_length32(env, len);
    if (len > 4096) {
        cc = 3;
        len = 4096;
    }

    /* FIXME: AR-mode and proper problem state mode (using PSW keys) missing */
    if (src_as == AS_ACCREG || dest_as == AS_ACCREG ||
        (env->psw.mask & PSW_MASK_PSTATE)) {
        qemu_log_mask(LOG_UNIMP, "%s: AR-mode and PSTATE support missing\n",
                      __func__);
        tcg_s390_program_interrupt(env, PGM_ADDRESSING, ra);
    }

    /* FIXME: Access using correct keys and AR-mode */
    if (len) {
        S390Access srca, desta;

        access_prepare(&srca, env, src, len, MMU_DATA_LOAD,
                       mmu_idx_from_as(src_as), ra);
        access_prepare(&desta, env, dest, len, MMU_DATA_STORE,
                       mmu_idx_from_as(dest_as), ra);

        access_memmove(env, &desta, &srca, ra);
    }

    return cc;
}
2579 
2580 /* Decode a Unicode character.  A return value < 0 indicates success, storing
2581    the UTF-32 result into OCHAR and the input length into OLEN.  A return
2582    value >= 0 indicates failure, and the CC value to be returned.  */
2583 typedef int (*decode_unicode_fn)(CPUS390XState *env, uint64_t addr,
2584                                  uint64_t ilen, bool enh_check, uintptr_t ra,
2585                                  uint32_t *ochar, uint32_t *olen);
2586 
2587 /* Encode a Unicode character.  A return value < 0 indicates success, storing
2588    the bytes into ADDR and the output length into OLEN.  A return value >= 0
2589    indicates failure, and the CC value to be returned.  */
2590 typedef int (*encode_unicode_fn)(CPUS390XState *env, uint64_t addr,
2591                                  uint64_t ilen, uintptr_t ra, uint32_t c,
2592                                  uint32_t *olen);
2593 
2594 static int decode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2595                        bool enh_check, uintptr_t ra,
2596                        uint32_t *ochar, uint32_t *olen)
2597 {
2598     uint8_t s0, s1, s2, s3;
2599     uint32_t c, l;
2600 
2601     if (ilen < 1) {
2602         return 0;
2603     }
2604     s0 = cpu_ldub_data_ra(env, addr, ra);
2605     if (s0 <= 0x7f) {
2606         /* one byte character */
2607         l = 1;
2608         c = s0;
2609     } else if (s0 <= (enh_check ? 0xc1 : 0xbf)) {
2610         /* invalid character */
2611         return 2;
2612     } else if (s0 <= 0xdf) {
2613         /* two byte character */
2614         l = 2;
2615         if (ilen < 2) {
2616             return 0;
2617         }
2618         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2619         c = s0 & 0x1f;
2620         c = (c << 6) | (s1 & 0x3f);
2621         if (enh_check && (s1 & 0xc0) != 0x80) {
2622             return 2;
2623         }
2624     } else if (s0 <= 0xef) {
2625         /* three byte character */
2626         l = 3;
2627         if (ilen < 3) {
2628             return 0;
2629         }
2630         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2631         s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2632         c = s0 & 0x0f;
2633         c = (c << 6) | (s1 & 0x3f);
2634         c = (c << 6) | (s2 & 0x3f);
2635         /* Fold the byte-by-byte range descriptions in the PoO into
2636            tests against the complete value.  It disallows encodings
2637            that could be smaller, and the UTF-16 surrogates.  */
2638         if (enh_check
2639             && ((s1 & 0xc0) != 0x80
2640                 || (s2 & 0xc0) != 0x80
2641                 || c < 0x1000
2642                 || (c >= 0xd800 && c <= 0xdfff))) {
2643             return 2;
2644         }
2645     } else if (s0 <= (enh_check ? 0xf4 : 0xf7)) {
2646         /* four byte character */
2647         l = 4;
2648         if (ilen < 4) {
2649             return 0;
2650         }
2651         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2652         s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2653         s3 = cpu_ldub_data_ra(env, addr + 3, ra);
2654         c = s0 & 0x07;
2655         c = (c << 6) | (s1 & 0x3f);
2656         c = (c << 6) | (s2 & 0x3f);
2657         c = (c << 6) | (s3 & 0x3f);
2658         /* See above.  */
2659         if (enh_check
2660             && ((s1 & 0xc0) != 0x80
2661                 || (s2 & 0xc0) != 0x80
2662                 || (s3 & 0xc0) != 0x80
2663                 || c < 0x010000
2664                 || c > 0x10ffff)) {
2665             return 2;
2666         }
2667     } else {
2668         /* invalid character */
2669         return 2;
2670     }
2671 
2672     *ochar = c;
2673     *olen = l;
2674     return -1;
2675 }
2676 
2677 static int decode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2678                         bool enh_check, uintptr_t ra,
2679                         uint32_t *ochar, uint32_t *olen)
2680 {
2681     uint16_t s0, s1;
2682     uint32_t c, l;
2683 
2684     if (ilen < 2) {
2685         return 0;
2686     }
2687     s0 = cpu_lduw_data_ra(env, addr, ra);
2688     if ((s0 & 0xfc00) != 0xd800) {
2689         /* one word character */
2690         l = 2;
2691         c = s0;
2692     } else {
2693         /* two word character */
2694         l = 4;
2695         if (ilen < 4) {
2696             return 0;
2697         }
2698         s1 = cpu_lduw_data_ra(env, addr + 2, ra);
2699         c = extract32(s0, 6, 4) + 1;
2700         c = (c << 6) | (s0 & 0x3f);
2701         c = (c << 10) | (s1 & 0x3ff);
2702         if (enh_check && (s1 & 0xfc00) != 0xdc00) {
2703             /* invalid surrogate character */
2704             return 2;
2705         }
2706     }
2707 
2708     *ochar = c;
2709     *olen = l;
2710     return -1;
2711 }
2712 
static int decode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
                        bool enh_check, uintptr_t ra,
                        uint32_t *ochar, uint32_t *olen)
{
    uint32_t c;

    /* A full character is always four bytes; cc 0 if the source is short.  */
    if (ilen < 4) {
        return 0;
    }
    c = cpu_ldl_data_ra(env, addr, ra);
    /*
     * NOTE(review): only high surrogates (0xd800-0xdbff) and values above
     * 0x10ffff are rejected; lone low surrogates (0xdc00-0xdfff) pass, and
     * enh_check is not consulted here — confirm this against the PoO.
     */
    if ((c >= 0xd800 && c <= 0xdbff) || c > 0x10ffff) {
        /* invalid unicode character */
        return 2;
    }

    *ochar = c;
    *olen = 4;
    return -1;
}
2732 
2733 static int encode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2734                        uintptr_t ra, uint32_t c, uint32_t *olen)
2735 {
2736     uint8_t d[4];
2737     uint32_t l, i;
2738 
2739     if (c <= 0x7f) {
2740         /* one byte character */
2741         l = 1;
2742         d[0] = c;
2743     } else if (c <= 0x7ff) {
2744         /* two byte character */
2745         l = 2;
2746         d[1] = 0x80 | extract32(c, 0, 6);
2747         d[0] = 0xc0 | extract32(c, 6, 5);
2748     } else if (c <= 0xffff) {
2749         /* three byte character */
2750         l = 3;
2751         d[2] = 0x80 | extract32(c, 0, 6);
2752         d[1] = 0x80 | extract32(c, 6, 6);
2753         d[0] = 0xe0 | extract32(c, 12, 4);
2754     } else {
2755         /* four byte character */
2756         l = 4;
2757         d[3] = 0x80 | extract32(c, 0, 6);
2758         d[2] = 0x80 | extract32(c, 6, 6);
2759         d[1] = 0x80 | extract32(c, 12, 6);
2760         d[0] = 0xf0 | extract32(c, 18, 3);
2761     }
2762 
2763     if (ilen < l) {
2764         return 1;
2765     }
2766     for (i = 0; i < l; ++i) {
2767         cpu_stb_data_ra(env, addr + i, d[i], ra);
2768     }
2769 
2770     *olen = l;
2771     return -1;
2772 }
2773 
2774 static int encode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2775                         uintptr_t ra, uint32_t c, uint32_t *olen)
2776 {
2777     uint16_t d0, d1;
2778 
2779     if (c <= 0xffff) {
2780         /* one word character */
2781         if (ilen < 2) {
2782             return 1;
2783         }
2784         cpu_stw_data_ra(env, addr, c, ra);
2785         *olen = 2;
2786     } else {
2787         /* two word character */
2788         if (ilen < 4) {
2789             return 1;
2790         }
2791         d1 = 0xdc00 | extract32(c, 0, 10);
2792         d0 = 0xd800 | extract32(c, 10, 6);
2793         d0 = deposit32(d0, 6, 4, extract32(c, 16, 5) - 1);
2794         cpu_stw_data_ra(env, addr + 0, d0, ra);
2795         cpu_stw_data_ra(env, addr + 2, d1, ra);
2796         *olen = 4;
2797     }
2798 
2799     return -1;
2800 }
2801 
static int encode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
                        uintptr_t ra, uint32_t c, uint32_t *olen)
{
    /* UTF-32 output is always a single 4-byte unit; cc 1 if no room.  */
    if (ilen < 4) {
        return 1;
    }
    cpu_stl_data_ra(env, addr, c, ra);
    *olen = 4;
    return -1;
}
2812 
/*
 * Common loop for the CUxy instructions: repeatedly DECODE one character
 * from the r2 operand and ENCODE it into the r1 operand, advancing both
 * address/length register pairs.  Returns the condition code: 0 when the
 * source is exhausted, 1 when the destination fills up, 2 for an invalid
 * source character, or 3 when interrupted by the iteration cap.
 */
static inline uint32_t convert_unicode(CPUS390XState *env, uint32_t r1,
                                       uint32_t r2, uint32_t m3, uintptr_t ra,
                                       decode_unicode_fn decode,
                                       encode_unicode_fn encode)
{
    uint64_t dst = get_address(env, r1);
    uint64_t dlen = get_length(env, r1 + 1);
    uint64_t src = get_address(env, r2);
    uint64_t slen = get_length(env, r2 + 1);
    bool enh_check = m3 & 1;
    int cc, i;

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 256.  */
    for (i = 0; i < 256; ++i) {
        uint32_t c, ilen, olen;

        cc = decode(env, src, slen, enh_check, ra, &c, &ilen);
        if (unlikely(cc >= 0)) {
            break;
        }
        cc = encode(env, dst, dlen, ra, c, &olen);
        if (unlikely(cc >= 0)) {
            break;
        }

        /* Character converted: consume the source, advance the dest.  */
        src += ilen;
        slen -= ilen;
        dst += olen;
        dlen -= olen;
        /* If the cap stops us here, cc 3 tells the guest to re-execute.  */
        cc = 3;
    }

    /* Write the updated addresses/lengths back to the register pairs.  */
    set_address(env, r1, dst);
    set_length(env, r1 + 1, dlen);
    set_address(env, r2, src);
    set_length(env, r2 + 1, slen);

    return cc;
}
2853 
2854 uint32_t HELPER(cu12)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2855 {
2856     return convert_unicode(env, r1, r2, m3, GETPC(),
2857                            decode_utf8, encode_utf16);
2858 }
2859 
2860 uint32_t HELPER(cu14)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2861 {
2862     return convert_unicode(env, r1, r2, m3, GETPC(),
2863                            decode_utf8, encode_utf32);
2864 }
2865 
2866 uint32_t HELPER(cu21)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2867 {
2868     return convert_unicode(env, r1, r2, m3, GETPC(),
2869                            decode_utf16, encode_utf8);
2870 }
2871 
2872 uint32_t HELPER(cu24)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2873 {
2874     return convert_unicode(env, r1, r2, m3, GETPC(),
2875                            decode_utf16, encode_utf32);
2876 }
2877 
2878 uint32_t HELPER(cu41)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2879 {
2880     return convert_unicode(env, r1, r2, m3, GETPC(),
2881                            decode_utf32, encode_utf8);
2882 }
2883 
2884 uint32_t HELPER(cu42)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2885 {
2886     return convert_unicode(env, r1, r2, m3, GETPC(),
2887                            decode_utf32, encode_utf16);
2888 }
2889 
2890 void probe_write_access(CPUS390XState *env, uint64_t addr, uint64_t len,
2891                         uintptr_t ra)
2892 {
2893     const int mmu_idx = s390x_env_mmu_index(env, false);
2894 
2895     /* test the actual access, not just any access to the page due to LAP */
2896     while (len) {
2897         const uint64_t pagelen = -(addr | TARGET_PAGE_MASK);
2898         const uint64_t curlen = MIN(pagelen, len);
2899 
2900         probe_write(env, addr, curlen, mmu_idx, ra);
2901         addr = wrap_address(env, addr + curlen);
2902         len -= curlen;
2903     }
2904 }
2905 
2906 void HELPER(probe_write_access)(CPUS390XState *env, uint64_t addr, uint64_t len)
2907 {
2908     probe_write_access(env, addr, len, GETPC());
2909 }
2910